138fd1498Szrj /* Loop interchange.
238fd1498Szrj    Copyright (C) 2017-2018 Free Software Foundation, Inc.
338fd1498Szrj    Contributed by ARM Ltd.
438fd1498Szrj 
538fd1498Szrj This file is part of GCC.
638fd1498Szrj 
738fd1498Szrj GCC is free software; you can redistribute it and/or modify it
838fd1498Szrj under the terms of the GNU General Public License as published by the
938fd1498Szrj Free Software Foundation; either version 3, or (at your option) any
1038fd1498Szrj later version.
1138fd1498Szrj 
1238fd1498Szrj GCC is distributed in the hope that it will be useful, but WITHOUT
1338fd1498Szrj ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
1438fd1498Szrj FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
1538fd1498Szrj for more details.
1638fd1498Szrj 
1738fd1498Szrj You should have received a copy of the GNU General Public License
1838fd1498Szrj along with GCC; see the file COPYING3.  If not see
1938fd1498Szrj <http://www.gnu.org/licenses/>.  */
2038fd1498Szrj 
2138fd1498Szrj #include "config.h"
2238fd1498Szrj #include "system.h"
2338fd1498Szrj #include "coretypes.h"
2438fd1498Szrj #include "backend.h"
2538fd1498Szrj #include "is-a.h"
2638fd1498Szrj #include "tree.h"
2738fd1498Szrj #include "gimple.h"
2838fd1498Szrj #include "tree-pass.h"
2938fd1498Szrj #include "ssa.h"
3038fd1498Szrj #include "gimple-pretty-print.h"
3138fd1498Szrj #include "fold-const.h"
3238fd1498Szrj #include "gimplify.h"
3338fd1498Szrj #include "gimple-iterator.h"
3438fd1498Szrj #include "gimplify-me.h"
3538fd1498Szrj #include "cfgloop.h"
3638fd1498Szrj #include "params.h"
3738fd1498Szrj #include "tree-ssa.h"
3838fd1498Szrj #include "tree-scalar-evolution.h"
3938fd1498Szrj #include "tree-ssa-loop-manip.h"
4038fd1498Szrj #include "tree-ssa-loop-niter.h"
4138fd1498Szrj #include "tree-ssa-loop-ivopts.h"
4238fd1498Szrj #include "tree-ssa-dce.h"
4338fd1498Szrj #include "tree-data-ref.h"
4438fd1498Szrj #include "tree-vectorizer.h"
4538fd1498Szrj 
4638fd1498Szrj /* This pass performs loop interchange: for example, the loop nest
4738fd1498Szrj 
4838fd1498Szrj    for (int j = 0; j < N; j++)
4938fd1498Szrj      for (int k = 0; k < N; k++)
5038fd1498Szrj        for (int i = 0; i < N; i++)
5138fd1498Szrj 	 c[i][j] = c[i][j] + a[i][k]*b[k][j];
5238fd1498Szrj 
5338fd1498Szrj    is transformed to
5438fd1498Szrj 
5538fd1498Szrj    for (int i = 0; i < N; i++)
5638fd1498Szrj      for (int j = 0; j < N; j++)
5738fd1498Szrj        for (int k = 0; k < N; k++)
5838fd1498Szrj 	 c[i][j] = c[i][j] + a[i][k]*b[k][j];
5938fd1498Szrj 
6038fd1498Szrj    This pass implements loop interchange in the following steps:
6138fd1498Szrj 
6238fd1498Szrj      1) Find perfect loop nest for each innermost loop and compute data
6338fd1498Szrj 	dependence relations for it.  For above example, loop nest is
6438fd1498Szrj 	<loop_j, loop_k, loop_i>.
6538fd1498Szrj      2) From innermost to outermost loop, this pass tries to interchange
6638fd1498Szrj 	each loop pair.  For above case, it firstly tries to interchange
6738fd1498Szrj 	<loop_k, loop_i> and loop nest becomes <loop_j, loop_i, loop_k>.
6838fd1498Szrj 	Then it tries to interchange <loop_j, loop_i> and loop nest becomes
6938fd1498Szrj 	<loop_i, loop_j, loop_k>.  The overall effect is to move innermost
7038fd1498Szrj 	loop to the outermost position.  For loop pair <loop_i, loop_j>
7138fd1498Szrj 	to be interchanged, we:
7238fd1498Szrj      3) Check if data dependence relations are valid for loop interchange.
7338fd1498Szrj      4) Check if both loops can be interchanged in terms of transformation.
7438fd1498Szrj      5) Check if interchanging the two loops is profitable.
7538fd1498Szrj      6) Interchange the two loops by mapping induction variables.
7638fd1498Szrj 
7738fd1498Szrj    This pass also handles reductions in loop nest.  So far we only support
7838fd1498Szrj    simple reduction of inner loop and double reduction of the loop nest.  */
7938fd1498Szrj 
8038fd1498Szrj /* Maximum number of stmts in each loop that should be interchanged.  */
8138fd1498Szrj #define MAX_NUM_STMT    (PARAM_VALUE (PARAM_LOOP_INTERCHANGE_MAX_NUM_STMTS))
8238fd1498Szrj /* Maximum number of data references in loop nest.  */
8338fd1498Szrj #define MAX_DATAREFS    (PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS))
8438fd1498Szrj 
8538fd1498Szrj /* Comparison ratio of access stride between inner/outer loops to be
8638fd1498Szrj    interchanged.  This is the minimum stride ratio for loop interchange
8738fd1498Szrj    to be profitable.  */
8838fd1498Szrj #define OUTER_STRIDE_RATIO  (PARAM_VALUE (PARAM_LOOP_INTERCHANGE_STRIDE_RATIO))
8938fd1498Szrj /* The same as above, but we require higher ratio for interchanging the
9038fd1498Szrj    innermost two loops.  */
9138fd1498Szrj #define INNER_STRIDE_RATIO  ((OUTER_STRIDE_RATIO) + 1)
9238fd1498Szrj 
9338fd1498Szrj /* Comparison ratio of stmt cost between inner/outer loops.  Loops won't
9438fd1498Szrj    be interchanged if outer loop has too many stmts.  */
9538fd1498Szrj #define STMT_COST_RATIO     (3)
9638fd1498Szrj 
9738fd1498Szrj /* Vector of strides that DR accesses in each level loop of a loop nest.  */
9838fd1498Szrj #define DR_ACCESS_STRIDE(dr) ((vec<tree> *) dr->aux)
9938fd1498Szrj 
10038fd1498Szrj /* Structure recording loop induction variable.  */
10138fd1498Szrj typedef struct induction
10238fd1498Szrj {
10338fd1498Szrj   /* IV itself.  */
10438fd1498Szrj   tree var;
10538fd1498Szrj   /* IV's initializing value, which is the init arg of the IV PHI node.  */
10638fd1498Szrj   tree init_val;
10738fd1498Szrj   /* IV's initializing expr, which is (the expanded result of) init_val.  */
10838fd1498Szrj   tree init_expr;
10938fd1498Szrj   /* IV's step.  */
11038fd1498Szrj   tree step;
11138fd1498Szrj } *induction_p;
11238fd1498Szrj 
11338fd1498Szrj /* Enum type for loop reduction variable.  */
11438fd1498Szrj enum reduction_type
11538fd1498Szrj {
11638fd1498Szrj   UNKNOWN_RTYPE = 0,
11738fd1498Szrj   SIMPLE_RTYPE,
11838fd1498Szrj   DOUBLE_RTYPE
11938fd1498Szrj };
12038fd1498Szrj 
12138fd1498Szrj /* Structure recording loop reduction variable.  */
12238fd1498Szrj typedef struct reduction
12338fd1498Szrj {
12438fd1498Szrj   /* Reduction itself.  */
12538fd1498Szrj   tree var;
12638fd1498Szrj   /* PHI node defining reduction variable.  */
12738fd1498Szrj   gphi *phi;
12838fd1498Szrj   /* Init and next variables of the reduction.  */
12938fd1498Szrj   tree init;
13038fd1498Szrj   tree next;
13138fd1498Szrj   /* Lcssa PHI node if reduction is used outside of its definition loop.  */
13238fd1498Szrj   gphi *lcssa_phi;
13338fd1498Szrj   /* Stmts defining init and next.  */
13438fd1498Szrj   gimple *producer;
13538fd1498Szrj   gimple *consumer;
13638fd1498Szrj   /* If init is loaded from memory, this is the loading memory reference.  */
13738fd1498Szrj   tree init_ref;
13838fd1498Szrj   /* If reduction is finally stored to memory, this is the stored memory
13938fd1498Szrj      reference.  */
14038fd1498Szrj   tree fini_ref;
14138fd1498Szrj   enum reduction_type type;
14238fd1498Szrj } *reduction_p;
14338fd1498Szrj 
14438fd1498Szrj 
14538fd1498Szrj /* Dump reduction RE.  */
14638fd1498Szrj 
14738fd1498Szrj static void
dump_reduction(reduction_p re)14838fd1498Szrj dump_reduction (reduction_p re)
14938fd1498Szrj {
15038fd1498Szrj   if (re->type == SIMPLE_RTYPE)
15138fd1498Szrj     fprintf (dump_file, "  Simple reduction:  ");
15238fd1498Szrj   else if (re->type == DOUBLE_RTYPE)
15338fd1498Szrj     fprintf (dump_file, "  Double reduction:  ");
15438fd1498Szrj   else
15538fd1498Szrj     fprintf (dump_file, "  Unknown reduction:  ");
15638fd1498Szrj 
15738fd1498Szrj   print_gimple_stmt (dump_file, re->phi, 0);
15838fd1498Szrj }
15938fd1498Szrj 
16038fd1498Szrj /* Dump LOOP's induction IV.  */
16138fd1498Szrj static void
dump_induction(struct loop * loop,induction_p iv)16238fd1498Szrj dump_induction (struct loop *loop, induction_p iv)
16338fd1498Szrj {
16438fd1498Szrj   fprintf (dump_file, "  Induction:  ");
16538fd1498Szrj   print_generic_expr (dump_file, iv->var, TDF_SLIM);
16638fd1498Szrj   fprintf (dump_file, " = {");
16738fd1498Szrj   print_generic_expr (dump_file, iv->init_expr, TDF_SLIM);
16838fd1498Szrj   fprintf (dump_file, ", ");
16938fd1498Szrj   print_generic_expr (dump_file, iv->step, TDF_SLIM);
17038fd1498Szrj   fprintf (dump_file, "}_%d\n", loop->num);
17138fd1498Szrj }
17238fd1498Szrj 
17338fd1498Szrj /* Loop candidate for interchange.  */
17438fd1498Szrj 
17538fd1498Szrj struct loop_cand
17638fd1498Szrj {
17738fd1498Szrj   loop_cand (struct loop *, struct loop *);
17838fd1498Szrj   ~loop_cand ();
17938fd1498Szrj 
18038fd1498Szrj   reduction_p find_reduction_by_stmt (gimple *);
18138fd1498Szrj   void classify_simple_reduction (reduction_p);
18238fd1498Szrj   bool analyze_iloop_reduction_var (tree);
18338fd1498Szrj   bool analyze_oloop_reduction_var (loop_cand *, tree);
18438fd1498Szrj   bool analyze_induction_var (tree, tree);
18538fd1498Szrj   bool analyze_carried_vars (loop_cand *);
18638fd1498Szrj   bool analyze_lcssa_phis (void);
18738fd1498Szrj   bool can_interchange_p (loop_cand *);
18838fd1498Szrj   void undo_simple_reduction (reduction_p, bitmap);
18938fd1498Szrj 
19038fd1498Szrj   /* The loop itself.  */
19138fd1498Szrj   struct loop *m_loop;
19238fd1498Szrj   /* The outer loop for interchange.  It equals to loop if this loop cand
19338fd1498Szrj      itself represents the outer loop.  */
19438fd1498Szrj   struct loop *m_outer;
19538fd1498Szrj   /* Vector of induction variables in loop.  */
19638fd1498Szrj   vec<induction_p> m_inductions;
19738fd1498Szrj   /* Vector of reduction variables in loop.  */
19838fd1498Szrj   vec<reduction_p> m_reductions;
19938fd1498Szrj   /* Lcssa PHI nodes of this loop.  */
20038fd1498Szrj   vec<gphi *> m_lcssa_nodes;
20138fd1498Szrj   /* Single exit edge of this loop.  */
20238fd1498Szrj   edge m_exit;
20338fd1498Szrj   /* Basic blocks of this loop.  */
20438fd1498Szrj   basic_block *m_bbs;
20538fd1498Szrj   /* Number of stmts of this loop.  Inner loops' stmts are not included.  */
20638fd1498Szrj   int m_num_stmts;
20738fd1498Szrj   /* Number of constant initialized simple reduction.  */
20838fd1498Szrj   int m_const_init_reduc;
20938fd1498Szrj };
21038fd1498Szrj 
21138fd1498Szrj /* Constructor.  */
21238fd1498Szrj 
loop_cand(struct loop * loop,struct loop * outer)21338fd1498Szrj loop_cand::loop_cand (struct loop *loop, struct loop *outer)
21438fd1498Szrj   : m_loop (loop), m_outer (outer), m_exit (single_exit (loop)),
21538fd1498Szrj     m_bbs (get_loop_body (loop)), m_num_stmts (0), m_const_init_reduc (0)
21638fd1498Szrj {
21738fd1498Szrj     m_inductions.create (3);
21838fd1498Szrj     m_reductions.create (3);
21938fd1498Szrj     m_lcssa_nodes.create (3);
22038fd1498Szrj }
22138fd1498Szrj 
22238fd1498Szrj /* Destructor.  */
22338fd1498Szrj 
~loop_cand()22438fd1498Szrj loop_cand::~loop_cand ()
22538fd1498Szrj {
22638fd1498Szrj   induction_p iv;
22738fd1498Szrj   for (unsigned i = 0; m_inductions.iterate (i, &iv); ++i)
22838fd1498Szrj     free (iv);
22938fd1498Szrj 
23038fd1498Szrj   reduction_p re;
23138fd1498Szrj   for (unsigned i = 0; m_reductions.iterate (i, &re); ++i)
23238fd1498Szrj     free (re);
23338fd1498Szrj 
23438fd1498Szrj   m_inductions.release ();
23538fd1498Szrj   m_reductions.release ();
23638fd1498Szrj   m_lcssa_nodes.release ();
23738fd1498Szrj   free (m_bbs);
23838fd1498Szrj }
23938fd1498Szrj 
24038fd1498Szrj /* Return single use stmt of VAR in LOOP, otherwise return NULL.  */
24138fd1498Szrj 
24238fd1498Szrj static gimple *
single_use_in_loop(tree var,struct loop * loop)24338fd1498Szrj single_use_in_loop (tree var, struct loop *loop)
24438fd1498Szrj {
24538fd1498Szrj   gimple *stmt, *res = NULL;
24638fd1498Szrj   use_operand_p use_p;
24738fd1498Szrj   imm_use_iterator iterator;
24838fd1498Szrj 
24938fd1498Szrj   FOR_EACH_IMM_USE_FAST (use_p, iterator, var)
25038fd1498Szrj     {
25138fd1498Szrj       stmt = USE_STMT (use_p);
25238fd1498Szrj       if (is_gimple_debug (stmt))
25338fd1498Szrj 	continue;
25438fd1498Szrj 
25538fd1498Szrj       if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
25638fd1498Szrj 	continue;
25738fd1498Szrj 
25838fd1498Szrj       if (res)
25938fd1498Szrj 	return NULL;
26038fd1498Szrj 
26138fd1498Szrj       res = stmt;
26238fd1498Szrj     }
26338fd1498Szrj   return res;
26438fd1498Szrj }
26538fd1498Szrj 
26638fd1498Szrj /* Return true if E is unsupported in loop interchange, i.e, E is a complex
26738fd1498Szrj    edge or part of irreducible loop.  */
26838fd1498Szrj 
26938fd1498Szrj static inline bool
unsupported_edge(edge e)27038fd1498Szrj unsupported_edge (edge e)
27138fd1498Szrj {
27238fd1498Szrj   return (e->flags & (EDGE_COMPLEX | EDGE_IRREDUCIBLE_LOOP));
27338fd1498Szrj }
27438fd1498Szrj 
27538fd1498Szrj /* Return the reduction if STMT is one of its lcssa PHI, producer or consumer
27638fd1498Szrj    stmt.  */
27738fd1498Szrj 
27838fd1498Szrj reduction_p
find_reduction_by_stmt(gimple * stmt)27938fd1498Szrj loop_cand::find_reduction_by_stmt (gimple *stmt)
28038fd1498Szrj {
28138fd1498Szrj   gphi *phi = dyn_cast <gphi *> (stmt);
28238fd1498Szrj   reduction_p re;
28338fd1498Szrj 
28438fd1498Szrj   for (unsigned i = 0; m_reductions.iterate (i, &re); ++i)
28538fd1498Szrj     if ((phi != NULL && phi == re->lcssa_phi)
28638fd1498Szrj 	|| (stmt == re->producer || stmt == re->consumer))
28738fd1498Szrj       return re;
28838fd1498Szrj 
28938fd1498Szrj   return NULL;
29038fd1498Szrj }
29138fd1498Szrj 
29238fd1498Szrj /* Return true if current loop_cand be interchanged.  ILOOP is not NULL if
29338fd1498Szrj    current loop_cand is outer loop in loop nest.  */
29438fd1498Szrj 
29538fd1498Szrj bool
can_interchange_p(loop_cand * iloop)29638fd1498Szrj loop_cand::can_interchange_p (loop_cand *iloop)
29738fd1498Szrj {
29838fd1498Szrj   /* For now we only support at most one reduction.  */
29938fd1498Szrj   unsigned allowed_reduction_num = 1;
30038fd1498Szrj 
30138fd1498Szrj   /* Only support reduction if the loop nest to be interchanged is the
30238fd1498Szrj      innermostin two loops.  */
30338fd1498Szrj   if ((iloop == NULL && m_loop->inner != NULL)
30438fd1498Szrj        || (iloop != NULL && iloop->m_loop->inner != NULL))
30538fd1498Szrj     allowed_reduction_num = 0;
30638fd1498Szrj 
30738fd1498Szrj   if (m_reductions.length () > allowed_reduction_num
30838fd1498Szrj       || (m_reductions.length () == 1
30938fd1498Szrj 	  && m_reductions[0]->type == UNKNOWN_RTYPE))
31038fd1498Szrj     return false;
31138fd1498Szrj 
31238fd1498Szrj   /* Only support lcssa PHI node which is for reduction.  */
31338fd1498Szrj   if (m_lcssa_nodes.length () > allowed_reduction_num)
31438fd1498Szrj     return false;
31538fd1498Szrj 
31638fd1498Szrj   /* Check if basic block has any unsupported operation.  Note basic blocks
31738fd1498Szrj      of inner loops are not checked here.  */
31838fd1498Szrj   for (unsigned i = 0; i < m_loop->num_nodes; i++)
31938fd1498Szrj     {
32038fd1498Szrj       basic_block bb = m_bbs[i];
32138fd1498Szrj       gphi_iterator psi;
32238fd1498Szrj       gimple_stmt_iterator gsi;
32338fd1498Szrj 
32438fd1498Szrj       /* Skip basic blocks of inner loops.  */
32538fd1498Szrj       if (bb->loop_father != m_loop)
32638fd1498Szrj        continue;
32738fd1498Szrj 
32838fd1498Szrj       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
32938fd1498Szrj 	{
33038fd1498Szrj 	  gimple *stmt = gsi_stmt (gsi);
33138fd1498Szrj 	  if (is_gimple_debug (stmt))
33238fd1498Szrj 	    continue;
33338fd1498Szrj 
33438fd1498Szrj 	  if (gimple_has_side_effects (stmt))
33538fd1498Szrj 	    return false;
33638fd1498Szrj 
33738fd1498Szrj 	  m_num_stmts++;
33838fd1498Szrj 	  if (gcall *call = dyn_cast <gcall *> (stmt))
33938fd1498Szrj 	    {
34038fd1498Szrj 	      /* In basic block of outer loop, the call should be cheap since
34138fd1498Szrj 		 it will be moved to inner loop.  */
34238fd1498Szrj 	      if (iloop != NULL
34338fd1498Szrj 		  && !gimple_inexpensive_call_p (call))
34438fd1498Szrj 		return false;
34538fd1498Szrj 	      continue;
34638fd1498Szrj 	    }
34738fd1498Szrj 
34838fd1498Szrj 	  if (!iloop || !gimple_vuse (stmt))
34938fd1498Szrj 	    continue;
35038fd1498Szrj 
35138fd1498Szrj 	  /* Support stmt accessing memory in outer loop only if it is for
35238fd1498Szrj 	     inner loop's reduction.  */
35338fd1498Szrj 	  if (iloop->find_reduction_by_stmt (stmt))
35438fd1498Szrj 	    continue;
35538fd1498Szrj 
35638fd1498Szrj 	  tree lhs;
35738fd1498Szrj 	  /* Support loop invariant memory reference if it's only used once by
35838fd1498Szrj 	     inner loop.  */
35938fd1498Szrj 	  /* ???  How's this checking for invariantness?  */
36038fd1498Szrj 	  if (gimple_assign_single_p (stmt)
36138fd1498Szrj 	      && (lhs = gimple_assign_lhs (stmt)) != NULL_TREE
36238fd1498Szrj 	      && TREE_CODE (lhs) == SSA_NAME
36338fd1498Szrj 	      && single_use_in_loop (lhs, iloop->m_loop))
36438fd1498Szrj 	    continue;
36538fd1498Szrj 
36638fd1498Szrj 	  return false;
36738fd1498Szrj 	}
36838fd1498Szrj       /* Check if loop has too many stmts.  */
36938fd1498Szrj       if (m_num_stmts > MAX_NUM_STMT)
37038fd1498Szrj 	return false;
37138fd1498Szrj 
37238fd1498Szrj       /* Allow PHI nodes in any basic block of inner loop, PHI nodes in outer
37338fd1498Szrj 	 loop's header, or PHI nodes in dest bb of inner loop's exit edge.  */
37438fd1498Szrj       if (!iloop || bb == m_loop->header
37538fd1498Szrj 	  || bb == iloop->m_exit->dest)
37638fd1498Szrj 	continue;
37738fd1498Szrj 
37838fd1498Szrj       /* Don't allow any other PHI nodes.  */
37938fd1498Szrj       for (psi = gsi_start_phis (bb); !gsi_end_p (psi); gsi_next (&psi))
38038fd1498Szrj 	if (!virtual_operand_p (PHI_RESULT (psi.phi ())))
38138fd1498Szrj 	  return false;
38238fd1498Szrj     }
38338fd1498Szrj 
38438fd1498Szrj   return true;
38538fd1498Szrj }
38638fd1498Szrj 
38738fd1498Szrj /* Programmers and optimizers (like loop store motion) may optimize code:
38838fd1498Szrj 
38938fd1498Szrj      for (int i = 0; i < N; i++)
39038fd1498Szrj        for (int j = 0; j < N; j++)
39138fd1498Szrj 	 a[i] += b[j][i] * c[j][i];
39238fd1498Szrj 
39338fd1498Szrj    into reduction:
39438fd1498Szrj 
39538fd1498Szrj      for (int i = 0; i < N; i++)
39638fd1498Szrj        {
39738fd1498Szrj 	 // producer.  Note sum can be intitialized to a constant.
39838fd1498Szrj 	 int sum = a[i];
39938fd1498Szrj 	 for (int j = 0; j < N; j++)
40038fd1498Szrj 	   {
40138fd1498Szrj 	     sum += b[j][i] * c[j][i];
40238fd1498Szrj 	   }
40338fd1498Szrj 	 // consumer.
40438fd1498Szrj 	 a[i] = sum;
40538fd1498Szrj        }
40638fd1498Szrj 
40738fd1498Szrj    The result code can't be interchanged without undoing the optimization.
40838fd1498Szrj    This function classifies this kind reduction and records information so
40938fd1498Szrj    that we can undo the store motion during interchange.  */
41038fd1498Szrj 
41138fd1498Szrj void
classify_simple_reduction(reduction_p re)41238fd1498Szrj loop_cand::classify_simple_reduction (reduction_p re)
41338fd1498Szrj {
41438fd1498Szrj   gimple *producer, *consumer;
41538fd1498Szrj 
41638fd1498Szrj   /* Check init variable of reduction and how it is initialized.  */
41738fd1498Szrj   if (TREE_CODE (re->init) == SSA_NAME)
41838fd1498Szrj     {
41938fd1498Szrj       producer = SSA_NAME_DEF_STMT (re->init);
42038fd1498Szrj       re->producer = producer;
42138fd1498Szrj       basic_block bb = gimple_bb (producer);
42238fd1498Szrj       if (!bb || bb->loop_father != m_outer)
42338fd1498Szrj 	return;
42438fd1498Szrj 
42538fd1498Szrj       if (!gimple_assign_load_p (producer))
42638fd1498Szrj 	return;
42738fd1498Szrj 
42838fd1498Szrj       re->init_ref = gimple_assign_rhs1 (producer);
42938fd1498Szrj     }
43038fd1498Szrj   else if (CONSTANT_CLASS_P (re->init))
43138fd1498Szrj     m_const_init_reduc++;
43238fd1498Szrj   else
43338fd1498Szrj     return;
43438fd1498Szrj 
43538fd1498Szrj   /* Check how reduction variable is used.  */
43638fd1498Szrj   consumer = single_use_in_loop (PHI_RESULT (re->lcssa_phi), m_outer);
43738fd1498Szrj   if (!consumer
43838fd1498Szrj       || !gimple_store_p (consumer))
43938fd1498Szrj     return;
44038fd1498Szrj 
44138fd1498Szrj   re->fini_ref = gimple_get_lhs (consumer);
44238fd1498Szrj   re->consumer = consumer;
44338fd1498Szrj 
44438fd1498Szrj   /* Simple reduction with constant initializer.  */
44538fd1498Szrj   if (!re->init_ref)
44638fd1498Szrj     {
44738fd1498Szrj       gcc_assert (CONSTANT_CLASS_P (re->init));
44838fd1498Szrj       re->init_ref = unshare_expr (re->fini_ref);
44938fd1498Szrj     }
45038fd1498Szrj 
45138fd1498Szrj   /* Require memory references in producer and consumer are the same so
45238fd1498Szrj      that we can undo reduction during interchange.  */
45338fd1498Szrj   if (re->init_ref && !operand_equal_p (re->init_ref, re->fini_ref, 0))
45438fd1498Szrj     return;
45538fd1498Szrj 
45638fd1498Szrj   re->type = SIMPLE_RTYPE;
45738fd1498Szrj }
45838fd1498Szrj 
45938fd1498Szrj /* Analyze reduction variable VAR for inner loop of the loop nest to be
46038fd1498Szrj    interchanged.  Return true if analysis succeeds.  */
46138fd1498Szrj 
46238fd1498Szrj bool
analyze_iloop_reduction_var(tree var)46338fd1498Szrj loop_cand::analyze_iloop_reduction_var (tree var)
46438fd1498Szrj {
46538fd1498Szrj   gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (var));
46638fd1498Szrj   gphi *lcssa_phi = NULL, *use_phi;
46738fd1498Szrj   tree init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (m_loop));
46838fd1498Szrj   tree next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (m_loop));
46938fd1498Szrj   reduction_p re;
47038fd1498Szrj   gimple *stmt, *next_def, *single_use = NULL;
47138fd1498Szrj   use_operand_p use_p;
47238fd1498Szrj   imm_use_iterator iterator;
47338fd1498Szrj 
47438fd1498Szrj   if (TREE_CODE (next) != SSA_NAME)
47538fd1498Szrj     return false;
47638fd1498Szrj 
47738fd1498Szrj   next_def = SSA_NAME_DEF_STMT (next);
47838fd1498Szrj   basic_block bb = gimple_bb (next_def);
47938fd1498Szrj   if (!bb || !flow_bb_inside_loop_p (m_loop, bb))
48038fd1498Szrj     return false;
48138fd1498Szrj 
48238fd1498Szrj   /* In restricted reduction, the var is (and must be) used in defining
48338fd1498Szrj      the updated var.  The process can be depicted as below:
48438fd1498Szrj 
48538fd1498Szrj 		var ;; = PHI<init, next>
48638fd1498Szrj 		 |
48738fd1498Szrj 		 |
48838fd1498Szrj 		 v
48938fd1498Szrj       +---------------------+
49038fd1498Szrj       | reduction operators | <-- other operands
49138fd1498Szrj       +---------------------+
49238fd1498Szrj 		 |
49338fd1498Szrj 		 |
49438fd1498Szrj 		 v
49538fd1498Szrj 		next
49638fd1498Szrj 
49738fd1498Szrj      In terms loop interchange, we don't change how NEXT is computed based
49838fd1498Szrj      on VAR and OTHER OPERANDS.  In case of double reduction in loop nest
49938fd1498Szrj      to be interchanged, we don't changed it at all.  In the case of simple
50038fd1498Szrj      reduction in inner loop, we only make change how VAR/NEXT is loaded or
50138fd1498Szrj      stored.  With these conditions, we can relax restrictions on reduction
50238fd1498Szrj      in a way that reduction operation is seen as black box.  In general,
50338fd1498Szrj      we can ignore reassociation of reduction operator; we can handle fake
50438fd1498Szrj      reductions in which VAR is not even used to compute NEXT.  */
50538fd1498Szrj   if (! single_imm_use (var, &use_p, &single_use)
50638fd1498Szrj       || ! flow_bb_inside_loop_p (m_loop, gimple_bb (single_use)))
50738fd1498Szrj     return false;
50838fd1498Szrj 
50938fd1498Szrj   /* Check the reduction operation.  We require a left-associative operation.
51038fd1498Szrj      For FP math we also need to be allowed to associate operations.  */
51138fd1498Szrj   if (gassign *ass = dyn_cast <gassign *> (single_use))
51238fd1498Szrj     {
51338fd1498Szrj       enum tree_code code = gimple_assign_rhs_code (ass);
51438fd1498Szrj       if (! (associative_tree_code (code)
51538fd1498Szrj 	     || (code == MINUS_EXPR
51638fd1498Szrj 		 && use_p->use == gimple_assign_rhs1_ptr (ass)))
51738fd1498Szrj 	  || (FLOAT_TYPE_P (TREE_TYPE (var))
51838fd1498Szrj 	      && ! flag_associative_math))
51938fd1498Szrj 	return false;
52038fd1498Szrj     }
52138fd1498Szrj   else
52238fd1498Szrj     return false;
52338fd1498Szrj 
52438fd1498Szrj   /* Handle and verify a series of stmts feeding the reduction op.  */
52538fd1498Szrj   if (single_use != next_def
52638fd1498Szrj       && !check_reduction_path (UNKNOWN_LOCATION, m_loop, phi, next,
52738fd1498Szrj 				gimple_assign_rhs_code (single_use)))
52838fd1498Szrj     return false;
52938fd1498Szrj 
53038fd1498Szrj   /* Only support cases in which INIT is used in inner loop.  */
53138fd1498Szrj   if (TREE_CODE (init) == SSA_NAME)
53238fd1498Szrj     FOR_EACH_IMM_USE_FAST (use_p, iterator, init)
53338fd1498Szrj       {
53438fd1498Szrj 	stmt = USE_STMT (use_p);
53538fd1498Szrj 	if (is_gimple_debug (stmt))
53638fd1498Szrj 	  continue;
53738fd1498Szrj 
53838fd1498Szrj 	if (!flow_bb_inside_loop_p (m_loop, gimple_bb (stmt)))
53938fd1498Szrj 	  return false;
54038fd1498Szrj       }
54138fd1498Szrj 
54238fd1498Szrj   FOR_EACH_IMM_USE_FAST (use_p, iterator, next)
54338fd1498Szrj     {
54438fd1498Szrj       stmt = USE_STMT (use_p);
54538fd1498Szrj       if (is_gimple_debug (stmt))
54638fd1498Szrj 	continue;
54738fd1498Szrj 
54838fd1498Szrj       /* Or else it's used in PHI itself.  */
54938fd1498Szrj       use_phi = dyn_cast <gphi *> (stmt);
55038fd1498Szrj       if (use_phi == phi)
55138fd1498Szrj 	continue;
55238fd1498Szrj 
55338fd1498Szrj       if (use_phi != NULL
55438fd1498Szrj 	  && lcssa_phi == NULL
55538fd1498Szrj 	  && gimple_bb (stmt) == m_exit->dest
55638fd1498Szrj 	  && PHI_ARG_DEF_FROM_EDGE (use_phi, m_exit) == next)
55738fd1498Szrj 	lcssa_phi = use_phi;
55838fd1498Szrj       else
55938fd1498Szrj 	return false;
56038fd1498Szrj     }
56138fd1498Szrj   if (!lcssa_phi)
56238fd1498Szrj     return false;
56338fd1498Szrj 
56438fd1498Szrj   re = XCNEW (struct reduction);
56538fd1498Szrj   re->var = var;
56638fd1498Szrj   re->init = init;
56738fd1498Szrj   re->next = next;
56838fd1498Szrj   re->phi = phi;
56938fd1498Szrj   re->lcssa_phi = lcssa_phi;
57038fd1498Szrj 
57138fd1498Szrj   classify_simple_reduction (re);
57238fd1498Szrj 
57338fd1498Szrj   if (dump_file && (dump_flags & TDF_DETAILS))
57438fd1498Szrj     dump_reduction (re);
57538fd1498Szrj 
57638fd1498Szrj   m_reductions.safe_push (re);
57738fd1498Szrj   return true;
57838fd1498Szrj }
57938fd1498Szrj 
58038fd1498Szrj /* Analyze reduction variable VAR for outer loop of the loop nest to be
58138fd1498Szrj    interchanged.  ILOOP is not NULL and points to inner loop.  For the
58238fd1498Szrj    moment, we only support double reduction for outer loop, like:
58338fd1498Szrj 
58438fd1498Szrj      for (int i = 0; i < n; i++)
58538fd1498Szrj        {
58638fd1498Szrj 	 int sum = 0;
58738fd1498Szrj 
58838fd1498Szrj 	 for (int j = 0; j < n; j++)    // outer loop
58938fd1498Szrj 	   for (int k = 0; k < n; k++)  // inner loop
59038fd1498Szrj 	     sum += a[i][k]*b[k][j];
59138fd1498Szrj 
59238fd1498Szrj 	 s[i] = sum;
59338fd1498Szrj        }
59438fd1498Szrj 
59538fd1498Szrj    Note the innermost two loops are the loop nest to be interchanged.
59638fd1498Szrj    Return true if analysis succeeds.  */
59738fd1498Szrj 
59838fd1498Szrj bool
analyze_oloop_reduction_var(loop_cand * iloop,tree var)59938fd1498Szrj loop_cand::analyze_oloop_reduction_var (loop_cand *iloop, tree var)
60038fd1498Szrj {
60138fd1498Szrj   gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (var));
60238fd1498Szrj   gphi *lcssa_phi = NULL, *use_phi;
60338fd1498Szrj   tree init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (m_loop));
60438fd1498Szrj   tree next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (m_loop));
60538fd1498Szrj   reduction_p re;
60638fd1498Szrj   gimple *stmt, *next_def;
60738fd1498Szrj   use_operand_p use_p;
60838fd1498Szrj   imm_use_iterator iterator;
60938fd1498Szrj 
61038fd1498Szrj   if (TREE_CODE (next) != SSA_NAME)
61138fd1498Szrj     return false;
61238fd1498Szrj 
61338fd1498Szrj   next_def = SSA_NAME_DEF_STMT (next);
61438fd1498Szrj   basic_block bb = gimple_bb (next_def);
61538fd1498Szrj   if (!bb || !flow_bb_inside_loop_p (m_loop, bb))
61638fd1498Szrj     return false;
61738fd1498Szrj 
61838fd1498Szrj   /* Find inner loop's simple reduction that uses var as initializer.  */
61938fd1498Szrj   reduction_p inner_re = NULL;
62038fd1498Szrj   for (unsigned i = 0; iloop->m_reductions.iterate (i, &inner_re); ++i)
62138fd1498Szrj     if (inner_re->init == var || operand_equal_p (inner_re->init, var, 0))
62238fd1498Szrj       break;
62338fd1498Szrj 
62438fd1498Szrj   if (inner_re == NULL
62538fd1498Szrj       || inner_re->type != UNKNOWN_RTYPE
62638fd1498Szrj       || inner_re->producer != phi)
62738fd1498Szrj     return false;
62838fd1498Szrj 
62938fd1498Szrj   /* In case of double reduction, outer loop's reduction should be updated
63038fd1498Szrj      by inner loop's simple reduction.  */
63138fd1498Szrj   if (next_def != inner_re->lcssa_phi)
63238fd1498Szrj     return false;
63338fd1498Szrj 
63438fd1498Szrj   /* Outer loop's reduction should only be used to initialize inner loop's
63538fd1498Szrj      simple reduction.  */
63638fd1498Szrj   if (! single_imm_use (var, &use_p, &stmt)
63738fd1498Szrj       || stmt != inner_re->phi)
63838fd1498Szrj     return false;
63938fd1498Szrj 
64038fd1498Szrj   /* Check this reduction is correctly used outside of loop via lcssa phi.  */
64138fd1498Szrj   FOR_EACH_IMM_USE_FAST (use_p, iterator, next)
64238fd1498Szrj     {
64338fd1498Szrj       stmt = USE_STMT (use_p);
64438fd1498Szrj       if (is_gimple_debug (stmt))
64538fd1498Szrj 	continue;
64638fd1498Szrj 
64738fd1498Szrj       /* Or else it's used in PHI itself.  */
64838fd1498Szrj       use_phi = dyn_cast <gphi *> (stmt);
64938fd1498Szrj       if (use_phi == phi)
65038fd1498Szrj 	continue;
65138fd1498Szrj 
65238fd1498Szrj       if (lcssa_phi == NULL
65338fd1498Szrj 	  && use_phi != NULL
65438fd1498Szrj 	  && gimple_bb (stmt) == m_exit->dest
65538fd1498Szrj 	  && PHI_ARG_DEF_FROM_EDGE (use_phi, m_exit) == next)
65638fd1498Szrj 	lcssa_phi = use_phi;
65738fd1498Szrj       else
65838fd1498Szrj 	return false;
65938fd1498Szrj     }
66038fd1498Szrj   if (!lcssa_phi)
66138fd1498Szrj     return false;
66238fd1498Szrj 
66338fd1498Szrj   re = XCNEW (struct reduction);
66438fd1498Szrj   re->var = var;
66538fd1498Szrj   re->init = init;
66638fd1498Szrj   re->next = next;
66738fd1498Szrj   re->phi = phi;
66838fd1498Szrj   re->lcssa_phi = lcssa_phi;
66938fd1498Szrj   re->type = DOUBLE_RTYPE;
67038fd1498Szrj   inner_re->type = DOUBLE_RTYPE;
67138fd1498Szrj 
67238fd1498Szrj   if (dump_file && (dump_flags & TDF_DETAILS))
67338fd1498Szrj     dump_reduction (re);
67438fd1498Szrj 
67538fd1498Szrj   m_reductions.safe_push (re);
67638fd1498Szrj   return true;
67738fd1498Szrj }
67838fd1498Szrj 
67938fd1498Szrj /* Return true if VAR is induction variable of current loop whose scev is
68038fd1498Szrj    specified by CHREC.  */
68138fd1498Szrj 
68238fd1498Szrj bool
analyze_induction_var(tree var,tree chrec)68338fd1498Szrj loop_cand::analyze_induction_var (tree var, tree chrec)
68438fd1498Szrj {
68538fd1498Szrj   gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (var));
68638fd1498Szrj   tree init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (m_loop));
68738fd1498Szrj 
68838fd1498Szrj   /* Var is loop invariant, though it's unlikely to happen.  */
68938fd1498Szrj   if (tree_does_not_contain_chrecs (chrec))
69038fd1498Szrj     {
691*58e805e6Szrj       /* Punt on floating point invariants if honoring signed zeros,
692*58e805e6Szrj 	 representing that as + 0.0 would change the result if init
693*58e805e6Szrj 	 is -0.0.  Similarly for SNaNs it can raise exception.  */
694*58e805e6Szrj       if (HONOR_SIGNED_ZEROS (chrec) || HONOR_SNANS (chrec))
695*58e805e6Szrj 	return false;
69638fd1498Szrj       struct induction *iv = XCNEW (struct induction);
69738fd1498Szrj       iv->var = var;
69838fd1498Szrj       iv->init_val = init;
69938fd1498Szrj       iv->init_expr = chrec;
700*58e805e6Szrj       iv->step = build_zero_cst (TREE_TYPE (chrec));
70138fd1498Szrj       m_inductions.safe_push (iv);
70238fd1498Szrj       return true;
70338fd1498Szrj     }
70438fd1498Szrj 
70538fd1498Szrj   if (TREE_CODE (chrec) != POLYNOMIAL_CHREC
70638fd1498Szrj       || CHREC_VARIABLE (chrec) != (unsigned) m_loop->num
70738fd1498Szrj       || tree_contains_chrecs (CHREC_LEFT (chrec), NULL)
70838fd1498Szrj       || tree_contains_chrecs (CHREC_RIGHT (chrec), NULL))
70938fd1498Szrj     return false;
71038fd1498Szrj 
71138fd1498Szrj   struct induction *iv = XCNEW (struct induction);
71238fd1498Szrj   iv->var = var;
71338fd1498Szrj   iv->init_val = init;
71438fd1498Szrj   iv->init_expr = CHREC_LEFT (chrec);
71538fd1498Szrj   iv->step = CHREC_RIGHT (chrec);
71638fd1498Szrj 
71738fd1498Szrj   if (dump_file && (dump_flags & TDF_DETAILS))
71838fd1498Szrj     dump_induction (m_loop, iv);
71938fd1498Szrj 
72038fd1498Szrj   m_inductions.safe_push (iv);
72138fd1498Szrj   return true;
72238fd1498Szrj }
72338fd1498Szrj 
72438fd1498Szrj /* Return true if all loop carried variables defined in loop header can
72538fd1498Szrj    be successfully analyzed.  */
72638fd1498Szrj 
72738fd1498Szrj bool
analyze_carried_vars(loop_cand * iloop)72838fd1498Szrj loop_cand::analyze_carried_vars (loop_cand *iloop)
72938fd1498Szrj {
73038fd1498Szrj   edge e = loop_preheader_edge (m_outer);
73138fd1498Szrj   gphi_iterator gsi;
73238fd1498Szrj 
73338fd1498Szrj   if (dump_file && (dump_flags & TDF_DETAILS))
73438fd1498Szrj     fprintf (dump_file, "\nLoop(%d) carried vars:\n", m_loop->num);
73538fd1498Szrj 
73638fd1498Szrj   for (gsi = gsi_start_phis (m_loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
73738fd1498Szrj     {
73838fd1498Szrj       gphi *phi = gsi.phi ();
73938fd1498Szrj 
74038fd1498Szrj       tree var = PHI_RESULT (phi);
74138fd1498Szrj       if (virtual_operand_p (var))
74238fd1498Szrj 	continue;
74338fd1498Szrj 
74438fd1498Szrj       tree chrec = analyze_scalar_evolution (m_loop, var);
74538fd1498Szrj       chrec = instantiate_scev (e, m_loop, chrec);
74638fd1498Szrj 
74738fd1498Szrj       /* Analyze var as reduction variable.  */
74838fd1498Szrj       if (chrec_contains_undetermined (chrec)
74938fd1498Szrj 	  || chrec_contains_symbols_defined_in_loop (chrec, m_outer->num))
75038fd1498Szrj 	{
75138fd1498Szrj 	  if (iloop && !analyze_oloop_reduction_var (iloop, var))
75238fd1498Szrj 	    return false;
75338fd1498Szrj 	  if (!iloop && !analyze_iloop_reduction_var (var))
75438fd1498Szrj 	    return false;
75538fd1498Szrj 	}
75638fd1498Szrj       /* Analyze var as induction variable.  */
75738fd1498Szrj       else if (!analyze_induction_var (var, chrec))
75838fd1498Szrj 	return false;
75938fd1498Szrj     }
76038fd1498Szrj 
76138fd1498Szrj   return true;
76238fd1498Szrj }
76338fd1498Szrj 
76438fd1498Szrj /* Return TRUE if loop closed PHI nodes can be analyzed successfully.  */
76538fd1498Szrj 
76638fd1498Szrj bool
analyze_lcssa_phis(void)76738fd1498Szrj loop_cand::analyze_lcssa_phis (void)
76838fd1498Szrj {
76938fd1498Szrj   gphi_iterator gsi;
77038fd1498Szrj   for (gsi = gsi_start_phis (m_exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
77138fd1498Szrj     {
77238fd1498Szrj       gphi *phi = gsi.phi ();
77338fd1498Szrj 
77438fd1498Szrj       if (virtual_operand_p (PHI_RESULT (phi)))
77538fd1498Szrj 	continue;
77638fd1498Szrj 
77738fd1498Szrj       /* TODO: We only support lcssa phi for reduction for now.  */
77838fd1498Szrj       if (!find_reduction_by_stmt (phi))
77938fd1498Szrj 	return false;
78038fd1498Szrj     }
78138fd1498Szrj 
78238fd1498Szrj   return true;
78338fd1498Szrj }
78438fd1498Szrj 
78538fd1498Szrj /* CONSUMER is a stmt in BB storing reduction result into memory object.
78638fd1498Szrj    When the reduction is intialized from constant value, we need to add
78738fd1498Szrj    a stmt loading from the memory object to target basic block in inner
78838fd1498Szrj    loop during undoing the reduction.  Problem is that memory reference
78938fd1498Szrj    may use ssa variables not dominating the target basic block.  This
79038fd1498Szrj    function finds all stmts on which CONSUMER depends in basic block BB,
79138fd1498Szrj    records and returns them via STMTS.  */
79238fd1498Szrj 
79338fd1498Szrj static void
find_deps_in_bb_for_stmt(gimple_seq * stmts,basic_block bb,gimple * consumer)79438fd1498Szrj find_deps_in_bb_for_stmt (gimple_seq *stmts, basic_block bb, gimple *consumer)
79538fd1498Szrj {
79638fd1498Szrj   auto_vec<gimple *, 4> worklist;
79738fd1498Szrj   use_operand_p use_p;
79838fd1498Szrj   ssa_op_iter iter;
79938fd1498Szrj   gimple *stmt, *def_stmt;
80038fd1498Szrj   gimple_stmt_iterator gsi;
80138fd1498Szrj 
80238fd1498Szrj   /* First clear flag for stmts in bb.  */
80338fd1498Szrj   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
80438fd1498Szrj     gimple_set_plf (gsi_stmt (gsi), GF_PLF_1, false);
80538fd1498Szrj 
80638fd1498Szrj   /* DFS search all depended stmts in bb and mark flag for these stmts.  */
80738fd1498Szrj   worklist.safe_push (consumer);
80838fd1498Szrj   while (!worklist.is_empty ())
80938fd1498Szrj     {
81038fd1498Szrj       stmt = worklist.pop ();
81138fd1498Szrj       FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
81238fd1498Szrj 	{
81338fd1498Szrj 	  def_stmt = SSA_NAME_DEF_STMT (USE_FROM_PTR (use_p));
81438fd1498Szrj 
81538fd1498Szrj 	  if (is_a <gphi *> (def_stmt)
81638fd1498Szrj 	      || gimple_bb (def_stmt) != bb
81738fd1498Szrj 	      || gimple_plf (def_stmt, GF_PLF_1))
81838fd1498Szrj 	    continue;
81938fd1498Szrj 
82038fd1498Szrj 	  worklist.safe_push (def_stmt);
82138fd1498Szrj 	}
82238fd1498Szrj       gimple_set_plf (stmt, GF_PLF_1, true);
82338fd1498Szrj     }
82438fd1498Szrj   for (gsi = gsi_start_nondebug_bb (bb);
82538fd1498Szrj        !gsi_end_p (gsi) && (stmt = gsi_stmt (gsi)) != consumer;)
82638fd1498Szrj     {
82738fd1498Szrj       /* Move dep stmts to sequence STMTS.  */
82838fd1498Szrj       if (gimple_plf (stmt, GF_PLF_1))
82938fd1498Szrj 	{
83038fd1498Szrj 	  gsi_remove (&gsi, false);
83138fd1498Szrj 	  gimple_seq_add_stmt_without_update (stmts, stmt);
83238fd1498Szrj 	}
83338fd1498Szrj       else
83438fd1498Szrj 	gsi_next_nondebug (&gsi);
83538fd1498Szrj     }
83638fd1498Szrj }
83738fd1498Szrj 
83838fd1498Szrj /* User can write, optimizers can generate simple reduction RE for inner
83938fd1498Szrj    loop.  In order to make interchange valid, we have to undo reduction by
84038fd1498Szrj    moving producer and consumer stmts into the inner loop.  For example,
84138fd1498Szrj    below code:
84238fd1498Szrj 
84338fd1498Szrj      init = MEM_REF[idx];		//producer
84438fd1498Szrj      loop:
84538fd1498Szrj        var = phi<init, next>
84638fd1498Szrj        next = var op ...
84738fd1498Szrj      reduc_sum = phi<next>
84838fd1498Szrj      MEM_REF[idx] = reduc_sum		//consumer
84938fd1498Szrj 
85038fd1498Szrj    is transformed into:
85138fd1498Szrj 
85238fd1498Szrj      loop:
85338fd1498Szrj        new_var = MEM_REF[idx];		//producer after moving
85438fd1498Szrj        next = new_var op ...
85538fd1498Szrj        MEM_REF[idx] = next;		//consumer after moving
85638fd1498Szrj 
85738fd1498Szrj    Note if the reduction variable is initialized to constant, like:
85838fd1498Szrj 
85938fd1498Szrj      var = phi<0.0, next>
86038fd1498Szrj 
86138fd1498Szrj    we compute new_var as below:
86238fd1498Szrj 
86338fd1498Szrj      loop:
86438fd1498Szrj        tmp = MEM_REF[idx];
86538fd1498Szrj        new_var = !first_iteration ? tmp : 0.0;
86638fd1498Szrj 
86738fd1498Szrj    so that the initial const is used in the first iteration of loop.  Also
86838fd1498Szrj    record ssa variables for dead code elimination in DCE_SEEDS.  */
86938fd1498Szrj 
87038fd1498Szrj void
undo_simple_reduction(reduction_p re,bitmap dce_seeds)87138fd1498Szrj loop_cand::undo_simple_reduction (reduction_p re, bitmap dce_seeds)
87238fd1498Szrj {
87338fd1498Szrj   gimple *stmt;
87438fd1498Szrj   gimple_stmt_iterator from, to = gsi_after_labels (m_loop->header);
87538fd1498Szrj   gimple_seq stmts = NULL;
87638fd1498Szrj   tree new_var;
87738fd1498Szrj 
87838fd1498Szrj   /* Prepare the initialization stmts and insert it to inner loop.  */
87938fd1498Szrj   if (re->producer != NULL)
88038fd1498Szrj     {
88138fd1498Szrj       gimple_set_vuse (re->producer, NULL_TREE);
88238fd1498Szrj       from = gsi_for_stmt (re->producer);
88338fd1498Szrj       gsi_remove (&from, false);
88438fd1498Szrj       gimple_seq_add_stmt_without_update (&stmts, re->producer);
88538fd1498Szrj       new_var = re->init;
88638fd1498Szrj     }
88738fd1498Szrj   else
88838fd1498Szrj     {
88938fd1498Szrj       /* Find all stmts on which expression "MEM_REF[idx]" depends.  */
89038fd1498Szrj       find_deps_in_bb_for_stmt (&stmts, gimple_bb (re->consumer), re->consumer);
89138fd1498Szrj       /* Because we generate new stmt loading from the MEM_REF to TMP.  */
89238fd1498Szrj       tree cond, tmp = copy_ssa_name (re->var);
89338fd1498Szrj       stmt = gimple_build_assign (tmp, re->init_ref);
89438fd1498Szrj       gimple_seq_add_stmt_without_update (&stmts, stmt);
89538fd1498Szrj 
89638fd1498Szrj       /* Init new_var to MEM_REF or CONST depending on if it is the first
89738fd1498Szrj 	 iteration.  */
89838fd1498Szrj       induction_p iv = m_inductions[0];
89938fd1498Szrj       cond = fold_build2 (NE_EXPR, boolean_type_node, iv->var, iv->init_val);
90038fd1498Szrj       new_var = copy_ssa_name (re->var);
90138fd1498Szrj       stmt = gimple_build_assign (new_var, COND_EXPR, cond, tmp, re->init);
90238fd1498Szrj       gimple_seq_add_stmt_without_update (&stmts, stmt);
90338fd1498Szrj     }
90438fd1498Szrj   gsi_insert_seq_before (&to, stmts, GSI_SAME_STMT);
90538fd1498Szrj 
90638fd1498Szrj   /* Replace all uses of reduction var with new variable.  */
90738fd1498Szrj   use_operand_p use_p;
90838fd1498Szrj   imm_use_iterator iterator;
90938fd1498Szrj   FOR_EACH_IMM_USE_STMT (stmt, iterator, re->var)
91038fd1498Szrj     {
91138fd1498Szrj       FOR_EACH_IMM_USE_ON_STMT (use_p, iterator)
91238fd1498Szrj 	SET_USE (use_p, new_var);
91338fd1498Szrj 
91438fd1498Szrj       update_stmt (stmt);
91538fd1498Szrj     }
91638fd1498Szrj 
91738fd1498Szrj   /* Move consumer stmt into inner loop, just after reduction next's def.  */
91838fd1498Szrj   unlink_stmt_vdef (re->consumer);
91938fd1498Szrj   release_ssa_name (gimple_vdef (re->consumer));
92038fd1498Szrj   gimple_set_vdef (re->consumer, NULL_TREE);
92138fd1498Szrj   gimple_set_vuse (re->consumer, NULL_TREE);
92238fd1498Szrj   gimple_assign_set_rhs1 (re->consumer, re->next);
92338fd1498Szrj   from = gsi_for_stmt (re->consumer);
92438fd1498Szrj   to = gsi_for_stmt (SSA_NAME_DEF_STMT (re->next));
92538fd1498Szrj   gsi_move_after (&from, &to);
92638fd1498Szrj 
92738fd1498Szrj   /* Mark the reduction variables for DCE.  */
92838fd1498Szrj   bitmap_set_bit (dce_seeds, SSA_NAME_VERSION (re->var));
92938fd1498Szrj   bitmap_set_bit (dce_seeds, SSA_NAME_VERSION (PHI_RESULT (re->lcssa_phi)));
93038fd1498Szrj }
93138fd1498Szrj 
93238fd1498Szrj /* Free DATAREFS and its auxiliary memory.  */
93338fd1498Szrj 
93438fd1498Szrj static void
free_data_refs_with_aux(vec<data_reference_p> datarefs)93538fd1498Szrj free_data_refs_with_aux (vec<data_reference_p> datarefs)
93638fd1498Szrj {
93738fd1498Szrj   data_reference_p dr;
93838fd1498Szrj   for (unsigned i = 0; datarefs.iterate (i, &dr); ++i)
93938fd1498Szrj     if (dr->aux != NULL)
94038fd1498Szrj       {
94138fd1498Szrj 	DR_ACCESS_STRIDE (dr)->release ();
94238fd1498Szrj 	delete (vec<tree> *) dr->aux;
94338fd1498Szrj       }
94438fd1498Szrj 
94538fd1498Szrj   free_data_refs (datarefs);
94638fd1498Szrj }
94738fd1498Szrj 
94838fd1498Szrj /* Class for loop interchange transformation.  */
94938fd1498Szrj 
95038fd1498Szrj class tree_loop_interchange
95138fd1498Szrj {
95238fd1498Szrj public:
tree_loop_interchange(vec<struct loop * > loop_nest)95338fd1498Szrj   tree_loop_interchange (vec<struct loop *> loop_nest)
95438fd1498Szrj     : m_loop_nest (loop_nest), m_niters_iv_var (NULL_TREE),
95538fd1498Szrj       m_dce_seeds (BITMAP_ALLOC (NULL)) { }
~tree_loop_interchange()95638fd1498Szrj   ~tree_loop_interchange () { BITMAP_FREE (m_dce_seeds); }
95738fd1498Szrj   bool interchange (vec<data_reference_p>, vec<ddr_p>);
95838fd1498Szrj 
95938fd1498Szrj private:
96038fd1498Szrj   void update_data_info (unsigned, unsigned, vec<data_reference_p>, vec<ddr_p>);
96138fd1498Szrj   bool valid_data_dependences (unsigned, unsigned, vec<ddr_p>);
96238fd1498Szrj   void interchange_loops (loop_cand &, loop_cand &);
96338fd1498Szrj   void map_inductions_to_loop (loop_cand &, loop_cand &);
96438fd1498Szrj   void move_code_to_inner_loop (struct loop *, struct loop *, basic_block *);
96538fd1498Szrj 
96638fd1498Szrj   /* The whole loop nest in which interchange is ongoing.  */
96738fd1498Szrj   vec<struct loop *> m_loop_nest;
96838fd1498Szrj   /* We create new IV which is only used in loop's exit condition check.
96938fd1498Szrj      In case of 3-level loop nest interchange, when we interchange the
97038fd1498Szrj      innermost two loops, new IV created in the middle level loop does
97138fd1498Szrj      not need to be preserved in interchanging the outermost two loops
97238fd1498Szrj      later.  We record the IV so that it can be skipped.  */
97338fd1498Szrj   tree m_niters_iv_var;
97438fd1498Szrj   /* Bitmap of seed variables for dead code elimination after interchange.  */
97538fd1498Szrj   bitmap m_dce_seeds;
97638fd1498Szrj };
97738fd1498Szrj 
97838fd1498Szrj /* Update data refs' access stride and dependence information after loop
97938fd1498Szrj    interchange.  I_IDX/O_IDX gives indices of interchanged loops in loop
98038fd1498Szrj    nest.  DATAREFS are data references.  DDRS are data dependences.  */
98138fd1498Szrj 
98238fd1498Szrj void
update_data_info(unsigned i_idx,unsigned o_idx,vec<data_reference_p> datarefs,vec<ddr_p> ddrs)98338fd1498Szrj tree_loop_interchange::update_data_info (unsigned i_idx, unsigned o_idx,
98438fd1498Szrj 					 vec<data_reference_p> datarefs,
98538fd1498Szrj 					 vec<ddr_p> ddrs)
98638fd1498Szrj {
98738fd1498Szrj   struct data_reference *dr;
98838fd1498Szrj   struct data_dependence_relation *ddr;
98938fd1498Szrj 
99038fd1498Szrj   /* Update strides of data references.  */
99138fd1498Szrj   for (unsigned i = 0; datarefs.iterate (i, &dr); ++i)
99238fd1498Szrj     {
99338fd1498Szrj       vec<tree> *stride = DR_ACCESS_STRIDE (dr);
99438fd1498Szrj       gcc_assert (stride->length () > i_idx);
99538fd1498Szrj       std::swap ((*stride)[i_idx], (*stride)[o_idx]);
99638fd1498Szrj     }
99738fd1498Szrj   /* Update data dependences.  */
99838fd1498Szrj   for (unsigned i = 0; ddrs.iterate (i, &ddr); ++i)
99938fd1498Szrj     if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
100038fd1498Szrj       {
100138fd1498Szrj         for (unsigned j = 0; j < DDR_NUM_DIST_VECTS (ddr); ++j)
100238fd1498Szrj 	  {
100338fd1498Szrj 	    lambda_vector dist_vect = DDR_DIST_VECT (ddr, j);
100438fd1498Szrj 	    std::swap (dist_vect[i_idx], dist_vect[o_idx]);
100538fd1498Szrj 	  }
100638fd1498Szrj       }
100738fd1498Szrj }
100838fd1498Szrj 
100938fd1498Szrj /* Check data dependence relations, return TRUE if it's valid to interchange
101038fd1498Szrj    two loops specified by I_IDX/O_IDX.  Theoretically, interchanging the two
101138fd1498Szrj    loops is valid only if dist vector, after interchanging, doesn't have '>'
101238fd1498Szrj    as the leftmost non-'=' direction.  Practically, this function have been
101338fd1498Szrj    conservative here by not checking some valid cases.  */
101438fd1498Szrj 
101538fd1498Szrj bool
valid_data_dependences(unsigned i_idx,unsigned o_idx,vec<ddr_p> ddrs)101638fd1498Szrj tree_loop_interchange::valid_data_dependences (unsigned i_idx, unsigned o_idx,
101738fd1498Szrj 					       vec<ddr_p> ddrs)
101838fd1498Szrj {
101938fd1498Szrj   struct data_dependence_relation *ddr;
102038fd1498Szrj 
102138fd1498Szrj   for (unsigned i = 0; ddrs.iterate (i, &ddr); ++i)
102238fd1498Szrj     {
102338fd1498Szrj       /* Skip no-dependence case.  */
102438fd1498Szrj       if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
102538fd1498Szrj 	continue;
102638fd1498Szrj 
102738fd1498Szrj       for (unsigned j = 0; j < DDR_NUM_DIST_VECTS (ddr); ++j)
102838fd1498Szrj 	{
102938fd1498Szrj 	  lambda_vector dist_vect = DDR_DIST_VECT (ddr, j);
103038fd1498Szrj 	  unsigned level = dependence_level (dist_vect, m_loop_nest.length ());
103138fd1498Szrj 
103238fd1498Szrj 	  /* If there is no carried dependence.  */
103338fd1498Szrj 	  if (level == 0)
103438fd1498Szrj 	    continue;
103538fd1498Szrj 
103638fd1498Szrj 	  level --;
103738fd1498Szrj 
103838fd1498Szrj 	  /* If dependence is not carried by any loop in between the two
103938fd1498Szrj 	     loops [oloop, iloop] to interchange.  */
104038fd1498Szrj 	  if (level < o_idx || level > i_idx)
104138fd1498Szrj 	    continue;
104238fd1498Szrj 
104338fd1498Szrj 	  /* Be conservative, skip case if either direction at i_idx/o_idx
104438fd1498Szrj 	     levels is not '=' or '<'.  */
104538fd1498Szrj 	  if (dist_vect[i_idx] < 0 || dist_vect[o_idx] < 0)
104638fd1498Szrj 	    return false;
104738fd1498Szrj 	}
104838fd1498Szrj     }
104938fd1498Szrj 
105038fd1498Szrj   return true;
105138fd1498Szrj }
105238fd1498Szrj 
105338fd1498Szrj /* Interchange two loops specified by ILOOP and OLOOP.  */
105438fd1498Szrj 
105538fd1498Szrj void
interchange_loops(loop_cand & iloop,loop_cand & oloop)105638fd1498Szrj tree_loop_interchange::interchange_loops (loop_cand &iloop, loop_cand &oloop)
105738fd1498Szrj {
105838fd1498Szrj   reduction_p re;
105938fd1498Szrj   gimple_stmt_iterator gsi;
106038fd1498Szrj   tree i_niters, o_niters, var_after;
106138fd1498Szrj 
106238fd1498Szrj   /* Undo inner loop's simple reduction.  */
106338fd1498Szrj   for (unsigned i = 0; iloop.m_reductions.iterate (i, &re); ++i)
106438fd1498Szrj     if (re->type != DOUBLE_RTYPE)
106538fd1498Szrj       {
106638fd1498Szrj 	if (re->producer)
106738fd1498Szrj 	  reset_debug_uses (re->producer);
106838fd1498Szrj 
106938fd1498Szrj 	iloop.undo_simple_reduction (re, m_dce_seeds);
107038fd1498Szrj       }
107138fd1498Szrj 
107238fd1498Szrj   /* Only need to reset debug uses for double reduction.  */
107338fd1498Szrj   for (unsigned i = 0; oloop.m_reductions.iterate (i, &re); ++i)
107438fd1498Szrj     {
107538fd1498Szrj       gcc_assert (re->type == DOUBLE_RTYPE);
107638fd1498Szrj       reset_debug_uses (SSA_NAME_DEF_STMT (re->var));
107738fd1498Szrj       reset_debug_uses (SSA_NAME_DEF_STMT (re->next));
107838fd1498Szrj     }
107938fd1498Szrj 
108038fd1498Szrj   /* Prepare niters for both loops.  */
108138fd1498Szrj   struct loop *loop_nest = m_loop_nest[0];
108238fd1498Szrj   edge instantiate_below = loop_preheader_edge (loop_nest);
108338fd1498Szrj   gsi = gsi_last_bb (loop_preheader_edge (loop_nest)->src);
108438fd1498Szrj   i_niters = number_of_latch_executions (iloop.m_loop);
108538fd1498Szrj   i_niters = analyze_scalar_evolution (loop_outer (iloop.m_loop), i_niters);
108638fd1498Szrj   i_niters = instantiate_scev (instantiate_below, loop_outer (iloop.m_loop),
108738fd1498Szrj 			       i_niters);
108838fd1498Szrj   i_niters = force_gimple_operand_gsi (&gsi, unshare_expr (i_niters), true,
108938fd1498Szrj 				       NULL_TREE, false, GSI_CONTINUE_LINKING);
109038fd1498Szrj   o_niters = number_of_latch_executions (oloop.m_loop);
109138fd1498Szrj   if (oloop.m_loop != loop_nest)
109238fd1498Szrj     {
109338fd1498Szrj       o_niters = analyze_scalar_evolution (loop_outer (oloop.m_loop), o_niters);
109438fd1498Szrj       o_niters = instantiate_scev (instantiate_below, loop_outer (oloop.m_loop),
109538fd1498Szrj 				   o_niters);
109638fd1498Szrj     }
109738fd1498Szrj   o_niters = force_gimple_operand_gsi (&gsi, unshare_expr (o_niters), true,
109838fd1498Szrj 				       NULL_TREE, false, GSI_CONTINUE_LINKING);
109938fd1498Szrj 
110038fd1498Szrj   /* Move src's code to tgt loop.  This is necessary when src is the outer
110138fd1498Szrj      loop and tgt is the inner loop.  */
110238fd1498Szrj   move_code_to_inner_loop (oloop.m_loop, iloop.m_loop, oloop.m_bbs);
110338fd1498Szrj 
110438fd1498Szrj   /* Map outer loop's IV to inner loop, and vice versa.  */
110538fd1498Szrj   map_inductions_to_loop (oloop, iloop);
110638fd1498Szrj   map_inductions_to_loop (iloop, oloop);
110738fd1498Szrj 
110838fd1498Szrj   /* Create canonical IV for both loops.  Note canonical IV for outer/inner
110938fd1498Szrj      loop is actually from inner/outer loop.  Also we record the new IV
111038fd1498Szrj      created for the outer loop so that it can be skipped in later loop
111138fd1498Szrj      interchange.  */
111238fd1498Szrj   create_canonical_iv (oloop.m_loop, oloop.m_exit,
111338fd1498Szrj 		       i_niters, &m_niters_iv_var, &var_after);
111438fd1498Szrj   bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (var_after));
111538fd1498Szrj   create_canonical_iv (iloop.m_loop, iloop.m_exit,
111638fd1498Szrj 		       o_niters, NULL, &var_after);
111738fd1498Szrj   bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (var_after));
111838fd1498Szrj 
111938fd1498Szrj   /* Scrap niters estimation of interchanged loops.  */
112038fd1498Szrj   iloop.m_loop->any_upper_bound = false;
112138fd1498Szrj   iloop.m_loop->any_likely_upper_bound = false;
112238fd1498Szrj   free_numbers_of_iterations_estimates (iloop.m_loop);
112338fd1498Szrj   oloop.m_loop->any_upper_bound = false;
112438fd1498Szrj   oloop.m_loop->any_likely_upper_bound = false;
112538fd1498Szrj   free_numbers_of_iterations_estimates (oloop.m_loop);
112638fd1498Szrj 
112738fd1498Szrj   /* Clear all cached scev information.  This is expensive but shouldn't be
112838fd1498Szrj      a problem given we interchange in very limited times.  */
112938fd1498Szrj   scev_reset_htab ();
113038fd1498Szrj 
113138fd1498Szrj   /* ???  The association between the loop data structure and the
113238fd1498Szrj      CFG changed, so what was loop N at the source level is now
113338fd1498Szrj      loop M.  We should think of retaining the association or breaking
113438fd1498Szrj      it fully by creating a new loop instead of re-using the "wrong" one.  */
113538fd1498Szrj }
113638fd1498Szrj 
113738fd1498Szrj /* Map induction variables of SRC loop to TGT loop.  The function firstly
113838fd1498Szrj    creates the same IV of SRC loop in TGT loop, then deletes the original
113938fd1498Szrj    IV and re-initialize it using the newly created IV.  For example, loop
114038fd1498Szrj    nest:
114138fd1498Szrj 
114238fd1498Szrj      for (i = 0; i < N; i++)
114338fd1498Szrj        for (j = 0; j < M; j++)
114438fd1498Szrj 	 {
114538fd1498Szrj 	   //use of i;
114638fd1498Szrj 	   //use of j;
114738fd1498Szrj 	 }
114838fd1498Szrj 
114938fd1498Szrj    will be transformed into:
115038fd1498Szrj 
115138fd1498Szrj      for (jj = 0; jj < M; jj++)
115238fd1498Szrj        for (ii = 0; ii < N; ii++)
115338fd1498Szrj 	 {
115438fd1498Szrj 	   //use of ii;
115538fd1498Szrj 	   //use of jj;
115638fd1498Szrj 	 }
115738fd1498Szrj 
115838fd1498Szrj    after loop interchange.  */
115938fd1498Szrj 
116038fd1498Szrj void
map_inductions_to_loop(loop_cand & src,loop_cand & tgt)116138fd1498Szrj tree_loop_interchange::map_inductions_to_loop (loop_cand &src, loop_cand &tgt)
116238fd1498Szrj {
116338fd1498Szrj   induction_p iv;
116438fd1498Szrj   edge e = tgt.m_exit;
116538fd1498Szrj   gimple_stmt_iterator incr_pos = gsi_last_bb (e->src), gsi;
116638fd1498Szrj 
116738fd1498Szrj   /* Map source loop's IV to target loop.  */
116838fd1498Szrj   for (unsigned i = 0; src.m_inductions.iterate (i, &iv); ++i)
116938fd1498Szrj     {
117038fd1498Szrj       gimple *use_stmt, *stmt = SSA_NAME_DEF_STMT (iv->var);
117138fd1498Szrj       gcc_assert (is_a <gphi *> (stmt));
117238fd1498Szrj 
117338fd1498Szrj       use_operand_p use_p;
117438fd1498Szrj       /* Only map original IV to target loop.  */
117538fd1498Szrj       if (m_niters_iv_var != iv->var)
117638fd1498Szrj 	{
117738fd1498Szrj 	  /* Map the IV by creating the same one in target loop.  */
117838fd1498Szrj 	  tree var_before, var_after;
117938fd1498Szrj 	  tree base = unshare_expr (iv->init_expr);
118038fd1498Szrj 	  tree step = unshare_expr (iv->step);
118138fd1498Szrj 	  create_iv (base, step, SSA_NAME_VAR (iv->var),
118238fd1498Szrj 		     tgt.m_loop, &incr_pos, false, &var_before, &var_after);
118338fd1498Szrj 	  bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (var_before));
118438fd1498Szrj 	  bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (var_after));
118538fd1498Szrj 
118638fd1498Szrj 	  /* Replace uses of the original IV var with newly created IV var.  */
118738fd1498Szrj 	  imm_use_iterator imm_iter;
118838fd1498Szrj 	  FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, iv->var)
118938fd1498Szrj 	    {
119038fd1498Szrj 	      FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
119138fd1498Szrj 		SET_USE (use_p, var_before);
119238fd1498Szrj 
119338fd1498Szrj 	      update_stmt (use_stmt);
119438fd1498Szrj 	    }
119538fd1498Szrj 	}
119638fd1498Szrj 
119738fd1498Szrj       /* Mark all uses for DCE.  */
119838fd1498Szrj       ssa_op_iter op_iter;
119938fd1498Szrj       FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, op_iter, SSA_OP_USE)
120038fd1498Szrj 	{
120138fd1498Szrj 	  tree use = USE_FROM_PTR (use_p);
120238fd1498Szrj 	  if (TREE_CODE (use) == SSA_NAME
120338fd1498Szrj 	      && ! SSA_NAME_IS_DEFAULT_DEF (use))
120438fd1498Szrj 	    bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (use));
120538fd1498Szrj 	}
120638fd1498Szrj 
120738fd1498Szrj       /* Delete definition of the original IV in the source loop.  */
120838fd1498Szrj       gsi = gsi_for_stmt (stmt);
120938fd1498Szrj       remove_phi_node (&gsi, true);
121038fd1498Szrj     }
121138fd1498Szrj }
121238fd1498Szrj 
121338fd1498Szrj /* Move stmts of outer loop to inner loop.  */
121438fd1498Szrj 
121538fd1498Szrj void
move_code_to_inner_loop(struct loop * outer,struct loop * inner,basic_block * outer_bbs)121638fd1498Szrj tree_loop_interchange::move_code_to_inner_loop (struct loop *outer,
121738fd1498Szrj 						struct loop *inner,
121838fd1498Szrj 						basic_block *outer_bbs)
121938fd1498Szrj {
122038fd1498Szrj   basic_block oloop_exit_bb = single_exit (outer)->src;
122138fd1498Szrj   gimple_stmt_iterator gsi, to;
122238fd1498Szrj 
122338fd1498Szrj   for (unsigned i = 0; i < outer->num_nodes; i++)
122438fd1498Szrj     {
122538fd1498Szrj       basic_block bb = outer_bbs[i];
122638fd1498Szrj 
122738fd1498Szrj       /* Skip basic blocks of inner loop.  */
122838fd1498Szrj       if (flow_bb_inside_loop_p (inner, bb))
122938fd1498Szrj 	continue;
123038fd1498Szrj 
123138fd1498Szrj       /* Move code from header/latch to header/latch.  */
123238fd1498Szrj       if (bb == outer->header)
123338fd1498Szrj 	to = gsi_after_labels (inner->header);
123438fd1498Szrj       else if (bb == outer->latch)
123538fd1498Szrj 	to = gsi_after_labels (inner->latch);
123638fd1498Szrj       else
123738fd1498Szrj 	/* Otherwise, simply move to exit->src.  */
123838fd1498Szrj 	to = gsi_last_bb (single_exit (inner)->src);
123938fd1498Szrj 
124038fd1498Szrj       for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
124138fd1498Szrj 	{
124238fd1498Szrj 	  gimple *stmt = gsi_stmt (gsi);
124338fd1498Szrj 
124438fd1498Szrj 	  if (oloop_exit_bb == bb
124538fd1498Szrj 	      && stmt == gsi_stmt (gsi_last_bb (oloop_exit_bb)))
124638fd1498Szrj 	    {
124738fd1498Szrj 	      gsi_next (&gsi);
124838fd1498Szrj 	      continue;
124938fd1498Szrj 	    }
125038fd1498Szrj 
125138fd1498Szrj 	  if (gimple_vuse (stmt))
125238fd1498Szrj 	    gimple_set_vuse (stmt, NULL_TREE);
125338fd1498Szrj 	  if (gimple_vdef (stmt))
125438fd1498Szrj 	    {
125538fd1498Szrj 	      unlink_stmt_vdef (stmt);
125638fd1498Szrj 	      release_ssa_name (gimple_vdef (stmt));
125738fd1498Szrj 	      gimple_set_vdef (stmt, NULL_TREE);
125838fd1498Szrj 	    }
125938fd1498Szrj 
126038fd1498Szrj 	  reset_debug_uses (stmt);
126138fd1498Szrj 	  gsi_move_before (&gsi, &to);
126238fd1498Szrj 	}
126338fd1498Szrj     }
126438fd1498Szrj }
126538fd1498Szrj 
126638fd1498Szrj /* Given data reference DR in LOOP_NEST, the function computes DR's access
126738fd1498Szrj    stride at each level of loop from innermost LOOP to outer.  On success,
126838fd1498Szrj    it saves access stride at each level loop in a vector which is pointed
126938fd1498Szrj    by DR->aux.  For example:
127038fd1498Szrj 
127138fd1498Szrj      int arr[100][100][100];
127238fd1498Szrj      for (i = 0; i < 100; i++)       ;(DR->aux)strides[0] = 40000
127338fd1498Szrj        for (j = 100; j > 0; j--)     ;(DR->aux)strides[1] = 400
127438fd1498Szrj 	 for (k = 0; k < 100; k++)   ;(DR->aux)strides[2] = 4
127538fd1498Szrj 	   arr[i][j - 1][k] = 0;  */
127638fd1498Szrj 
127738fd1498Szrj static void
compute_access_stride(struct loop * loop_nest,struct loop * loop,data_reference_p dr)127838fd1498Szrj compute_access_stride (struct loop *loop_nest, struct loop *loop,
127938fd1498Szrj 		       data_reference_p dr)
128038fd1498Szrj {
128138fd1498Szrj   vec<tree> *strides = new vec<tree> ();
128238fd1498Szrj   basic_block bb = gimple_bb (DR_STMT (dr));
128338fd1498Szrj 
128438fd1498Szrj   while (!flow_bb_inside_loop_p (loop, bb))
128538fd1498Szrj     {
128638fd1498Szrj       strides->safe_push (build_int_cst (sizetype, 0));
128738fd1498Szrj       loop = loop_outer (loop);
128838fd1498Szrj     }
128938fd1498Szrj   gcc_assert (loop == bb->loop_father);
129038fd1498Szrj 
129138fd1498Szrj   tree ref = DR_REF (dr);
129238fd1498Szrj   if (TREE_CODE (ref) == COMPONENT_REF
129338fd1498Szrj       && DECL_BIT_FIELD (TREE_OPERAND (ref, 1)))
129438fd1498Szrj     {
129538fd1498Szrj       /* We can't take address of bitfields.  If the bitfield is at constant
129638fd1498Szrj 	 offset from the start of the struct, just use address of the
129738fd1498Szrj 	 struct, for analysis of the strides that shouldn't matter.  */
129838fd1498Szrj       if (!TREE_OPERAND (ref, 2)
129938fd1498Szrj 	  || TREE_CODE (TREE_OPERAND (ref, 2)) == INTEGER_CST)
130038fd1498Szrj 	ref = TREE_OPERAND (ref, 0);
130138fd1498Szrj       /* Otherwise, if we have a bit field representative, use that.  */
130238fd1498Szrj       else if (DECL_BIT_FIELD_REPRESENTATIVE (TREE_OPERAND (ref, 1))
130338fd1498Szrj 	       != NULL_TREE)
130438fd1498Szrj 	{
130538fd1498Szrj 	  tree repr = DECL_BIT_FIELD_REPRESENTATIVE (TREE_OPERAND (ref, 1));
130638fd1498Szrj 	  ref = build3 (COMPONENT_REF, TREE_TYPE (repr), TREE_OPERAND (ref, 0),
130738fd1498Szrj 			repr, TREE_OPERAND (ref, 2));
130838fd1498Szrj 	}
130938fd1498Szrj       /* Otherwise punt.  */
131038fd1498Szrj       else
131138fd1498Szrj 	{
131238fd1498Szrj 	  dr->aux = strides;
131338fd1498Szrj 	  return;
131438fd1498Szrj 	}
131538fd1498Szrj     }
131638fd1498Szrj   tree scev_base = build_fold_addr_expr (ref);
131738fd1498Szrj   tree scev = analyze_scalar_evolution (loop, scev_base);
131838fd1498Szrj   scev = instantiate_scev (loop_preheader_edge (loop_nest), loop, scev);
131938fd1498Szrj   if (! chrec_contains_undetermined (scev))
132038fd1498Szrj     {
132138fd1498Szrj       tree sl = scev;
132238fd1498Szrj       struct loop *expected = loop;
132338fd1498Szrj       while (TREE_CODE (sl) == POLYNOMIAL_CHREC)
132438fd1498Szrj 	{
132538fd1498Szrj 	  struct loop *sl_loop = get_chrec_loop (sl);
132638fd1498Szrj 	  while (sl_loop != expected)
132738fd1498Szrj 	    {
132838fd1498Szrj 	      strides->safe_push (size_int (0));
132938fd1498Szrj 	      expected = loop_outer (expected);
133038fd1498Szrj 	    }
133138fd1498Szrj 	  strides->safe_push (CHREC_RIGHT (sl));
133238fd1498Szrj 	  sl = CHREC_LEFT (sl);
133338fd1498Szrj 	  expected = loop_outer (expected);
133438fd1498Szrj 	}
133538fd1498Szrj       if (! tree_contains_chrecs (sl, NULL))
133638fd1498Szrj 	while (expected != loop_outer (loop_nest))
133738fd1498Szrj 	  {
133838fd1498Szrj 	    strides->safe_push (size_int (0));
133938fd1498Szrj 	    expected = loop_outer (expected);
134038fd1498Szrj 	  }
134138fd1498Szrj     }
134238fd1498Szrj 
134338fd1498Szrj   dr->aux = strides;
134438fd1498Szrj }
134538fd1498Szrj 
134638fd1498Szrj /* Given loop nest LOOP_NEST with innermost LOOP, the function computes
134738fd1498Szrj    access strides with respect to each level loop for all data refs in
134838fd1498Szrj    DATAREFS from inner loop to outer loop.  On success, it returns the
134938fd1498Szrj    outermost loop that access strides can be computed successfully for
135038fd1498Szrj    all data references.  If access strides cannot be computed at least
135138fd1498Szrj    for two levels of loop for any data reference, it returns NULL.  */
135238fd1498Szrj 
135338fd1498Szrj static struct loop *
compute_access_strides(struct loop * loop_nest,struct loop * loop,vec<data_reference_p> datarefs)135438fd1498Szrj compute_access_strides (struct loop *loop_nest, struct loop *loop,
135538fd1498Szrj 			vec<data_reference_p> datarefs)
135638fd1498Szrj {
135738fd1498Szrj   unsigned i, j, num_loops = (unsigned) -1;
135838fd1498Szrj   data_reference_p dr;
135938fd1498Szrj   vec<tree> *stride;
136038fd1498Szrj 
136138fd1498Szrj   for (i = 0; datarefs.iterate (i, &dr); ++i)
136238fd1498Szrj     {
136338fd1498Szrj       compute_access_stride (loop_nest, loop, dr);
136438fd1498Szrj       stride = DR_ACCESS_STRIDE (dr);
136538fd1498Szrj       if (stride->length () < num_loops)
136638fd1498Szrj 	{
136738fd1498Szrj 	  num_loops = stride->length ();
136838fd1498Szrj 	  if (num_loops < 2)
136938fd1498Szrj 	    return NULL;
137038fd1498Szrj 	}
137138fd1498Szrj     }
137238fd1498Szrj 
137338fd1498Szrj   for (i = 0; datarefs.iterate (i, &dr); ++i)
137438fd1498Szrj     {
137538fd1498Szrj       stride = DR_ACCESS_STRIDE (dr);
137638fd1498Szrj       if (stride->length () > num_loops)
137738fd1498Szrj 	stride->truncate (num_loops);
137838fd1498Szrj 
137938fd1498Szrj       for (j = 0; j < (num_loops >> 1); ++j)
138038fd1498Szrj 	std::swap ((*stride)[j], (*stride)[num_loops - j - 1]);
138138fd1498Szrj     }
138238fd1498Szrj 
138338fd1498Szrj   loop = superloop_at_depth (loop, loop_depth (loop) + 1 - num_loops);
138438fd1498Szrj   gcc_assert (loop_nest == loop || flow_loop_nested_p (loop_nest, loop));
138538fd1498Szrj   return loop;
138638fd1498Szrj }
138738fd1498Szrj 
138838fd1498Szrj /* Prune access strides for data references in DATAREFS by removing strides
138938fd1498Szrj    of loops that isn't in current LOOP_NEST.  */
139038fd1498Szrj 
139138fd1498Szrj static void
prune_access_strides_not_in_loop(struct loop * loop_nest,struct loop * innermost,vec<data_reference_p> datarefs)139238fd1498Szrj prune_access_strides_not_in_loop (struct loop *loop_nest,
139338fd1498Szrj 				  struct loop *innermost,
139438fd1498Szrj 				  vec<data_reference_p> datarefs)
139538fd1498Szrj {
139638fd1498Szrj   data_reference_p dr;
139738fd1498Szrj   unsigned num_loops = loop_depth (innermost) - loop_depth (loop_nest) + 1;
139838fd1498Szrj   gcc_assert (num_loops > 1);
139938fd1498Szrj 
140038fd1498Szrj   /* Block remove strides of loops that is not in current loop nest.  */
140138fd1498Szrj   for (unsigned i = 0; datarefs.iterate (i, &dr); ++i)
140238fd1498Szrj     {
140338fd1498Szrj       vec<tree> *stride = DR_ACCESS_STRIDE (dr);
140438fd1498Szrj       if (stride->length () > num_loops)
140538fd1498Szrj 	stride->block_remove (0, stride->length () - num_loops);
140638fd1498Szrj     }
140738fd1498Szrj }
140838fd1498Szrj 
140938fd1498Szrj /* Dump access strides for all DATAREFS.  */
141038fd1498Szrj 
141138fd1498Szrj static void
dump_access_strides(vec<data_reference_p> datarefs)141238fd1498Szrj dump_access_strides (vec<data_reference_p> datarefs)
141338fd1498Szrj {
141438fd1498Szrj   data_reference_p dr;
141538fd1498Szrj   fprintf (dump_file, "Access Strides for DRs:\n");
141638fd1498Szrj   for (unsigned i = 0; datarefs.iterate (i, &dr); ++i)
141738fd1498Szrj     {
141838fd1498Szrj       fprintf (dump_file, "  ");
141938fd1498Szrj       print_generic_expr (dump_file, DR_REF (dr), TDF_SLIM);
142038fd1498Szrj       fprintf (dump_file, ":\t\t<");
142138fd1498Szrj 
142238fd1498Szrj       vec<tree> *stride = DR_ACCESS_STRIDE (dr);
142338fd1498Szrj       unsigned num_loops = stride->length ();
142438fd1498Szrj       for (unsigned j = 0; j < num_loops; ++j)
142538fd1498Szrj 	{
142638fd1498Szrj 	  print_generic_expr (dump_file, (*stride)[j], TDF_SLIM);
142738fd1498Szrj 	  fprintf (dump_file, "%s", (j < num_loops - 1) ? ",\t" : ">\n");
142838fd1498Szrj 	}
142938fd1498Szrj     }
143038fd1498Szrj }
143138fd1498Szrj 
143238fd1498Szrj /* Return true if it's profitable to interchange two loops whose index
143338fd1498Szrj    in whole loop nest vector are I_IDX/O_IDX respectively.  The function
143438fd1498Szrj    computes and compares three types information from all DATAREFS:
143538fd1498Szrj      1) Access stride for loop I_IDX and O_IDX.
143638fd1498Szrj      2) Number of invariant memory references with respect to I_IDX before
143738fd1498Szrj 	and after loop interchange.
143838fd1498Szrj      3) Flags indicating if all memory references access sequential memory
143938fd1498Szrj 	in ILOOP, before and after loop interchange.
144038fd1498Szrj    If INNMOST_LOOP_P is true, the two loops for interchanging are the two
144138fd1498Szrj    innermost loops in loop nest.  This function also dumps information if
144238fd1498Szrj    DUMP_INFO_P is true.  */
144338fd1498Szrj 
144438fd1498Szrj static bool
should_interchange_loops(unsigned i_idx,unsigned o_idx,vec<data_reference_p> datarefs,unsigned i_stmt_cost,unsigned o_stmt_cost,bool innermost_loops_p,bool dump_info_p=true)144538fd1498Szrj should_interchange_loops (unsigned i_idx, unsigned o_idx,
144638fd1498Szrj 			  vec<data_reference_p> datarefs,
144738fd1498Szrj 			  unsigned i_stmt_cost, unsigned o_stmt_cost,
144838fd1498Szrj 			  bool innermost_loops_p, bool dump_info_p = true)
144938fd1498Szrj {
145038fd1498Szrj   unsigned HOST_WIDE_INT ratio;
145138fd1498Szrj   unsigned i, j, num_old_inv_drs = 0, num_new_inv_drs = 0;
145238fd1498Szrj   struct data_reference *dr;
145338fd1498Szrj   bool all_seq_dr_before_p = true, all_seq_dr_after_p = true;
145438fd1498Szrj   widest_int iloop_strides = 0, oloop_strides = 0;
145538fd1498Szrj   unsigned num_unresolved_drs = 0;
145638fd1498Szrj   unsigned num_resolved_ok_drs = 0;
145738fd1498Szrj   unsigned num_resolved_not_ok_drs = 0;
145838fd1498Szrj 
145938fd1498Szrj   if (dump_info_p && dump_file && (dump_flags & TDF_DETAILS))
146038fd1498Szrj     fprintf (dump_file, "\nData ref strides:\n\tmem_ref:\t\tiloop\toloop\n");
146138fd1498Szrj 
146238fd1498Szrj   for (i = 0; datarefs.iterate (i, &dr); ++i)
146338fd1498Szrj     {
146438fd1498Szrj       vec<tree> *stride = DR_ACCESS_STRIDE (dr);
146538fd1498Szrj       tree iloop_stride = (*stride)[i_idx], oloop_stride = (*stride)[o_idx];
146638fd1498Szrj 
146738fd1498Szrj       bool subloop_stride_p = false;
146838fd1498Szrj       /* Data ref can't be invariant or sequential access at current loop if
146938fd1498Szrj 	 its address changes with respect to any subloops.  */
147038fd1498Szrj       for (j = i_idx + 1; j < stride->length (); ++j)
147138fd1498Szrj 	if (!integer_zerop ((*stride)[j]))
147238fd1498Szrj 	  {
147338fd1498Szrj 	    subloop_stride_p = true;
147438fd1498Szrj 	    break;
147538fd1498Szrj 	  }
147638fd1498Szrj 
147738fd1498Szrj       if (integer_zerop (iloop_stride))
147838fd1498Szrj 	{
147938fd1498Szrj 	  if (!subloop_stride_p)
148038fd1498Szrj 	    num_old_inv_drs++;
148138fd1498Szrj 	}
148238fd1498Szrj       if (integer_zerop (oloop_stride))
148338fd1498Szrj 	{
148438fd1498Szrj 	  if (!subloop_stride_p)
148538fd1498Szrj 	    num_new_inv_drs++;
148638fd1498Szrj 	}
148738fd1498Szrj 
148838fd1498Szrj       if (TREE_CODE (iloop_stride) == INTEGER_CST
148938fd1498Szrj 	  && TREE_CODE (oloop_stride) == INTEGER_CST)
149038fd1498Szrj 	{
149138fd1498Szrj 	  iloop_strides = wi::add (iloop_strides, wi::to_widest (iloop_stride));
149238fd1498Szrj 	  oloop_strides = wi::add (oloop_strides, wi::to_widest (oloop_stride));
149338fd1498Szrj 	}
149438fd1498Szrj       else if (multiple_of_p (TREE_TYPE (iloop_stride),
149538fd1498Szrj 			      iloop_stride, oloop_stride))
149638fd1498Szrj 	num_resolved_ok_drs++;
149738fd1498Szrj       else if (multiple_of_p (TREE_TYPE (iloop_stride),
149838fd1498Szrj 			      oloop_stride, iloop_stride))
149938fd1498Szrj 	num_resolved_not_ok_drs++;
150038fd1498Szrj       else
150138fd1498Szrj 	num_unresolved_drs++;
150238fd1498Szrj 
150338fd1498Szrj       /* Data ref can't be sequential access if its address changes in sub
150438fd1498Szrj 	 loop.  */
150538fd1498Szrj       if (subloop_stride_p)
150638fd1498Szrj 	{
150738fd1498Szrj 	  all_seq_dr_before_p = false;
150838fd1498Szrj 	  all_seq_dr_after_p = false;
150938fd1498Szrj 	  continue;
151038fd1498Szrj 	}
151138fd1498Szrj       /* Track if all data references are sequential accesses before/after loop
151238fd1498Szrj 	 interchange.  Note invariant is considered sequential here.  */
151338fd1498Szrj       tree access_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
151438fd1498Szrj       if (all_seq_dr_before_p
151538fd1498Szrj 	  && ! (integer_zerop (iloop_stride)
151638fd1498Szrj 		|| operand_equal_p (access_size, iloop_stride, 0)))
151738fd1498Szrj 	all_seq_dr_before_p = false;
151838fd1498Szrj       if (all_seq_dr_after_p
151938fd1498Szrj 	  && ! (integer_zerop (oloop_stride)
152038fd1498Szrj 		|| operand_equal_p (access_size, oloop_stride, 0)))
152138fd1498Szrj 	all_seq_dr_after_p = false;
152238fd1498Szrj     }
152338fd1498Szrj 
152438fd1498Szrj   if (dump_info_p && dump_file && (dump_flags & TDF_DETAILS))
152538fd1498Szrj     {
152638fd1498Szrj       fprintf (dump_file, "\toverall:\t\t");
152738fd1498Szrj       print_decu (iloop_strides, dump_file);
152838fd1498Szrj       fprintf (dump_file, "\t");
152938fd1498Szrj       print_decu (oloop_strides, dump_file);
153038fd1498Szrj       fprintf (dump_file, "\n");
153138fd1498Szrj 
153238fd1498Szrj       fprintf (dump_file, "Invariant data ref: before(%d), after(%d)\n",
153338fd1498Szrj 	       num_old_inv_drs, num_new_inv_drs);
153438fd1498Szrj       fprintf (dump_file, "All consecutive stride: before(%s), after(%s)\n",
153538fd1498Szrj 	       all_seq_dr_before_p ? "true" : "false",
153638fd1498Szrj 	       all_seq_dr_after_p ? "true" : "false");
153738fd1498Szrj       fprintf (dump_file, "OK to interchage with variable strides: %d\n",
153838fd1498Szrj 	       num_resolved_ok_drs);
153938fd1498Szrj       fprintf (dump_file, "Not OK to interchage with variable strides: %d\n",
154038fd1498Szrj 	       num_resolved_not_ok_drs);
154138fd1498Szrj       fprintf (dump_file, "Variable strides we cannot decide: %d\n",
154238fd1498Szrj 	       num_unresolved_drs);
154338fd1498Szrj       fprintf (dump_file, "Stmt cost of inner loop: %d\n", i_stmt_cost);
154438fd1498Szrj       fprintf (dump_file, "Stmt cost of outer loop: %d\n", o_stmt_cost);
154538fd1498Szrj     }
154638fd1498Szrj 
154738fd1498Szrj   if (num_unresolved_drs != 0 || num_resolved_not_ok_drs != 0)
154838fd1498Szrj     return false;
154938fd1498Szrj 
155038fd1498Szrj   /* Stmts of outer loop will be moved to inner loop.  If there are two many
155138fd1498Szrj      such stmts, it could make inner loop costly.  Here we compare stmt cost
155238fd1498Szrj      between outer and inner loops.  */
155338fd1498Szrj   if (i_stmt_cost && o_stmt_cost
155438fd1498Szrj       && num_old_inv_drs + o_stmt_cost > num_new_inv_drs
155538fd1498Szrj       && o_stmt_cost * STMT_COST_RATIO > i_stmt_cost)
155638fd1498Szrj     return false;
155738fd1498Szrj 
155838fd1498Szrj   /* We use different stride comparison ratio for interchanging innermost
155938fd1498Szrj      two loops or not.  The idea is to be conservative in interchange for
156038fd1498Szrj      the innermost loops.  */
156138fd1498Szrj   ratio = innermost_loops_p ? INNER_STRIDE_RATIO : OUTER_STRIDE_RATIO;
156238fd1498Szrj   /* Do interchange if it gives better data locality behavior.  */
156338fd1498Szrj   if (wi::gtu_p (iloop_strides, wi::mul (oloop_strides, ratio)))
156438fd1498Szrj     return true;
156538fd1498Szrj   if (wi::gtu_p (iloop_strides, oloop_strides))
156638fd1498Szrj     {
156738fd1498Szrj       /* Or it creates more invariant memory references.  */
156838fd1498Szrj       if ((!all_seq_dr_before_p || all_seq_dr_after_p)
156938fd1498Szrj 	  && num_new_inv_drs > num_old_inv_drs)
157038fd1498Szrj 	return true;
157138fd1498Szrj       /* Or it makes all memory references sequential.  */
157238fd1498Szrj       if (num_new_inv_drs >= num_old_inv_drs
157338fd1498Szrj 	  && !all_seq_dr_before_p && all_seq_dr_after_p)
157438fd1498Szrj 	return true;
157538fd1498Szrj     }
157638fd1498Szrj 
157738fd1498Szrj   return false;
157838fd1498Szrj }
157938fd1498Szrj 
158038fd1498Szrj /* Try to interchange inner loop of a loop nest to outer level.  */
158138fd1498Szrj 
158238fd1498Szrj bool
interchange(vec<data_reference_p> datarefs,vec<ddr_p> ddrs)158338fd1498Szrj tree_loop_interchange::interchange (vec<data_reference_p> datarefs,
158438fd1498Szrj 				    vec<ddr_p> ddrs)
158538fd1498Szrj {
158638fd1498Szrj   location_t loc = find_loop_location (m_loop_nest[0]);
158738fd1498Szrj   bool changed_p = false;
158838fd1498Szrj   /* In each iteration we try to interchange I-th loop with (I+1)-th loop.
158938fd1498Szrj      The overall effect is to push inner loop to outermost level in whole
159038fd1498Szrj      loop nest.  */
159138fd1498Szrj   for (unsigned i = m_loop_nest.length (); i > 1; --i)
159238fd1498Szrj     {
159338fd1498Szrj       unsigned i_idx = i - 1, o_idx = i - 2;
159438fd1498Szrj 
159538fd1498Szrj       /* Check validity for loop interchange.  */
159638fd1498Szrj       if (!valid_data_dependences (i_idx, o_idx, ddrs))
159738fd1498Szrj 	break;
159838fd1498Szrj 
159938fd1498Szrj       loop_cand iloop (m_loop_nest[i_idx], m_loop_nest[o_idx]);
160038fd1498Szrj       loop_cand oloop (m_loop_nest[o_idx], m_loop_nest[o_idx]);
160138fd1498Szrj 
160238fd1498Szrj       /* Check if we can do transformation for loop interchange.  */
160338fd1498Szrj       if (!iloop.analyze_carried_vars (NULL)
160438fd1498Szrj 	  || !iloop.analyze_lcssa_phis ()
160538fd1498Szrj 	  || !oloop.analyze_carried_vars (&iloop)
160638fd1498Szrj 	  || !oloop.analyze_lcssa_phis ()
160738fd1498Szrj 	  || !iloop.can_interchange_p (NULL)
160838fd1498Szrj 	  || !oloop.can_interchange_p (&iloop))
160938fd1498Szrj 	break;
161038fd1498Szrj 
161138fd1498Szrj       /* Outer loop's stmts will be moved to inner loop during interchange.
161238fd1498Szrj 	 If there are many of them, it may make inner loop very costly.  We
161338fd1498Szrj 	 need to check number of outer loop's stmts in profit cost model of
161438fd1498Szrj 	 interchange.  */
161538fd1498Szrj       int stmt_cost = oloop.m_num_stmts;
161638fd1498Szrj       /* Count out the exit checking stmt of outer loop.  */
161738fd1498Szrj       stmt_cost --;
161838fd1498Szrj       /* Count out IV's increasing stmt, IVOPTs takes care if it.  */
161938fd1498Szrj       stmt_cost -= oloop.m_inductions.length ();
162038fd1498Szrj       /* Count in the additional load and cond_expr stmts caused by inner
162138fd1498Szrj 	 loop's constant initialized reduction.  */
162238fd1498Szrj       stmt_cost += iloop.m_const_init_reduc * 2;
162338fd1498Szrj       if (stmt_cost < 0)
162438fd1498Szrj 	stmt_cost = 0;
162538fd1498Szrj 
162638fd1498Szrj       /* Check profitability for loop interchange.  */
162738fd1498Szrj       if (should_interchange_loops (i_idx, o_idx, datarefs,
162838fd1498Szrj 				    (unsigned) iloop.m_num_stmts,
162938fd1498Szrj 				    (unsigned) stmt_cost,
163038fd1498Szrj 				    iloop.m_loop->inner == NULL))
163138fd1498Szrj 	{
163238fd1498Szrj 	  if (dump_file && (dump_flags & TDF_DETAILS))
163338fd1498Szrj 	    fprintf (dump_file,
163438fd1498Szrj 		     "Loop_pair<outer:%d, inner:%d> is interchanged\n\n",
163538fd1498Szrj 		     oloop.m_loop->num, iloop.m_loop->num);
163638fd1498Szrj 
163738fd1498Szrj 	  changed_p = true;
163838fd1498Szrj 	  interchange_loops (iloop, oloop);
163938fd1498Szrj 	  /* No need to update if there is no further loop interchange.  */
164038fd1498Szrj 	  if (o_idx > 0)
164138fd1498Szrj 	    update_data_info (i_idx, o_idx, datarefs, ddrs);
164238fd1498Szrj 	}
164338fd1498Szrj       else
164438fd1498Szrj 	{
164538fd1498Szrj 	  if (dump_file && (dump_flags & TDF_DETAILS))
164638fd1498Szrj 	    fprintf (dump_file,
164738fd1498Szrj 		     "Loop_pair<outer:%d, inner:%d> is not interchanged\n\n",
164838fd1498Szrj 		     oloop.m_loop->num, iloop.m_loop->num);
164938fd1498Szrj 	}
165038fd1498Szrj     }
165138fd1498Szrj   simple_dce_from_worklist (m_dce_seeds);
165238fd1498Szrj 
165338fd1498Szrj   if (changed_p)
165438fd1498Szrj     dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc,
165538fd1498Szrj 		     "loops interchanged in loop nest\n");
165638fd1498Szrj 
165738fd1498Szrj   return changed_p;
165838fd1498Szrj }
165938fd1498Szrj 
166038fd1498Szrj 
166138fd1498Szrj /* Loop interchange pass.  */
166238fd1498Szrj 
166338fd1498Szrj namespace {
166438fd1498Szrj 
166538fd1498Szrj const pass_data pass_data_linterchange =
166638fd1498Szrj {
166738fd1498Szrj   GIMPLE_PASS, /* type */
166838fd1498Szrj   "linterchange", /* name */
166938fd1498Szrj   OPTGROUP_LOOP, /* optinfo_flags */
167038fd1498Szrj   TV_LINTERCHANGE, /* tv_id */
167138fd1498Szrj   PROP_cfg, /* properties_required */
167238fd1498Szrj   0, /* properties_provided */
167338fd1498Szrj   0, /* properties_destroyed */
167438fd1498Szrj   0, /* todo_flags_start */
167538fd1498Szrj   0, /* todo_flags_finish */
167638fd1498Szrj };
167738fd1498Szrj 
167838fd1498Szrj class pass_linterchange : public gimple_opt_pass
167938fd1498Szrj {
168038fd1498Szrj public:
pass_linterchange(gcc::context * ctxt)168138fd1498Szrj   pass_linterchange (gcc::context *ctxt)
168238fd1498Szrj     : gimple_opt_pass (pass_data_linterchange, ctxt)
168338fd1498Szrj   {}
168438fd1498Szrj 
168538fd1498Szrj   /* opt_pass methods: */
clone()168638fd1498Szrj   opt_pass * clone () { return new pass_linterchange (m_ctxt); }
gate(function *)168738fd1498Szrj   virtual bool gate (function *) { return flag_loop_interchange; }
168838fd1498Szrj   virtual unsigned int execute (function *);
168938fd1498Szrj 
169038fd1498Szrj }; // class pass_linterchange
169138fd1498Szrj 
169238fd1498Szrj 
169338fd1498Szrj /* Return true if LOOP has proper form for interchange.  We check three
169438fd1498Szrj    conditions in the function:
169538fd1498Szrj      1) In general, a loop can be interchanged only if it doesn't have
169638fd1498Szrj 	basic blocks other than header, exit and latch besides possible
169738fd1498Szrj 	inner loop nest.  This basically restricts loop interchange to
169838fd1498Szrj 	below form loop nests:
169938fd1498Szrj 
170038fd1498Szrj           header<---+
170138fd1498Szrj             |       |
170238fd1498Szrj             v       |
170338fd1498Szrj         INNER_LOOP  |
170438fd1498Szrj             |       |
170538fd1498Szrj             v       |
170638fd1498Szrj           exit--->latch
170738fd1498Szrj 
170838fd1498Szrj      2) Data reference in basic block that executes in different times
170938fd1498Szrj 	than loop head/exit is not allowed.
171038fd1498Szrj      3) Record the innermost outer loop that doesn't form rectangle loop
171138fd1498Szrj 	nest with LOOP.  */
171238fd1498Szrj 
171338fd1498Szrj static bool
proper_loop_form_for_interchange(struct loop * loop,struct loop ** min_outer)171438fd1498Szrj proper_loop_form_for_interchange (struct loop *loop, struct loop **min_outer)
171538fd1498Szrj {
171638fd1498Szrj   edge e0, e1, exit;
171738fd1498Szrj 
171838fd1498Szrj   /* Don't interchange if loop has unsupported information for the moment.  */
171938fd1498Szrj   if (loop->safelen > 0
172038fd1498Szrj       || loop->constraints != 0
172138fd1498Szrj       || loop->can_be_parallel
172238fd1498Szrj       || loop->dont_vectorize
172338fd1498Szrj       || loop->force_vectorize
172438fd1498Szrj       || loop->in_oacc_kernels_region
172538fd1498Szrj       || loop->orig_loop_num != 0
172638fd1498Szrj       || loop->simduid != NULL_TREE)
172738fd1498Szrj     return false;
172838fd1498Szrj 
172938fd1498Szrj   /* Don't interchange if outer loop has basic block other than header, exit
173038fd1498Szrj      and latch.  */
173138fd1498Szrj   if (loop->inner != NULL
173238fd1498Szrj       && loop->num_nodes != loop->inner->num_nodes + 3)
173338fd1498Szrj     return false;
173438fd1498Szrj 
173538fd1498Szrj   if ((exit = single_dom_exit (loop)) == NULL)
173638fd1498Szrj     return false;
173738fd1498Szrj 
173838fd1498Szrj   /* Check control flow on loop header/exit blocks.  */
173938fd1498Szrj   if (loop->header == exit->src
174038fd1498Szrj       && (EDGE_COUNT (loop->header->preds) != 2
174138fd1498Szrj 	  || EDGE_COUNT (loop->header->succs) != 2))
174238fd1498Szrj     return false;
174338fd1498Szrj   else if (loop->header != exit->src
174438fd1498Szrj 	   && (EDGE_COUNT (loop->header->preds) != 2
174538fd1498Szrj 	       || !single_succ_p (loop->header)
174638fd1498Szrj 	       || unsupported_edge (single_succ_edge (loop->header))
174738fd1498Szrj 	       || EDGE_COUNT (exit->src->succs) != 2
174838fd1498Szrj 	       || !single_pred_p (exit->src)
174938fd1498Szrj 	       || unsupported_edge (single_pred_edge (exit->src))))
175038fd1498Szrj     return false;
175138fd1498Szrj 
175238fd1498Szrj   e0 = EDGE_PRED (loop->header, 0);
175338fd1498Szrj   e1 = EDGE_PRED (loop->header, 1);
175438fd1498Szrj   if (unsupported_edge (e0) || unsupported_edge (e1)
175538fd1498Szrj       || (e0->src != loop->latch && e1->src != loop->latch)
175638fd1498Szrj       || (e0->src->loop_father == loop && e1->src->loop_father == loop))
175738fd1498Szrj     return false;
175838fd1498Szrj 
175938fd1498Szrj   e0 = EDGE_SUCC (exit->src, 0);
176038fd1498Szrj   e1 = EDGE_SUCC (exit->src, 1);
176138fd1498Szrj   if (unsupported_edge (e0) || unsupported_edge (e1)
176238fd1498Szrj       || (e0->dest != loop->latch && e1->dest != loop->latch)
176338fd1498Szrj       || (e0->dest->loop_father == loop && e1->dest->loop_father == loop))
176438fd1498Szrj     return false;
176538fd1498Szrj 
176638fd1498Szrj   /* Don't interchange if any reference is in basic block that doesn't
176738fd1498Szrj      dominate exit block.  */
176838fd1498Szrj   basic_block *bbs = get_loop_body (loop);
176938fd1498Szrj   for (unsigned i = 0; i < loop->num_nodes; i++)
177038fd1498Szrj     {
177138fd1498Szrj       basic_block bb = bbs[i];
177238fd1498Szrj 
177338fd1498Szrj       if (bb->loop_father != loop
177438fd1498Szrj 	  || bb == loop->header || bb == exit->src
177538fd1498Szrj 	  || dominated_by_p (CDI_DOMINATORS, exit->src, bb))
177638fd1498Szrj 	continue;
177738fd1498Szrj 
177838fd1498Szrj       for (gimple_stmt_iterator gsi = gsi_start_nondebug_bb (bb);
177938fd1498Szrj 	   !gsi_end_p (gsi); gsi_next_nondebug (&gsi))
178038fd1498Szrj 	if (gimple_vuse (gsi_stmt (gsi)))
178138fd1498Szrj 	  {
178238fd1498Szrj 	    free (bbs);
178338fd1498Szrj 	    return false;
178438fd1498Szrj 	  }
178538fd1498Szrj     }
178638fd1498Szrj   free (bbs);
178738fd1498Szrj 
178838fd1498Szrj   tree niters = number_of_latch_executions (loop);
178938fd1498Szrj   niters = analyze_scalar_evolution (loop_outer (loop), niters);
179038fd1498Szrj   if (!niters || chrec_contains_undetermined (niters))
179138fd1498Szrj     return false;
179238fd1498Szrj 
179338fd1498Szrj   /* Record the innermost outer loop that doesn't form rectangle loop nest.  */
179438fd1498Szrj   for (loop_p loop2 = loop_outer (loop);
179538fd1498Szrj        loop2 && flow_loop_nested_p (*min_outer, loop2);
179638fd1498Szrj        loop2 = loop_outer (loop2))
179738fd1498Szrj     {
179838fd1498Szrj       niters = instantiate_scev (loop_preheader_edge (loop2),
179938fd1498Szrj 				 loop_outer (loop), niters);
180038fd1498Szrj       if (!evolution_function_is_invariant_p (niters, loop2->num))
180138fd1498Szrj 	{
180238fd1498Szrj 	  *min_outer = loop2;
180338fd1498Szrj 	  break;
180438fd1498Szrj 	}
180538fd1498Szrj     }
180638fd1498Szrj   return true;
180738fd1498Szrj }
180838fd1498Szrj 
180938fd1498Szrj /* Return true if any two adjacent loops in loop nest [INNERMOST, LOOP_NEST]
181038fd1498Szrj    should be interchanged by looking into all DATAREFS.  */
181138fd1498Szrj 
181238fd1498Szrj static bool
should_interchange_loop_nest(struct loop * loop_nest,struct loop * innermost,vec<data_reference_p> datarefs)181338fd1498Szrj should_interchange_loop_nest (struct loop *loop_nest, struct loop *innermost,
181438fd1498Szrj 			      vec<data_reference_p> datarefs)
181538fd1498Szrj {
181638fd1498Szrj   unsigned idx = loop_depth (innermost) - loop_depth (loop_nest);
181738fd1498Szrj   gcc_assert (idx > 0);
181838fd1498Szrj 
181938fd1498Szrj   /* Check if any two adjacent loops should be interchanged.  */
182038fd1498Szrj   for (struct loop *loop = innermost;
182138fd1498Szrj        loop != loop_nest; loop = loop_outer (loop), idx--)
182238fd1498Szrj     if (should_interchange_loops (idx, idx - 1, datarefs, 0, 0,
182338fd1498Szrj 				  loop == innermost, false))
182438fd1498Szrj       return true;
182538fd1498Szrj 
182638fd1498Szrj   return false;
182738fd1498Szrj }
182838fd1498Szrj 
182938fd1498Szrj /* Given loop nest LOOP_NEST and data references DATAREFS, compute data
183038fd1498Szrj    dependences for loop interchange and store it in DDRS.  Note we compute
183138fd1498Szrj    dependences directly rather than call generic interface so that we can
183238fd1498Szrj    return on unknown dependence instantly.  */
183338fd1498Szrj 
183438fd1498Szrj static bool
tree_loop_interchange_compute_ddrs(vec<loop_p> loop_nest,vec<data_reference_p> datarefs,vec<ddr_p> * ddrs)183538fd1498Szrj tree_loop_interchange_compute_ddrs (vec<loop_p> loop_nest,
183638fd1498Szrj 				    vec<data_reference_p> datarefs,
183738fd1498Szrj 				    vec<ddr_p> *ddrs)
183838fd1498Szrj {
183938fd1498Szrj   struct data_reference *a, *b;
184038fd1498Szrj   struct loop *innermost = loop_nest.last ();
184138fd1498Szrj 
184238fd1498Szrj   for (unsigned i = 0; datarefs.iterate (i, &a); ++i)
184338fd1498Szrj     {
184438fd1498Szrj       bool a_outer_p = gimple_bb (DR_STMT (a))->loop_father != innermost;
184538fd1498Szrj       for (unsigned j = i + 1; datarefs.iterate (j, &b); ++j)
184638fd1498Szrj 	if (DR_IS_WRITE (a) || DR_IS_WRITE (b))
184738fd1498Szrj 	  {
184838fd1498Szrj 	    bool b_outer_p = gimple_bb (DR_STMT (b))->loop_father != innermost;
184938fd1498Szrj 	    /* Don't support multiple write references in outer loop.  */
185038fd1498Szrj 	    if (a_outer_p && b_outer_p && DR_IS_WRITE (a) && DR_IS_WRITE (b))
185138fd1498Szrj 	      return false;
185238fd1498Szrj 
185338fd1498Szrj 	    ddr_p ddr = initialize_data_dependence_relation (a, b, loop_nest);
185438fd1498Szrj 	    ddrs->safe_push (ddr);
185538fd1498Szrj 	    compute_affine_dependence (ddr, loop_nest[0]);
185638fd1498Szrj 
185738fd1498Szrj 	    /* Give up if ddr is unknown dependence or classic direct vector
185838fd1498Szrj 	       is not available.  */
185938fd1498Szrj 	    if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know
186038fd1498Szrj 		|| (DDR_ARE_DEPENDENT (ddr) == NULL_TREE
186138fd1498Szrj 		    && DDR_NUM_DIR_VECTS (ddr) == 0))
186238fd1498Szrj 	      return false;
186338fd1498Szrj 
186438fd1498Szrj 	    /* If either data references is in outer loop of nest, we require
186538fd1498Szrj 	       no dependence here because the data reference need to be moved
186638fd1498Szrj 	       into inner loop during interchange.  */
186738fd1498Szrj 	    if (a_outer_p && b_outer_p
186838fd1498Szrj 		&& operand_equal_p (DR_REF (a), DR_REF (b), 0))
186938fd1498Szrj 	      continue;
187038fd1498Szrj 	    if (DDR_ARE_DEPENDENT (ddr) != chrec_known
187138fd1498Szrj 		&& (a_outer_p || b_outer_p))
187238fd1498Szrj 	      return false;
187338fd1498Szrj 	}
187438fd1498Szrj     }
187538fd1498Szrj 
187638fd1498Szrj   return true;
187738fd1498Szrj }
187838fd1498Szrj 
187938fd1498Szrj /* Prune DATAREFS by removing any data reference not inside of LOOP.  */
188038fd1498Szrj 
188138fd1498Szrj static inline void
prune_datarefs_not_in_loop(struct loop * loop,vec<data_reference_p> datarefs)188238fd1498Szrj prune_datarefs_not_in_loop (struct loop *loop, vec<data_reference_p> datarefs)
188338fd1498Szrj {
188438fd1498Szrj   unsigned i, j;
188538fd1498Szrj   struct data_reference *dr;
188638fd1498Szrj 
188738fd1498Szrj   for (i = 0, j = 0; datarefs.iterate (i, &dr); ++i)
188838fd1498Szrj     {
188938fd1498Szrj       if (flow_bb_inside_loop_p (loop, gimple_bb (DR_STMT (dr))))
189038fd1498Szrj 	datarefs[j++] = dr;
189138fd1498Szrj       else
189238fd1498Szrj 	{
189338fd1498Szrj 	  if (dr->aux)
189438fd1498Szrj 	    {
189538fd1498Szrj 	      DR_ACCESS_STRIDE (dr)->release ();
189638fd1498Szrj 	      delete (vec<tree> *) dr->aux;
189738fd1498Szrj 	    }
189838fd1498Szrj 	  free_data_ref (dr);
189938fd1498Szrj 	}
190038fd1498Szrj     }
190138fd1498Szrj   datarefs.truncate (j);
190238fd1498Szrj }
190338fd1498Szrj 
190438fd1498Szrj /* Find and store data references in DATAREFS for LOOP nest.  If there's
190538fd1498Szrj    difficult data reference in a basic block, we shrink the loop nest to
190638fd1498Szrj    inner loop of that basic block's father loop.  On success, return the
190738fd1498Szrj    outer loop of the result loop nest.  */
190838fd1498Szrj 
190938fd1498Szrj static struct loop *
prepare_data_references(struct loop * loop,vec<data_reference_p> * datarefs)191038fd1498Szrj prepare_data_references (struct loop *loop, vec<data_reference_p> *datarefs)
191138fd1498Szrj {
191238fd1498Szrj   struct loop *loop_nest = loop;
191338fd1498Szrj   vec<data_reference_p> *bb_refs;
191438fd1498Szrj   basic_block bb, *bbs = get_loop_body_in_dom_order (loop);
191538fd1498Szrj 
191638fd1498Szrj   for (unsigned i = 0; i < loop->num_nodes; i++)
191738fd1498Szrj     bbs[i]->aux = NULL;
191838fd1498Szrj 
191938fd1498Szrj   /* Find data references for all basic blocks.  Shrink loop nest on difficult
192038fd1498Szrj      data reference.  */
192138fd1498Szrj   for (unsigned i = 0; loop_nest && i < loop->num_nodes; ++i)
192238fd1498Szrj     {
192338fd1498Szrj       bb = bbs[i];
192438fd1498Szrj       if (!flow_bb_inside_loop_p (loop_nest, bb))
192538fd1498Szrj 	continue;
192638fd1498Szrj 
192738fd1498Szrj       bb_refs = new vec<data_reference_p> ();
192838fd1498Szrj       if (find_data_references_in_bb (loop, bb, bb_refs) == chrec_dont_know)
192938fd1498Szrj         {
193038fd1498Szrj 	  loop_nest = bb->loop_father->inner;
193138fd1498Szrj 	  if (loop_nest && !loop_nest->inner)
193238fd1498Szrj 	    loop_nest = NULL;
193338fd1498Szrj 
193438fd1498Szrj 	  free_data_refs (*bb_refs);
193538fd1498Szrj           delete bb_refs;
193638fd1498Szrj         }
193738fd1498Szrj       else if (bb_refs->is_empty ())
193838fd1498Szrj 	delete bb_refs;
193938fd1498Szrj       else
194038fd1498Szrj 	bb->aux = bb_refs;
194138fd1498Szrj     }
194238fd1498Szrj 
194338fd1498Szrj   /* Collect all data references in loop nest.  */
194438fd1498Szrj   for (unsigned i = 0; i < loop->num_nodes; i++)
194538fd1498Szrj     {
194638fd1498Szrj       bb = bbs[i];
194738fd1498Szrj       if (!bb->aux)
194838fd1498Szrj 	continue;
194938fd1498Szrj 
195038fd1498Szrj       bb_refs = (vec<data_reference_p> *) bb->aux;
195138fd1498Szrj       if (loop_nest && flow_bb_inside_loop_p (loop_nest, bb))
195238fd1498Szrj 	datarefs->safe_splice (*bb_refs);
195338fd1498Szrj       else
195438fd1498Szrj 	free_data_refs (*bb_refs);
195538fd1498Szrj 
195638fd1498Szrj       delete bb_refs;
195738fd1498Szrj       bb->aux = NULL;
195838fd1498Szrj     }
195938fd1498Szrj   free (bbs);
196038fd1498Szrj 
196138fd1498Szrj   return loop_nest;
196238fd1498Szrj }
196338fd1498Szrj 
196438fd1498Szrj /* Given innermost LOOP, return true if perfect loop nest can be found and
196538fd1498Szrj    data dependences can be computed.  If succeed, record the perfect loop
196638fd1498Szrj    nest in LOOP_NEST; record all data references in DATAREFS and record all
196738fd1498Szrj    data dependence relations in DDRS.
196838fd1498Szrj 
196938fd1498Szrj    We do support a restricted form of imperfect loop nest, i.e, loop nest
197038fd1498Szrj    with load/store in outer loop initializing/finalizing simple reduction
197138fd1498Szrj    of the innermost loop.  For such outer loop reference, we require that
197238fd1498Szrj    it has no dependence with others sinve it will be moved to inner loop
197338fd1498Szrj    in interchange.  */
197438fd1498Szrj 
197538fd1498Szrj static bool
prepare_perfect_loop_nest(struct loop * loop,vec<loop_p> * loop_nest,vec<data_reference_p> * datarefs,vec<ddr_p> * ddrs)197638fd1498Szrj prepare_perfect_loop_nest (struct loop *loop, vec<loop_p> *loop_nest,
197738fd1498Szrj 			   vec<data_reference_p> *datarefs, vec<ddr_p> *ddrs)
197838fd1498Szrj {
197938fd1498Szrj   struct loop *start_loop = NULL, *innermost = loop;
198038fd1498Szrj   struct loop *outermost = loops_for_fn (cfun)->tree_root;
198138fd1498Szrj 
198238fd1498Szrj   /* Find loop nest from the innermost loop.  The outermost is the innermost
198338fd1498Szrj      outer*/
198438fd1498Szrj   while (loop->num != 0 && loop->inner == start_loop
198538fd1498Szrj 	 && flow_loop_nested_p (outermost, loop))
198638fd1498Szrj     {
198738fd1498Szrj       if (!proper_loop_form_for_interchange (loop, &outermost))
198838fd1498Szrj 	break;
198938fd1498Szrj 
199038fd1498Szrj       start_loop = loop;
199138fd1498Szrj       /* If this loop has sibling loop, the father loop won't be in perfect
199238fd1498Szrj 	 loop nest.  */
199338fd1498Szrj       if (loop->next != NULL)
199438fd1498Szrj 	break;
199538fd1498Szrj 
199638fd1498Szrj       loop = loop_outer (loop);
199738fd1498Szrj     }
199838fd1498Szrj   if (!start_loop || !start_loop->inner)
199938fd1498Szrj     return false;
200038fd1498Szrj 
200138fd1498Szrj   /* Prepare the data reference vector for the loop nest, pruning outer
200238fd1498Szrj      loops we cannot handle.  */
200338fd1498Szrj   start_loop = prepare_data_references (start_loop, datarefs);
200438fd1498Szrj   if (!start_loop
200538fd1498Szrj       /* Check if there is no data reference.  */
200638fd1498Szrj       || datarefs->is_empty ()
200738fd1498Szrj       /* Check if there are too many of data references.  */
200838fd1498Szrj       || (int) datarefs->length () > MAX_DATAREFS)
200938fd1498Szrj     return false;
201038fd1498Szrj 
201138fd1498Szrj   /* Compute access strides for all data references, pruning outer
201238fd1498Szrj      loops we cannot analyze refs in.  */
201338fd1498Szrj   start_loop = compute_access_strides (start_loop, innermost, *datarefs);
201438fd1498Szrj   if (!start_loop)
201538fd1498Szrj     return false;
201638fd1498Szrj 
201738fd1498Szrj   /* Check if any interchange is profitable in the loop nest.  */
201838fd1498Szrj   if (!should_interchange_loop_nest (start_loop, innermost, *datarefs))
201938fd1498Szrj     return false;
202038fd1498Szrj 
202138fd1498Szrj   /* Check if data dependences can be computed for loop nest starting from
202238fd1498Szrj      start_loop.  */
202338fd1498Szrj   loop = start_loop;
202438fd1498Szrj   do {
202538fd1498Szrj     loop_nest->truncate (0);
202638fd1498Szrj 
202738fd1498Szrj     if (loop != start_loop)
202838fd1498Szrj       prune_datarefs_not_in_loop (start_loop, *datarefs);
202938fd1498Szrj 
203038fd1498Szrj     if (find_loop_nest (start_loop, loop_nest)
203138fd1498Szrj 	&& tree_loop_interchange_compute_ddrs (*loop_nest, *datarefs, ddrs))
203238fd1498Szrj       {
203338fd1498Szrj 	if (dump_file && (dump_flags & TDF_DETAILS))
203438fd1498Szrj 	  fprintf (dump_file,
203538fd1498Szrj 		   "\nConsider loop interchange for loop_nest<%d - %d>\n",
203638fd1498Szrj 		   start_loop->num, innermost->num);
203738fd1498Szrj 
203838fd1498Szrj 	if (loop != start_loop)
203938fd1498Szrj 	  prune_access_strides_not_in_loop (start_loop, innermost, *datarefs);
204038fd1498Szrj 
204138fd1498Szrj 	if (dump_file && (dump_flags & TDF_DETAILS))
204238fd1498Szrj 	  dump_access_strides (*datarefs);
204338fd1498Szrj 
204438fd1498Szrj 	return true;
204538fd1498Szrj       }
204638fd1498Szrj 
204738fd1498Szrj     free_dependence_relations (*ddrs);
204838fd1498Szrj     *ddrs = vNULL;
204938fd1498Szrj     /* Try to compute data dependences with the outermost loop stripped.  */
205038fd1498Szrj     loop = start_loop;
205138fd1498Szrj     start_loop = start_loop->inner;
205238fd1498Szrj   } while (start_loop && start_loop->inner);
205338fd1498Szrj 
205438fd1498Szrj   return false;
205538fd1498Szrj }
205638fd1498Szrj 
205738fd1498Szrj /* Main entry for loop interchange pass.  */
205838fd1498Szrj 
205938fd1498Szrj unsigned int
execute(function * fun)206038fd1498Szrj pass_linterchange::execute (function *fun)
206138fd1498Szrj {
206238fd1498Szrj   if (number_of_loops (fun) <= 2)
206338fd1498Szrj     return 0;
206438fd1498Szrj 
206538fd1498Szrj   bool changed_p = false;
206638fd1498Szrj   struct loop *loop;
206738fd1498Szrj   FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
206838fd1498Szrj     {
206938fd1498Szrj       vec<loop_p> loop_nest = vNULL;
207038fd1498Szrj       vec<data_reference_p> datarefs = vNULL;
207138fd1498Szrj       vec<ddr_p> ddrs = vNULL;
207238fd1498Szrj       if (prepare_perfect_loop_nest (loop, &loop_nest, &datarefs, &ddrs))
207338fd1498Szrj 	{
207438fd1498Szrj 	  tree_loop_interchange loop_interchange (loop_nest);
207538fd1498Szrj 	  changed_p |= loop_interchange.interchange (datarefs, ddrs);
207638fd1498Szrj 	}
207738fd1498Szrj       free_dependence_relations (ddrs);
207838fd1498Szrj       free_data_refs_with_aux (datarefs);
207938fd1498Szrj       loop_nest.release ();
208038fd1498Szrj     }
208138fd1498Szrj 
208238fd1498Szrj   return changed_p ? (TODO_update_ssa_only_virtuals) : 0;
208338fd1498Szrj }
208438fd1498Szrj 
208538fd1498Szrj } // anon namespace
208638fd1498Szrj 
208738fd1498Szrj gimple_opt_pass *
make_pass_linterchange(gcc::context * ctxt)208838fd1498Szrj make_pass_linterchange (gcc::context *ctxt)
208938fd1498Szrj {
209038fd1498Szrj   return new pass_linterchange (ctxt);
209138fd1498Szrj }
2092