138fd1498Szrj /* Loop Vectorization
238fd1498Szrj    Copyright (C) 2003-2018 Free Software Foundation, Inc.
338fd1498Szrj    Contributed by Dorit Naishlos <dorit@il.ibm.com> and
438fd1498Szrj    Ira Rosen <irar@il.ibm.com>
538fd1498Szrj 
638fd1498Szrj This file is part of GCC.
738fd1498Szrj 
838fd1498Szrj GCC is free software; you can redistribute it and/or modify it under
938fd1498Szrj the terms of the GNU General Public License as published by the Free
1038fd1498Szrj Software Foundation; either version 3, or (at your option) any later
1138fd1498Szrj version.
1238fd1498Szrj 
1338fd1498Szrj GCC is distributed in the hope that it will be useful, but WITHOUT ANY
1438fd1498Szrj WARRANTY; without even the implied warranty of MERCHANTABILITY or
1538fd1498Szrj FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
1638fd1498Szrj for more details.
1738fd1498Szrj 
1838fd1498Szrj You should have received a copy of the GNU General Public License
1938fd1498Szrj along with GCC; see the file COPYING3.  If not see
2038fd1498Szrj <http://www.gnu.org/licenses/>.  */
2138fd1498Szrj 
2238fd1498Szrj #include "config.h"
2338fd1498Szrj #include "system.h"
2438fd1498Szrj #include "coretypes.h"
2538fd1498Szrj #include "backend.h"
2638fd1498Szrj #include "target.h"
2738fd1498Szrj #include "rtl.h"
2838fd1498Szrj #include "tree.h"
2938fd1498Szrj #include "gimple.h"
3038fd1498Szrj #include "cfghooks.h"
3138fd1498Szrj #include "tree-pass.h"
3238fd1498Szrj #include "ssa.h"
3338fd1498Szrj #include "optabs-tree.h"
3438fd1498Szrj #include "diagnostic-core.h"
3538fd1498Szrj #include "fold-const.h"
3638fd1498Szrj #include "stor-layout.h"
3738fd1498Szrj #include "cfganal.h"
3838fd1498Szrj #include "gimplify.h"
3938fd1498Szrj #include "gimple-iterator.h"
4038fd1498Szrj #include "gimplify-me.h"
4138fd1498Szrj #include "tree-ssa-loop-ivopts.h"
4238fd1498Szrj #include "tree-ssa-loop-manip.h"
4338fd1498Szrj #include "tree-ssa-loop-niter.h"
4438fd1498Szrj #include "tree-ssa-loop.h"
4538fd1498Szrj #include "cfgloop.h"
4638fd1498Szrj #include "params.h"
4738fd1498Szrj #include "tree-scalar-evolution.h"
4838fd1498Szrj #include "tree-vectorizer.h"
4938fd1498Szrj #include "gimple-fold.h"
5038fd1498Szrj #include "cgraph.h"
5138fd1498Szrj #include "tree-cfg.h"
5238fd1498Szrj #include "tree-if-conv.h"
5338fd1498Szrj #include "internal-fn.h"
5438fd1498Szrj #include "tree-vector-builder.h"
5538fd1498Szrj #include "vec-perm-indices.h"
5638fd1498Szrj #include "tree-eh.h"
5738fd1498Szrj 
5838fd1498Szrj /* Loop Vectorization Pass.
5938fd1498Szrj 
6038fd1498Szrj    This pass tries to vectorize loops.
6138fd1498Szrj 
6238fd1498Szrj    For example, the vectorizer transforms the following simple loop:
6338fd1498Szrj 
6438fd1498Szrj         short a[N]; short b[N]; short c[N]; int i;
6538fd1498Szrj 
6638fd1498Szrj         for (i=0; i<N; i++){
6738fd1498Szrj           a[i] = b[i] + c[i];
6838fd1498Szrj         }
6938fd1498Szrj 
7038fd1498Szrj    as if it was manually vectorized by rewriting the source code into:
7138fd1498Szrj 
7238fd1498Szrj         typedef int __attribute__((mode(V8HI))) v8hi;
7338fd1498Szrj         short a[N];  short b[N]; short c[N];   int i;
7438fd1498Szrj         v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
7538fd1498Szrj         v8hi va, vb, vc;
7638fd1498Szrj 
7738fd1498Szrj         for (i=0; i<N/8; i++){
7838fd1498Szrj           vb = pb[i];
7938fd1498Szrj           vc = pc[i];
8038fd1498Szrj           va = vb + vc;
8138fd1498Szrj           pa[i] = va;
8238fd1498Szrj         }
8338fd1498Szrj 
8438fd1498Szrj         The main entry to this pass is vectorize_loops(), in which
8538fd1498Szrj    the vectorizer applies a set of analyses on a given set of loops,
8638fd1498Szrj    followed by the actual vectorization transformation for the loops that
8738fd1498Szrj    had successfully passed the analysis phase.
8838fd1498Szrj         Throughout this pass we make a distinction between two types of
8938fd1498Szrj    data: scalars (which are represented by SSA_NAMES), and memory references
9038fd1498Szrj    ("data-refs").  These two types of data require different handling both
9138fd1498Szrj    during analysis and transformation. The types of data-refs that the
9238fd1498Szrj    vectorizer currently supports are ARRAY_REFS whose base is an array DECL
9338fd1498Szrj    (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
9438fd1498Szrj    accesses are required to have a simple (consecutive) access pattern.
9538fd1498Szrj 
9638fd1498Szrj    Analysis phase:
9738fd1498Szrj    ===============
9838fd1498Szrj         The driver for the analysis phase is vect_analyze_loop().
9938fd1498Szrj    It applies a set of analyses, some of which rely on the scalar evolution
10038fd1498Szrj    analyzer (scev) developed by Sebastian Pop.
10138fd1498Szrj 
10238fd1498Szrj         During the analysis phase the vectorizer records some information
10338fd1498Szrj    per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
10438fd1498Szrj    loop, as well as general information about the loop as a whole, which is
10538fd1498Szrj    recorded in a "loop_vec_info" struct attached to each loop.
10638fd1498Szrj 
10738fd1498Szrj    Transformation phase:
10838fd1498Szrj    =====================
10938fd1498Szrj         The loop transformation phase scans all the stmts in the loop, and
11038fd1498Szrj    creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
11138fd1498Szrj    the loop that needs to be vectorized.  It inserts the vector code sequence
11238fd1498Szrj    just before the scalar stmt S, and records a pointer to the vector code
11338fd1498Szrj    in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
11438fd1498Szrj    attached to S).  This pointer will be used for the vectorization of following
11538fd1498Szrj    stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
11638fd1498Szrj    otherwise, we rely on dead code elimination for removing it.
11738fd1498Szrj 
11838fd1498Szrj         For example, say stmt S1 was vectorized into stmt VS1:
11938fd1498Szrj 
12038fd1498Szrj    VS1: vb = px[i];
12138fd1498Szrj    S1:  b = x[i];    STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
12238fd1498Szrj    S2:  a = b;
12338fd1498Szrj 
12438fd1498Szrj    To vectorize stmt S2, the vectorizer first finds the stmt that defines
12538fd1498Szrj    the operand 'b' (S1), and gets the relevant vector def 'vb' from the
12638fd1498Szrj    vector stmt VS1 pointed to by STMT_VINFO_VEC_STMT (stmt_info (S1)).  The
12738fd1498Szrj    resulting sequence would be:
12838fd1498Szrj 
12938fd1498Szrj    VS1: vb = px[i];
13038fd1498Szrj    S1:  b = x[i];       STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
13138fd1498Szrj    VS2: va = vb;
13238fd1498Szrj    S2:  a = b;          STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
13338fd1498Szrj 
13438fd1498Szrj         Operands that are not SSA_NAMEs, are data-refs that appear in
13538fd1498Szrj    load/store operations (like 'x[i]' in S1), and are handled differently.
13638fd1498Szrj 
13738fd1498Szrj    Target modeling:
13838fd1498Szrj    =================
13938fd1498Szrj         Currently the only target specific information that is used is the
14038fd1498Szrj    size of the vector (in bytes) - "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD".
14138fd1498Szrj    Targets that can support different sizes of vectors, for now will need
14238fd1498Szrj    to specify one value for "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD".  More
14338fd1498Szrj    flexibility will be added in the future.
14438fd1498Szrj 
14538fd1498Szrj         Since we only vectorize operations whose vector form can be
14638fd1498Szrj    expressed using existing tree codes, to verify that an operation is
14738fd1498Szrj    supported, the vectorizer checks the relevant optab at the relevant
14838fd1498Szrj    machine_mode (e.g, optab_handler (add_optab, V8HImode)).  If
14938fd1498Szrj    the value found is CODE_FOR_nothing, then there's no target support, and
15038fd1498Szrj    we can't vectorize the stmt.
15138fd1498Szrj 
15238fd1498Szrj    For additional information on this project see:
15338fd1498Szrj    http://gcc.gnu.org/projects/tree-ssa/vectorization.html
15438fd1498Szrj */
15538fd1498Szrj 
15638fd1498Szrj static void vect_estimate_min_profitable_iters (loop_vec_info, int *, int *);
15738fd1498Szrj 
15838fd1498Szrj /* Function vect_determine_vectorization_factor
15938fd1498Szrj 
16038fd1498Szrj    Determine the vectorization factor (VF).  VF is the number of data elements
16138fd1498Szrj    that are operated upon in parallel in a single iteration of the vectorized
16238fd1498Szrj    loop.  For example, when vectorizing a loop that operates on 4byte elements,
16338fd1498Szrj    on a target with vector size (VS) 16byte, the VF is set to 4, since 4
16438fd1498Szrj    elements can fit in a single vector register.
16538fd1498Szrj 
16638fd1498Szrj    We currently support vectorization of loops in which all types operated upon
16738fd1498Szrj    are of the same size.  Therefore this function currently sets VF according to
16838fd1498Szrj    the size of the types operated upon, and fails if there are multiple sizes
16938fd1498Szrj    in the loop.
17038fd1498Szrj 
17138fd1498Szrj    VF is also the factor by which the loop iterations are strip-mined, e.g.:
17238fd1498Szrj    original loop:
17338fd1498Szrj         for (i=0; i<N; i++){
17438fd1498Szrj           a[i] = b[i] + c[i];
17538fd1498Szrj         }
17638fd1498Szrj 
17738fd1498Szrj    vectorized loop:
17838fd1498Szrj         for (i=0; i<N; i+=VF){
17938fd1498Szrj           a[i:VF] = b[i:VF] + c[i:VF];
18038fd1498Szrj         }
18138fd1498Szrj */
18238fd1498Szrj 
18338fd1498Szrj static bool
18438fd1498Szrj vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
18538fd1498Szrj {
18638fd1498Szrj   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
18738fd1498Szrj   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
18838fd1498Szrj   unsigned nbbs = loop->num_nodes;
18938fd1498Szrj   poly_uint64 vectorization_factor = 1;
19038fd1498Szrj   tree scalar_type = NULL_TREE;
19138fd1498Szrj   gphi *phi;
19238fd1498Szrj   tree vectype;
19338fd1498Szrj   stmt_vec_info stmt_info;
19438fd1498Szrj   unsigned i;
19538fd1498Szrj   HOST_WIDE_INT dummy;
19638fd1498Szrj   gimple *stmt, *pattern_stmt = NULL;
19738fd1498Szrj   gimple_seq pattern_def_seq = NULL;
19838fd1498Szrj   gimple_stmt_iterator pattern_def_si = gsi_none ();
19938fd1498Szrj   bool analyze_pattern_stmt = false;
20038fd1498Szrj   bool bool_result;
20138fd1498Szrj   auto_vec<stmt_vec_info> mask_producers;
20238fd1498Szrj 
20338fd1498Szrj   if (dump_enabled_p ())
20438fd1498Szrj     dump_printf_loc (MSG_NOTE, vect_location,
20538fd1498Szrj                      "=== vect_determine_vectorization_factor ===\n");
20638fd1498Szrj 
20738fd1498Szrj   for (i = 0; i < nbbs; i++)
20838fd1498Szrj     {
20938fd1498Szrj       basic_block bb = bbs[i];
21038fd1498Szrj 
21138fd1498Szrj       for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
21238fd1498Szrj 	   gsi_next (&si))
21338fd1498Szrj 	{
21438fd1498Szrj 	  phi = si.phi ();
21538fd1498Szrj 	  stmt_info = vinfo_for_stmt (phi);
21638fd1498Szrj 	  if (dump_enabled_p ())
21738fd1498Szrj 	    {
21838fd1498Szrj 	      dump_printf_loc (MSG_NOTE, vect_location, "==> examining phi: ");
21938fd1498Szrj 	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
22038fd1498Szrj 	    }
22138fd1498Szrj 
22238fd1498Szrj 	  gcc_assert (stmt_info);
22338fd1498Szrj 
22438fd1498Szrj 	  if (STMT_VINFO_RELEVANT_P (stmt_info)
22538fd1498Szrj 	      || STMT_VINFO_LIVE_P (stmt_info))
22638fd1498Szrj             {
22738fd1498Szrj 	      gcc_assert (!STMT_VINFO_VECTYPE (stmt_info));
22838fd1498Szrj               scalar_type = TREE_TYPE (PHI_RESULT (phi));
22938fd1498Szrj 
23038fd1498Szrj 	      if (dump_enabled_p ())
23138fd1498Szrj 		{
23238fd1498Szrj 		  dump_printf_loc (MSG_NOTE, vect_location,
23338fd1498Szrj                                    "get vectype for scalar type:  ");
23438fd1498Szrj 		  dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
23538fd1498Szrj                   dump_printf (MSG_NOTE, "\n");
23638fd1498Szrj 		}
23738fd1498Szrj 
23838fd1498Szrj 	      vectype = get_vectype_for_scalar_type (scalar_type);
23938fd1498Szrj 	      if (!vectype)
24038fd1498Szrj 		{
24138fd1498Szrj 		  if (dump_enabled_p ())
24238fd1498Szrj 		    {
24338fd1498Szrj 		      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
24438fd1498Szrj                                        "not vectorized: unsupported "
24538fd1498Szrj                                        "data-type ");
24638fd1498Szrj 		      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
24738fd1498Szrj                                          scalar_type);
24838fd1498Szrj                       dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
24938fd1498Szrj 		    }
25038fd1498Szrj 		  return false;
25138fd1498Szrj 		}
25238fd1498Szrj 	      STMT_VINFO_VECTYPE (stmt_info) = vectype;
25338fd1498Szrj 
25438fd1498Szrj 	      if (dump_enabled_p ())
25538fd1498Szrj 		{
25638fd1498Szrj 		  dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
25738fd1498Szrj 		  dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
25838fd1498Szrj                   dump_printf (MSG_NOTE, "\n");
25938fd1498Szrj 		}
26038fd1498Szrj 
26138fd1498Szrj 	      if (dump_enabled_p ())
26238fd1498Szrj 		{
26338fd1498Szrj 		  dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
26438fd1498Szrj 		  dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (vectype));
26538fd1498Szrj 		  dump_printf (MSG_NOTE, "\n");
26638fd1498Szrj 		}
26738fd1498Szrj 
26838fd1498Szrj 	      vect_update_max_nunits (&vectorization_factor, vectype);
26938fd1498Szrj 	    }
27038fd1498Szrj 	}
27138fd1498Szrj 
27238fd1498Szrj       for (gimple_stmt_iterator si = gsi_start_bb (bb);
27338fd1498Szrj 	   !gsi_end_p (si) || analyze_pattern_stmt;)
27438fd1498Szrj         {
27538fd1498Szrj           tree vf_vectype;
27638fd1498Szrj 
27738fd1498Szrj           if (analyze_pattern_stmt)
27838fd1498Szrj 	    stmt = pattern_stmt;
27938fd1498Szrj           else
28038fd1498Szrj             stmt = gsi_stmt (si);
28138fd1498Szrj 
28238fd1498Szrj           stmt_info = vinfo_for_stmt (stmt);
28338fd1498Szrj 
28438fd1498Szrj 	  if (dump_enabled_p ())
28538fd1498Szrj 	    {
28638fd1498Szrj 	      dump_printf_loc (MSG_NOTE, vect_location,
28738fd1498Szrj                                "==> examining statement: ");
28838fd1498Szrj 	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
28938fd1498Szrj 	    }
29038fd1498Szrj 
29138fd1498Szrj 	  gcc_assert (stmt_info);
29238fd1498Szrj 
29338fd1498Szrj 	  /* Skip stmts which do not need to be vectorized.  */
29438fd1498Szrj 	  if ((!STMT_VINFO_RELEVANT_P (stmt_info)
29538fd1498Szrj 	       && !STMT_VINFO_LIVE_P (stmt_info))
29638fd1498Szrj 	      || gimple_clobber_p (stmt))
29738fd1498Szrj             {
29838fd1498Szrj               if (STMT_VINFO_IN_PATTERN_P (stmt_info)
29938fd1498Szrj                   && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
30038fd1498Szrj                   && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
30138fd1498Szrj                       || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
30238fd1498Szrj                 {
30338fd1498Szrj                   stmt = pattern_stmt;
30438fd1498Szrj                   stmt_info = vinfo_for_stmt (pattern_stmt);
30538fd1498Szrj                   if (dump_enabled_p ())
30638fd1498Szrj                     {
30738fd1498Szrj                       dump_printf_loc (MSG_NOTE, vect_location,
30838fd1498Szrj                                        "==> examining pattern statement: ");
30938fd1498Szrj                       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
31038fd1498Szrj                     }
31138fd1498Szrj                 }
31238fd1498Szrj               else
31338fd1498Szrj 	        {
31438fd1498Szrj 	          if (dump_enabled_p ())
31538fd1498Szrj 	            dump_printf_loc (MSG_NOTE, vect_location, "skip.\n");
31638fd1498Szrj                   gsi_next (&si);
31738fd1498Szrj 	          continue;
31838fd1498Szrj                 }
31938fd1498Szrj 	    }
32038fd1498Szrj           else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
32138fd1498Szrj                    && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
32238fd1498Szrj                    && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
32338fd1498Szrj                        || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
32438fd1498Szrj             analyze_pattern_stmt = true;
32538fd1498Szrj 
32638fd1498Szrj 	  /* If a pattern statement has def stmts, analyze them too.  */
32738fd1498Szrj 	  if (is_pattern_stmt_p (stmt_info))
32838fd1498Szrj 	    {
32938fd1498Szrj 	      if (pattern_def_seq == NULL)
33038fd1498Szrj 		{
33138fd1498Szrj 		  pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info);
33238fd1498Szrj 		  pattern_def_si = gsi_start (pattern_def_seq);
33338fd1498Szrj 		}
33438fd1498Szrj 	      else if (!gsi_end_p (pattern_def_si))
33538fd1498Szrj 		gsi_next (&pattern_def_si);
33638fd1498Szrj 	      if (pattern_def_seq != NULL)
33738fd1498Szrj 		{
33838fd1498Szrj 		  gimple *pattern_def_stmt = NULL;
33938fd1498Szrj 		  stmt_vec_info pattern_def_stmt_info = NULL;
34038fd1498Szrj 
34138fd1498Szrj 		  while (!gsi_end_p (pattern_def_si))
34238fd1498Szrj 		    {
34338fd1498Szrj 		      pattern_def_stmt = gsi_stmt (pattern_def_si);
34438fd1498Szrj 		      pattern_def_stmt_info
34538fd1498Szrj 			= vinfo_for_stmt (pattern_def_stmt);
34638fd1498Szrj 		      if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
34738fd1498Szrj 			  || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
34838fd1498Szrj 			break;
34938fd1498Szrj 		      gsi_next (&pattern_def_si);
35038fd1498Szrj 		    }
35138fd1498Szrj 
35238fd1498Szrj 		  if (!gsi_end_p (pattern_def_si))
35338fd1498Szrj 		    {
35438fd1498Szrj 		      if (dump_enabled_p ())
35538fd1498Szrj 			{
35638fd1498Szrj 			  dump_printf_loc (MSG_NOTE, vect_location,
35738fd1498Szrj                                            "==> examining pattern def stmt: ");
35838fd1498Szrj 			  dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
35938fd1498Szrj                                             pattern_def_stmt, 0);
36038fd1498Szrj 			}
36138fd1498Szrj 
36238fd1498Szrj 		      stmt = pattern_def_stmt;
36338fd1498Szrj 		      stmt_info = pattern_def_stmt_info;
36438fd1498Szrj 		    }
36538fd1498Szrj 		  else
36638fd1498Szrj 		    {
36738fd1498Szrj 		      pattern_def_si = gsi_none ();
36838fd1498Szrj 		      analyze_pattern_stmt = false;
36938fd1498Szrj 		    }
37038fd1498Szrj 		}
37138fd1498Szrj 	      else
37238fd1498Szrj 		analyze_pattern_stmt = false;
37338fd1498Szrj 	    }
37438fd1498Szrj 
37538fd1498Szrj 	  if (gimple_get_lhs (stmt) == NULL_TREE
37638fd1498Szrj 	      /* MASK_STORE has no lhs, but is ok.  */
37738fd1498Szrj 	      && (!is_gimple_call (stmt)
37838fd1498Szrj 		  || !gimple_call_internal_p (stmt)
37938fd1498Szrj 		  || gimple_call_internal_fn (stmt) != IFN_MASK_STORE))
38038fd1498Szrj 	    {
38138fd1498Szrj 	      if (is_gimple_call (stmt))
38238fd1498Szrj 		{
38338fd1498Szrj 		  /* Ignore calls with no lhs.  These must be calls to
38438fd1498Szrj 		     #pragma omp simd functions, and what vectorization factor
38538fd1498Szrj 		     it really needs can't be determined until
38638fd1498Szrj 		     vectorizable_simd_clone_call.  */
38738fd1498Szrj 		  if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si))
38838fd1498Szrj 		    {
38938fd1498Szrj 		      pattern_def_seq = NULL;
39038fd1498Szrj 		      gsi_next (&si);
39138fd1498Szrj 		    }
39238fd1498Szrj 		  continue;
39338fd1498Szrj 		}
39438fd1498Szrj 	      if (dump_enabled_p ())
39538fd1498Szrj 		{
39638fd1498Szrj 	          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
39738fd1498Szrj                                    "not vectorized: irregular stmt.");
39838fd1498Szrj 		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,  TDF_SLIM, stmt,
39938fd1498Szrj                                     0);
40038fd1498Szrj 		}
40138fd1498Szrj 	      return false;
40238fd1498Szrj 	    }
40338fd1498Szrj 
40438fd1498Szrj 	  if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
40538fd1498Szrj 	    {
40638fd1498Szrj 	      if (dump_enabled_p ())
40738fd1498Szrj 	        {
40838fd1498Szrj 	          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
40938fd1498Szrj                                    "not vectorized: vector stmt in loop:");
41038fd1498Szrj 	          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
41138fd1498Szrj 	        }
41238fd1498Szrj 	      return false;
41338fd1498Szrj 	    }
41438fd1498Szrj 
41538fd1498Szrj 	  bool_result = false;
41638fd1498Szrj 
41738fd1498Szrj 	  if (STMT_VINFO_VECTYPE (stmt_info))
41838fd1498Szrj 	    {
41938fd1498Szrj 	      /* The only case when a vectype had been already set is for stmts
42038fd1498Szrj 	         that contain a dataref, or for "pattern-stmts" (stmts
42138fd1498Szrj 		 generated by the vectorizer to represent/replace a certain
42238fd1498Szrj 		 idiom).  */
42338fd1498Szrj 	      gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
42438fd1498Szrj 			  || is_pattern_stmt_p (stmt_info)
42538fd1498Szrj 			  || !gsi_end_p (pattern_def_si));
42638fd1498Szrj 	      vectype = STMT_VINFO_VECTYPE (stmt_info);
42738fd1498Szrj 	    }
42838fd1498Szrj 	  else
42938fd1498Szrj 	    {
43038fd1498Szrj 	      gcc_assert (!STMT_VINFO_DATA_REF (stmt_info));
43138fd1498Szrj 	      if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
43238fd1498Szrj 		scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
43338fd1498Szrj 	      else
43438fd1498Szrj 		scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
43538fd1498Szrj 
43638fd1498Szrj 	      /* Bool ops don't participate in vectorization factor
43738fd1498Szrj 		 computation.  For comparison use compared types to
43838fd1498Szrj 		 compute a factor.  */
43938fd1498Szrj 	      if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type)
44038fd1498Szrj 		  && is_gimple_assign (stmt)
44138fd1498Szrj 		  && gimple_assign_rhs_code (stmt) != COND_EXPR)
44238fd1498Szrj 		{
44338fd1498Szrj 		  if (STMT_VINFO_RELEVANT_P (stmt_info)
44438fd1498Szrj 		      || STMT_VINFO_LIVE_P (stmt_info))
44538fd1498Szrj 		    mask_producers.safe_push (stmt_info);
44638fd1498Szrj 		  bool_result = true;
44738fd1498Szrj 
44838fd1498Szrj 		  if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
44938fd1498Szrj 		      == tcc_comparison
45038fd1498Szrj 		      && !VECT_SCALAR_BOOLEAN_TYPE_P
45138fd1498Szrj 			    (TREE_TYPE (gimple_assign_rhs1 (stmt))))
45238fd1498Szrj 		    scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
45338fd1498Szrj 		  else
45438fd1498Szrj 		    {
45538fd1498Szrj 		      if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si))
45638fd1498Szrj 			{
45738fd1498Szrj 			  pattern_def_seq = NULL;
45838fd1498Szrj 			  gsi_next (&si);
45938fd1498Szrj 			}
46038fd1498Szrj 		      continue;
46138fd1498Szrj 		    }
46238fd1498Szrj 		}
46338fd1498Szrj 
46438fd1498Szrj 	      if (dump_enabled_p ())
46538fd1498Szrj 		{
46638fd1498Szrj 		  dump_printf_loc (MSG_NOTE, vect_location,
46738fd1498Szrj                                    "get vectype for scalar type:  ");
46838fd1498Szrj 		  dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
46938fd1498Szrj                   dump_printf (MSG_NOTE, "\n");
47038fd1498Szrj 		}
47138fd1498Szrj 	      vectype = get_vectype_for_scalar_type (scalar_type);
47238fd1498Szrj 	      if (!vectype)
47338fd1498Szrj 		{
47438fd1498Szrj 		  if (dump_enabled_p ())
47538fd1498Szrj 		    {
47638fd1498Szrj 		      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
47738fd1498Szrj                                        "not vectorized: unsupported "
47838fd1498Szrj                                        "data-type ");
47938fd1498Szrj 		      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
48038fd1498Szrj                                          scalar_type);
48138fd1498Szrj                       dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
48238fd1498Szrj 		    }
48338fd1498Szrj 		  return false;
48438fd1498Szrj 		}
48538fd1498Szrj 
48638fd1498Szrj 	      if (!bool_result)
48738fd1498Szrj 		STMT_VINFO_VECTYPE (stmt_info) = vectype;
48838fd1498Szrj 
48938fd1498Szrj 	      if (dump_enabled_p ())
49038fd1498Szrj 		{
49138fd1498Szrj 		  dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
49238fd1498Szrj 		  dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
49338fd1498Szrj                   dump_printf (MSG_NOTE, "\n");
49438fd1498Szrj 		}
49538fd1498Szrj             }
49638fd1498Szrj 
49738fd1498Szrj 	  /* Don't try to compute the VF from scalar types if the stmt
49838fd1498Szrj 	     produces a boolean vector.  Use the result vectype instead.  */
49938fd1498Szrj 	  if (VECTOR_BOOLEAN_TYPE_P (vectype))
50038fd1498Szrj 	    vf_vectype = vectype;
50138fd1498Szrj 	  else
50238fd1498Szrj 	    {
50338fd1498Szrj 	      /* The vectorization factor is according to the smallest
50438fd1498Szrj 		 scalar type (or the largest vector size, but we only
50538fd1498Szrj 		 support one vector size per loop).  */
50638fd1498Szrj 	      if (!bool_result)
50738fd1498Szrj 		scalar_type = vect_get_smallest_scalar_type (stmt, &dummy,
50838fd1498Szrj 							     &dummy);
50938fd1498Szrj 	      if (dump_enabled_p ())
51038fd1498Szrj 		{
51138fd1498Szrj 		  dump_printf_loc (MSG_NOTE, vect_location,
51238fd1498Szrj 				   "get vectype for scalar type:  ");
51338fd1498Szrj 		  dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
51438fd1498Szrj 		  dump_printf (MSG_NOTE, "\n");
51538fd1498Szrj 		}
51638fd1498Szrj 	      vf_vectype = get_vectype_for_scalar_type (scalar_type);
51738fd1498Szrj 	    }
51838fd1498Szrj 	  if (!vf_vectype)
51938fd1498Szrj 	    {
52038fd1498Szrj 	      if (dump_enabled_p ())
52138fd1498Szrj 		{
52238fd1498Szrj 		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
52338fd1498Szrj                                    "not vectorized: unsupported data-type ");
52438fd1498Szrj 		  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
52538fd1498Szrj                                      scalar_type);
52638fd1498Szrj                   dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
52738fd1498Szrj 		}
52838fd1498Szrj 	      return false;
52938fd1498Szrj 	    }
53038fd1498Szrj 
53138fd1498Szrj 	  if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
53238fd1498Szrj 			GET_MODE_SIZE (TYPE_MODE (vf_vectype))))
53338fd1498Szrj 	    {
53438fd1498Szrj 	      if (dump_enabled_p ())
53538fd1498Szrj 		{
53638fd1498Szrj 		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
53738fd1498Szrj                                    "not vectorized: different sized vector "
53838fd1498Szrj                                    "types in statement, ");
53938fd1498Szrj 		  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
54038fd1498Szrj                                      vectype);
54138fd1498Szrj 		  dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
54238fd1498Szrj 		  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
54338fd1498Szrj                                      vf_vectype);
54438fd1498Szrj                   dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
54538fd1498Szrj 		}
54638fd1498Szrj 	      return false;
54738fd1498Szrj 	    }
54838fd1498Szrj 
54938fd1498Szrj 	  if (dump_enabled_p ())
55038fd1498Szrj 	    {
55138fd1498Szrj 	      dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
55238fd1498Szrj 	      dump_generic_expr (MSG_NOTE, TDF_SLIM, vf_vectype);
55338fd1498Szrj               dump_printf (MSG_NOTE, "\n");
55438fd1498Szrj 	    }
55538fd1498Szrj 
55638fd1498Szrj 	  if (dump_enabled_p ())
55738fd1498Szrj 	    {
55838fd1498Szrj 	      dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
55938fd1498Szrj 	      dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (vf_vectype));
56038fd1498Szrj 	      dump_printf (MSG_NOTE, "\n");
56138fd1498Szrj 	    }
56238fd1498Szrj 
56338fd1498Szrj 	  vect_update_max_nunits (&vectorization_factor, vf_vectype);
56438fd1498Szrj 
56538fd1498Szrj 	  if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si))
56638fd1498Szrj 	    {
56738fd1498Szrj 	      pattern_def_seq = NULL;
56838fd1498Szrj 	      gsi_next (&si);
56938fd1498Szrj 	    }
57038fd1498Szrj         }
57138fd1498Szrj     }
57238fd1498Szrj 
57338fd1498Szrj   /* TODO: Analyze cost. Decide if worth while to vectorize.  */
57438fd1498Szrj   if (dump_enabled_p ())
57538fd1498Szrj     {
57638fd1498Szrj       dump_printf_loc (MSG_NOTE, vect_location, "vectorization factor = ");
57738fd1498Szrj       dump_dec (MSG_NOTE, vectorization_factor);
57838fd1498Szrj       dump_printf (MSG_NOTE, "\n");
57938fd1498Szrj     }
58038fd1498Szrj 
58138fd1498Szrj   if (known_le (vectorization_factor, 1U))
58238fd1498Szrj     {
58338fd1498Szrj       if (dump_enabled_p ())
58438fd1498Szrj         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
58538fd1498Szrj                          "not vectorized: unsupported data-type\n");
58638fd1498Szrj       return false;
58738fd1498Szrj     }
58838fd1498Szrj   LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
58938fd1498Szrj 
59038fd1498Szrj   for (i = 0; i < mask_producers.length (); i++)
59138fd1498Szrj     {
59238fd1498Szrj       tree mask_type = NULL;
59338fd1498Szrj 
59438fd1498Szrj       stmt = STMT_VINFO_STMT (mask_producers[i]);
59538fd1498Szrj 
59638fd1498Szrj       if (is_gimple_assign (stmt)
59738fd1498Szrj 	  && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
59838fd1498Szrj 	  && !VECT_SCALAR_BOOLEAN_TYPE_P
59938fd1498Szrj 				      (TREE_TYPE (gimple_assign_rhs1 (stmt))))
60038fd1498Szrj 	{
60138fd1498Szrj 	  scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
60238fd1498Szrj 	  mask_type = get_mask_type_for_scalar_type (scalar_type);
60338fd1498Szrj 
60438fd1498Szrj 	  if (!mask_type)
60538fd1498Szrj 	    {
60638fd1498Szrj 	      if (dump_enabled_p ())
60738fd1498Szrj 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
60838fd1498Szrj 				 "not vectorized: unsupported mask\n");
60938fd1498Szrj 	      return false;
61038fd1498Szrj 	    }
61138fd1498Szrj 	}
61238fd1498Szrj       else
61338fd1498Szrj 	{
61438fd1498Szrj 	  tree rhs;
61538fd1498Szrj 	  ssa_op_iter iter;
61638fd1498Szrj 	  gimple *def_stmt;
61738fd1498Szrj 	  enum vect_def_type dt;
61838fd1498Szrj 
61938fd1498Szrj 	  FOR_EACH_SSA_TREE_OPERAND (rhs, stmt, iter, SSA_OP_USE)
62038fd1498Szrj 	    {
62138fd1498Szrj 	      if (!vect_is_simple_use (rhs, mask_producers[i]->vinfo,
62238fd1498Szrj 				       &def_stmt, &dt, &vectype))
62338fd1498Szrj 		{
62438fd1498Szrj 		  if (dump_enabled_p ())
62538fd1498Szrj 		    {
62638fd1498Szrj 		      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
62738fd1498Szrj 				       "not vectorized: can't compute mask type "
62838fd1498Szrj 				       "for statement, ");
62938fd1498Szrj 		      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,  TDF_SLIM, stmt,
63038fd1498Szrj 					0);
63138fd1498Szrj 		    }
63238fd1498Szrj 		  return false;
63338fd1498Szrj 		}
63438fd1498Szrj 
63538fd1498Szrj 	      /* No vectype probably means external definition.
63638fd1498Szrj 		 Allow it in case there is another operand which
63738fd1498Szrj 		 allows to determine mask type.  */
63838fd1498Szrj 	      if (!vectype)
63938fd1498Szrj 		continue;
64038fd1498Szrj 
64138fd1498Szrj 	      if (!mask_type)
64238fd1498Szrj 		mask_type = vectype;
64338fd1498Szrj 	      else if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_type),
64438fd1498Szrj 				 TYPE_VECTOR_SUBPARTS (vectype)))
64538fd1498Szrj 		{
64638fd1498Szrj 		  if (dump_enabled_p ())
64738fd1498Szrj 		    {
64838fd1498Szrj 		      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
64938fd1498Szrj 				       "not vectorized: different sized masks "
65038fd1498Szrj 				       "types in statement, ");
65138fd1498Szrj 		      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
65238fd1498Szrj 					 mask_type);
65338fd1498Szrj 		      dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
65438fd1498Szrj 		      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
65538fd1498Szrj 					 vectype);
65638fd1498Szrj 		      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
65738fd1498Szrj 		    }
65838fd1498Szrj 		  return false;
65938fd1498Szrj 		}
66038fd1498Szrj 	      else if (VECTOR_BOOLEAN_TYPE_P (mask_type)
66138fd1498Szrj 		       != VECTOR_BOOLEAN_TYPE_P (vectype))
66238fd1498Szrj 		{
66338fd1498Szrj 		  if (dump_enabled_p ())
66438fd1498Szrj 		    {
66538fd1498Szrj 		      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
66638fd1498Szrj 				       "not vectorized: mixed mask and "
66738fd1498Szrj 				       "nonmask vector types in statement, ");
66838fd1498Szrj 		      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
66938fd1498Szrj 					 mask_type);
67038fd1498Szrj 		      dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
67138fd1498Szrj 		      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
67238fd1498Szrj 					 vectype);
67338fd1498Szrj 		      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
67438fd1498Szrj 		    }
67538fd1498Szrj 		  return false;
67638fd1498Szrj 		}
67738fd1498Szrj 	    }
67838fd1498Szrj 
67938fd1498Szrj 	  /* We may compare boolean value loaded as vector of integers.
68038fd1498Szrj 	     Fix mask_type in such case.  */
68138fd1498Szrj 	  if (mask_type
68238fd1498Szrj 	      && !VECTOR_BOOLEAN_TYPE_P (mask_type)
68338fd1498Szrj 	      && gimple_code (stmt) == GIMPLE_ASSIGN
68438fd1498Szrj 	      && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison)
68538fd1498Szrj 	    mask_type = build_same_sized_truth_vector_type (mask_type);
68638fd1498Szrj 	}
68738fd1498Szrj 
68838fd1498Szrj       /* No mask_type should mean loop invariant predicate.
68938fd1498Szrj 	 This is probably a subject for optimization in
69038fd1498Szrj 	 if-conversion.  */
69138fd1498Szrj       if (!mask_type)
69238fd1498Szrj 	{
69338fd1498Szrj 	  if (dump_enabled_p ())
69438fd1498Szrj 	    {
69538fd1498Szrj 	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
69638fd1498Szrj 			       "not vectorized: can't compute mask type "
69738fd1498Szrj 			       "for statement, ");
69838fd1498Szrj 	      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,  TDF_SLIM, stmt,
69938fd1498Szrj 				0);
70038fd1498Szrj 	    }
70138fd1498Szrj 	  return false;
70238fd1498Szrj 	}
70338fd1498Szrj 
70438fd1498Szrj       STMT_VINFO_VECTYPE (mask_producers[i]) = mask_type;
70538fd1498Szrj     }
70638fd1498Szrj 
70738fd1498Szrj   return true;
70838fd1498Szrj }
70938fd1498Szrj 
71038fd1498Szrj 
/* Function vect_is_simple_iv_evolution.

   Check whether ACCESS_FN, the scalar evolution of a loop-header PHI in
   loop number LOOP_NB, describes a simple induction variable, i.e. a
   non-nested polynomial chrec of the form BASE + i * STEP.  On success
   store the initial value in *INIT and the per-iteration step in *STEP
   and return true.  *INIT and *STEP are also set on some failing paths,
   so callers must rely on the return value only.

   FORNOW: A simple evolution of an induction variables in the loop is
   considered a polynomial evolution.  */

static bool
vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
                             tree * step)
{
  tree init_expr;
  tree step_expr;
  tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
  basic_block bb;

  /* When there is no evolution in this loop, the evolution function
     is not "simple".  */
  if (evolution_part == NULL_TREE)
    return false;

  /* When the evolution is a polynomial of degree >= 2
     the evolution function is not "simple".  (A nested chrec in the
     evolution part means a higher-degree polynomial.)  */
  if (tree_is_chrec (evolution_part))
    return false;

  step_expr = evolution_part;
  init_expr = unshare_expr (initial_condition_in_loop_num (access_fn, loop_nb));

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "step: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, step_expr);
      dump_printf (MSG_NOTE, ",  init: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, init_expr);
      dump_printf (MSG_NOTE, "\n");
    }

  *init = init_expr;
  *step = step_expr;

  /* Accept the step only if it is:
     - an INTEGER_CST, or
     - an SSA_NAME defined outside the loop (or with no defining BB),
       of integral type, or of scalar float type when -fassociative-math
       allows reassociation, or
     - a REAL_CST, again only under -fassociative-math.  */
  if (TREE_CODE (step_expr) != INTEGER_CST
      && (TREE_CODE (step_expr) != SSA_NAME
	  || ((bb = gimple_bb (SSA_NAME_DEF_STMT (step_expr)))
	      && flow_bb_inside_loop_p (get_loop (cfun, loop_nb), bb))
	  || (!INTEGRAL_TYPE_P (TREE_TYPE (step_expr))
	      && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr))
		  || !flag_associative_math)))
      && (TREE_CODE (step_expr) != REAL_CST
	  || !flag_associative_math))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "step unknown.\n");
      return false;
    }

  return true;
}
76838fd1498Szrj 
/* Function vect_analyze_scalar_cycles_1.

   Examine the cross iteration def-use cycles of scalar variables
   in LOOP.  LOOP_VINFO represents the loop that is now being
   considered for vectorization (can be LOOP, or an outer-loop
   enclosing LOOP).

   Works in two passes over the loop-header PHIs: first classify
   inductions, then classify the remaining PHIs as reductions,
   double reductions or nested cycles.  */

static void
vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
{
  basic_block bb = loop->header;
  tree init, step;
  /* PHIs not recognized as simple inductions in the first pass;
     re-examined for reductions/nested cycles in the second pass.  */
  auto_vec<gimple *, 64> worklist;
  gphi_iterator gsi;
  bool double_reduc;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_analyze_scalar_cycles ===\n");

  /* First - identify all inductions.  Reduction detection assumes that all the
     inductions have been identified, therefore, this order must not be
     changed.  */
  for (gsi = gsi_start_phis  (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();
      tree access_fn = NULL;
      tree def = PHI_RESULT (phi);
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
	}

      /* Skip virtual phi's.  The data dependences that are associated with
         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
      if (virtual_operand_p (def))
	continue;

      STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;

      /* Analyze the evolution function.  */
      access_fn = analyze_scalar_evolution (loop, def);
      if (access_fn)
	{
	  STRIP_NOPS (access_fn);
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location,
                               "Access function of PHI: ");
	      dump_generic_expr (MSG_NOTE, TDF_SLIM, access_fn);
              dump_printf (MSG_NOTE, "\n");
	    }
	  /* Record base and evolution even if the PHI ends up on the
	     worklist; the asserts below rely on them for inductions.  */
	  STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (stmt_vinfo)
	    = initial_condition_in_loop_num (access_fn, loop->num);
	  STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo)
	    = evolution_part_in_loop_num (access_fn, loop->num);
	}

      /* Not a usable induction: either no access function, not a simple
	 IV evolution, or (for an enclosing outer loop) a step that is
	 not an integer constant.  Queue for the reduction pass.  */
      if (!access_fn
	  || !vect_is_simple_iv_evolution (loop->num, access_fn, &init, &step)
	  || (LOOP_VINFO_LOOP (loop_vinfo) != loop
	      && TREE_CODE (step) != INTEGER_CST))
	{
	  worklist.safe_push (phi);
	  continue;
	}

      gcc_assert (STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (stmt_vinfo)
		  != NULL_TREE);
      gcc_assert (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo) != NULL_TREE);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "Detected induction.\n");
      STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
    }


  /* Second - identify all reductions and nested cycles.  */
  while (worklist.length () > 0)
    {
      gimple *phi = worklist.pop ();
      tree def = PHI_RESULT (phi);
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
      gimple *reduc_stmt;

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
        }

      gcc_assert (!virtual_operand_p (def)
		  && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type);

      reduc_stmt = vect_force_simple_reduction (loop_vinfo, phi,
						&double_reduc, false);
      if (reduc_stmt)
        {
          if (double_reduc)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_NOTE, vect_location,
				 "Detected double reduction.\n");

              STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_double_reduction_def;
              STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
                                                    vect_double_reduction_def;
            }
          else
            {
	      /* A cycle in a loop nested inside the loop being vectorized
		 is a nested cycle, not a reduction of that loop.  */
              if (loop != LOOP_VINFO_LOOP (loop_vinfo))
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_NOTE, vect_location,
				     "Detected vectorizable nested cycle.\n");

                  STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_nested_cycle;
                  STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
                                                             vect_nested_cycle;
                }
              else
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_NOTE, vect_location,
				     "Detected reduction.\n");

                  STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
                  STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
                                                           vect_reduction_def;
                  /* Store the reduction cycles for possible vectorization in
                     loop-aware SLP if it was not detected as reduction
		     chain.  */
		  if (! GROUP_FIRST_ELEMENT (vinfo_for_stmt (reduc_stmt)))
		    LOOP_VINFO_REDUCTIONS (loop_vinfo).safe_push (reduc_stmt);
                }
            }
        }
      else
        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "Unknown def-use cycle pattern.\n");
    }
}
91538fd1498Szrj 
91638fd1498Szrj 
91738fd1498Szrj /* Function vect_analyze_scalar_cycles.
91838fd1498Szrj 
91938fd1498Szrj    Examine the cross iteration def-use cycles of scalar variables, by
92038fd1498Szrj    analyzing the loop-header PHIs of scalar variables.  Classify each
92138fd1498Szrj    cycle as one of the following: invariant, induction, reduction, unknown.
92238fd1498Szrj    We do that for the loop represented by LOOP_VINFO, and also to its
92338fd1498Szrj    inner-loop, if exists.
92438fd1498Szrj    Examples for scalar cycles:
92538fd1498Szrj 
92638fd1498Szrj    Example1: reduction:
92738fd1498Szrj 
92838fd1498Szrj               loop1:
92938fd1498Szrj               for (i=0; i<N; i++)
93038fd1498Szrj                  sum += a[i];
93138fd1498Szrj 
93238fd1498Szrj    Example2: induction:
93338fd1498Szrj 
93438fd1498Szrj               loop2:
93538fd1498Szrj               for (i=0; i<N; i++)
93638fd1498Szrj                  a[i] = i;  */
93738fd1498Szrj 
93838fd1498Szrj static void
vect_analyze_scalar_cycles(loop_vec_info loop_vinfo)93938fd1498Szrj vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
94038fd1498Szrj {
94138fd1498Szrj   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
94238fd1498Szrj 
94338fd1498Szrj   vect_analyze_scalar_cycles_1 (loop_vinfo, loop);
94438fd1498Szrj 
94538fd1498Szrj   /* When vectorizing an outer-loop, the inner-loop is executed sequentially.
94638fd1498Szrj      Reductions in such inner-loop therefore have different properties than
94738fd1498Szrj      the reductions in the nest that gets vectorized:
94838fd1498Szrj      1. When vectorized, they are executed in the same order as in the original
94938fd1498Szrj         scalar loop, so we can't change the order of computation when
95038fd1498Szrj         vectorizing them.
95138fd1498Szrj      2. FIXME: Inner-loop reductions can be used in the inner-loop, so the
95238fd1498Szrj         current checks are too strict.  */
95338fd1498Szrj 
95438fd1498Szrj   if (loop->inner)
95538fd1498Szrj     vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner);
95638fd1498Szrj }
95738fd1498Szrj 
/* Transfer group and reduction information from STMT to its pattern stmt.

   STMT is the first element of a reduction chain that has been replaced
   by pattern statements; rewire the GROUP_FIRST/GROUP_NEXT links of the
   pattern statements to mirror the original chain and mark the last
   pattern statement as a reduction.  */

static void
vect_fixup_reduc_chain (gimple *stmt)
{
  /* The pattern stmt corresponding to the head of the chain; it becomes
     the new GROUP_FIRST_ELEMENT of every pattern stmt.  */
  gimple *firstp = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt));
  gimple *stmtp;
  /* STMT must be a chain head whose pattern stmt is not yet linked
     into a group.  */
  gcc_assert (!GROUP_FIRST_ELEMENT (vinfo_for_stmt (firstp))
	      && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)));
  GROUP_SIZE (vinfo_for_stmt (firstp)) = GROUP_SIZE (vinfo_for_stmt (stmt));
  /* Walk the original chain; for each element link its pattern stmt
     to FIRSTP and to the pattern stmt of the next chain element.  */
  do
    {
      stmtp = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt));
      GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmtp)) = firstp;
      stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
      if (stmt)
	GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmtp))
	  = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt));
    }
  while (stmt);
  /* STMTP is now the pattern stmt of the last chain element; it carries
     the reduction def-type.  */
  STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmtp)) = vect_reduction_def;
}
98038fd1498Szrj 
/* Fixup scalar cycles that now have their stmts detected as patterns.

   For each recorded reduction chain whose head was replaced by a pattern
   stmt, redirect the chain to the pattern stmts -- but only when every
   element of the chain was pattern-replaced.  */

static void
vect_fixup_scalar_cycles_with_patterns (loop_vec_info loop_vinfo)
{
  gimple *first;
  unsigned i;

  FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo), i, first)
    if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (first)))
      {
	/* Scan the chain; NEXT ends up NULL iff every element is part
	   of a pattern.  */
	gimple *next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (first));
	while (next)
	  {
	    if (! STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next)))
	      break;
	    next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
	  }
	/* If not all stmt in the chain are patterns try to handle
	   the chain without patterns.  */
	if (! next)
	  {
	    vect_fixup_reduc_chain (first);
	    /* Replace the recorded chain head by its pattern stmt.  */
	    LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo)[i]
	      = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first));
	  }
      }
}
100938fd1498Szrj 
/* Function vect_get_loop_niters.

   Determine how many iterations the loop is executed and place it
   in NUMBER_OF_ITERATIONS.  Place the number of latch iterations
   in NUMBER_OF_ITERATIONSM1.  Place the condition under which the
   niter information holds in ASSUMPTIONS.

   On failure the niter outputs are left as chrec_dont_know and
   ASSUMPTIONS as boolean_true_node.

   Return the loop exit condition.  */


static gcond *
vect_get_loop_niters (struct loop *loop, tree *assumptions,
		      tree *number_of_iterations, tree *number_of_iterationsm1)
{
  edge exit = single_exit (loop);
  struct tree_niter_desc niter_desc;
  tree niter_assumptions, niter, may_be_zero;
  gcond *cond = get_loop_exit_condition (loop);

  /* Pessimistic defaults, overwritten below on success.  */
  *assumptions = boolean_true_node;
  *number_of_iterationsm1 = chrec_dont_know;
  *number_of_iterations = chrec_dont_know;
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "=== get_loop_niters ===\n");

  /* Without a single exit edge we cannot compute a niter.  */
  if (!exit)
    return cond;

  niter = chrec_dont_know;
  may_be_zero = NULL_TREE;
  niter_assumptions = boolean_true_node;
  if (!number_of_iterations_exit_assumptions (loop, exit, &niter_desc, NULL)
      || chrec_contains_undetermined (niter_desc.niter))
    return cond;

  niter_assumptions = niter_desc.assumptions;
  may_be_zero = niter_desc.may_be_zero;
  niter = niter_desc.niter;

  /* A trivially-false MAY_BE_ZERO needs no special handling.  */
  if (may_be_zero && integer_zerop (may_be_zero))
    may_be_zero = NULL_TREE;

  if (may_be_zero)
    {
      if (COMPARISON_CLASS_P (may_be_zero))
	{
	  /* Try to combine may_be_zero with assumptions, this can simplify
	     computation of niter expression.  */
	  if (niter_assumptions && !integer_nonzerop (niter_assumptions))
	    niter_assumptions = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
					     niter_assumptions,
					     fold_build1 (TRUTH_NOT_EXPR,
							  boolean_type_node,
							  may_be_zero));
	  else
	    /* Fold the zero-iterations case into the niter expression
	       itself: MAY_BE_ZERO ? 0 : NITER.  */
	    niter = fold_build3 (COND_EXPR, TREE_TYPE (niter), may_be_zero,
				 build_int_cst (TREE_TYPE (niter), 0),
				 rewrite_to_non_trapping_overflow (niter));

	  may_be_zero = NULL_TREE;
	}
      else if (integer_nonzerop (may_be_zero))
	{
	  /* The latch never executes: 0 latch iterations, 1 header
	     execution.  */
	  *number_of_iterationsm1 = build_int_cst (TREE_TYPE (niter), 0);
	  *number_of_iterations = build_int_cst (TREE_TYPE (niter), 1);
	  return cond;
	}
      else
	/* Non-constant, non-comparison MAY_BE_ZERO: give up.  */
	return cond;
    }

  *assumptions = niter_assumptions;
  *number_of_iterationsm1 = niter;

  /* We want the number of loop header executions which is the number
     of latch executions plus one.
     ???  For UINT_MAX latch executions this number overflows to zero
     for loops like do { n++; } while (n != 0);  */
  if (niter && !chrec_contains_undetermined (niter))
    niter = fold_build2 (PLUS_EXPR, TREE_TYPE (niter), unshare_expr (niter),
			  build_int_cst (TREE_TYPE (niter), 1));
  *number_of_iterations = niter;

  return cond;
}
109638fd1498Szrj 
109738fd1498Szrj /* Function bb_in_loop_p
109838fd1498Szrj 
109938fd1498Szrj    Used as predicate for dfs order traversal of the loop bbs.  */
110038fd1498Szrj 
110138fd1498Szrj static bool
bb_in_loop_p(const_basic_block bb,const void * data)110238fd1498Szrj bb_in_loop_p (const_basic_block bb, const void *data)
110338fd1498Szrj {
110438fd1498Szrj   const struct loop *const loop = (const struct loop *)data;
110538fd1498Szrj   if (flow_bb_inside_loop_p (loop, bb))
110638fd1498Szrj     return true;
110738fd1498Szrj   return false;
110838fd1498Szrj }
110938fd1498Szrj 
111038fd1498Szrj 
/* Create and initialize a new loop_vec_info struct for LOOP_IN, as well as
   stmt_vec_info structs for all the stmts in LOOP_IN.

   Member initializers must stay in class-declaration order.  After the
   body runs, BBS holds the loop's basic blocks in dfs order and every
   stmt/phi has a fresh stmt_vec_info and uid 0.  */

_loop_vec_info::_loop_vec_info (struct loop *loop_in)
  : vec_info (vec_info::loop, init_cost (loop_in)),
    loop (loop_in),
    bbs (XCNEWVEC (basic_block, loop->num_nodes)),
    num_itersm1 (NULL_TREE),
    num_iters (NULL_TREE),
    num_iters_unchanged (NULL_TREE),
    num_iters_assumptions (NULL_TREE),
    th (0),
    versioning_threshold (0),
    vectorization_factor (0),
    max_vectorization_factor (0),
    mask_skip_niters (NULL_TREE),
    mask_compare_type (NULL_TREE),
    unaligned_dr (NULL),
    peeling_for_alignment (0),
    ptr_mask (0),
    ivexpr_map (NULL),
    slp_unrolling_factor (1),
    single_scalar_iteration_cost (0),
    vectorizable (false),
    can_fully_mask_p (true),
    fully_masked_p (false),
    peeling_for_gaps (false),
    peeling_for_niter (false),
    operands_swapped (false),
    no_data_dependencies (false),
    has_mask_store (false),
    scalar_loop (NULL),
    orig_loop_info (NULL)
{
  /* Create/Update stmt_info for all stmts in the loop.  */
  basic_block *body = get_loop_body (loop);
  for (unsigned int i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = body[i];
      gimple_stmt_iterator si;

      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  gimple *phi = gsi_stmt (si);
	  gimple_set_uid (phi, 0);
	  set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, this));
	}

      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  gimple *stmt = gsi_stmt (si);
	  gimple_set_uid (stmt, 0);
	  set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, this));
	}
    }
  /* BODY was only needed for the stmt_vec_info walk above.  */
  free (body);

  /* CHECKME: We want to visit all BBs before their successors (except for
     latch blocks, for which this assertion wouldn't hold).  In the simple
     case of the loop forms we allow, a dfs order of the BBs would the same
     as reversed postorder traversal, so we are safe.  */

  unsigned int nbbs = dfs_enumerate_from (loop->header, 0, bb_in_loop_p,
					  bbs, loop->num_nodes, loop);
  gcc_assert (nbbs == loop->num_nodes);
}
117738fd1498Szrj 
117838fd1498Szrj /* Free all levels of MASKS.  */
117938fd1498Szrj 
118038fd1498Szrj void
release_vec_loop_masks(vec_loop_masks * masks)118138fd1498Szrj release_vec_loop_masks (vec_loop_masks *masks)
118238fd1498Szrj {
118338fd1498Szrj   rgroup_masks *rgm;
118438fd1498Szrj   unsigned int i;
118538fd1498Szrj   FOR_EACH_VEC_ELT (*masks, i, rgm)
118638fd1498Szrj     rgm->masks.release ();
118738fd1498Szrj   masks->release ();
118838fd1498Szrj }
118938fd1498Szrj 
/* Free all memory used by the _loop_vec_info, as well as all the
   stmt_vec_info structs of all the stmts in the loop.

   Also restores canonical GIMPLE operand order for statements whose
   operands were swapped during analysis (see OPERANDS_SWAPPED), and
   clears LOOP->aux which pointed at this structure.  */

_loop_vec_info::~_loop_vec_info ()
{
  int nbbs;
  gimple_stmt_iterator si;
  int j;

  nbbs = loop->num_nodes;
  for (j = 0; j < nbbs; j++)
    {
      basic_block bb = bbs[j];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        free_stmt_vec_info (gsi_stmt (si));

      for (si = gsi_start_bb (bb); !gsi_end_p (si); )
        {
	  gimple *stmt = gsi_stmt (si);

	  /* We may have broken canonical form by moving a constant
	     into RHS1 of a commutative op.  Fix such occurrences.  */
	  if (operands_swapped && is_gimple_assign (stmt))
	    {
	      enum tree_code code = gimple_assign_rhs_code (stmt);

	      /* Commutative binary op: move the constant back to RHS2.  */
	      if ((code == PLUS_EXPR
		   || code == POINTER_PLUS_EXPR
		   || code == MULT_EXPR)
		  && CONSTANT_CLASS_P (gimple_assign_rhs1 (stmt)))
		swap_ssa_operands (stmt,
				   gimple_assign_rhs1_ptr (stmt),
				   gimple_assign_rhs2_ptr (stmt));
	      else if (code == COND_EXPR
		       && CONSTANT_CLASS_P (gimple_assign_rhs2 (stmt)))
		{
		  /* COND_EXPR: swapping the arms requires inverting the
		     comparison; only possible when the inverse exists
		     (invert_tree_comparison can fail for NaN-honoring
		     float compares).  */
		  tree cond_expr = gimple_assign_rhs1 (stmt);
		  enum tree_code cond_code = TREE_CODE (cond_expr);

		  if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
		    {
		      bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr,
								  0));
		      cond_code = invert_tree_comparison (cond_code,
							  honor_nans);
		      if (cond_code != ERROR_MARK)
			{
			  TREE_SET_CODE (cond_expr, cond_code);
			  swap_ssa_operands (stmt,
					     gimple_assign_rhs2_ptr (stmt),
					     gimple_assign_rhs3_ptr (stmt));
			}
		    }
		}
	    }

	  /* Free stmt_vec_info.  */
	  free_stmt_vec_info (stmt);
          gsi_next (&si);
        }
    }

  free (bbs);

  release_vec_loop_masks (&masks);
  delete ivexpr_map;

  loop->aux = NULL;
}
125938fd1498Szrj 
126038fd1498Szrj /* Return an invariant or register for EXPR and emit necessary
126138fd1498Szrj    computations in the LOOP_VINFO loop preheader.  */
126238fd1498Szrj 
126338fd1498Szrj tree
cse_and_gimplify_to_preheader(loop_vec_info loop_vinfo,tree expr)126438fd1498Szrj cse_and_gimplify_to_preheader (loop_vec_info loop_vinfo, tree expr)
126538fd1498Szrj {
126638fd1498Szrj   if (is_gimple_reg (expr)
126738fd1498Szrj       || is_gimple_min_invariant (expr))
126838fd1498Szrj     return expr;
126938fd1498Szrj 
127038fd1498Szrj   if (! loop_vinfo->ivexpr_map)
127138fd1498Szrj     loop_vinfo->ivexpr_map = new hash_map<tree_operand_hash, tree>;
127238fd1498Szrj   tree &cached = loop_vinfo->ivexpr_map->get_or_insert (expr);
127338fd1498Szrj   if (! cached)
127438fd1498Szrj     {
127538fd1498Szrj       gimple_seq stmts = NULL;
127638fd1498Szrj       cached = force_gimple_operand (unshare_expr (expr),
127738fd1498Szrj 				     &stmts, true, NULL_TREE);
127838fd1498Szrj       if (stmts)
127938fd1498Szrj 	{
128038fd1498Szrj 	  edge e = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
128138fd1498Szrj 	  gsi_insert_seq_on_edge_immediate (e, stmts);
128238fd1498Szrj 	}
128338fd1498Szrj     }
128438fd1498Szrj   return cached;
128538fd1498Szrj }
128638fd1498Szrj 
128738fd1498Szrj /* Return true if we can use CMP_TYPE as the comparison type to produce
128838fd1498Szrj    all masks required to mask LOOP_VINFO.  */
128938fd1498Szrj 
129038fd1498Szrj static bool
can_produce_all_loop_masks_p(loop_vec_info loop_vinfo,tree cmp_type)129138fd1498Szrj can_produce_all_loop_masks_p (loop_vec_info loop_vinfo, tree cmp_type)
129238fd1498Szrj {
129338fd1498Szrj   rgroup_masks *rgm;
129438fd1498Szrj   unsigned int i;
129538fd1498Szrj   FOR_EACH_VEC_ELT (LOOP_VINFO_MASKS (loop_vinfo), i, rgm)
129638fd1498Szrj     if (rgm->mask_type != NULL_TREE
129738fd1498Szrj 	&& !direct_internal_fn_supported_p (IFN_WHILE_ULT,
129838fd1498Szrj 					    cmp_type, rgm->mask_type,
129938fd1498Szrj 					    OPTIMIZE_FOR_SPEED))
130038fd1498Szrj       return false;
130138fd1498Szrj   return true;
130238fd1498Szrj }
130338fd1498Szrj 
130438fd1498Szrj /* Calculate the maximum number of scalars per iteration for every
130538fd1498Szrj    rgroup in LOOP_VINFO.  */
130638fd1498Szrj 
130738fd1498Szrj static unsigned int
vect_get_max_nscalars_per_iter(loop_vec_info loop_vinfo)130838fd1498Szrj vect_get_max_nscalars_per_iter (loop_vec_info loop_vinfo)
130938fd1498Szrj {
131038fd1498Szrj   unsigned int res = 1;
131138fd1498Szrj   unsigned int i;
131238fd1498Szrj   rgroup_masks *rgm;
131338fd1498Szrj   FOR_EACH_VEC_ELT (LOOP_VINFO_MASKS (loop_vinfo), i, rgm)
131438fd1498Szrj     res = MAX (res, rgm->max_nscalars_per_iter);
131538fd1498Szrj   return res;
131638fd1498Szrj }
131738fd1498Szrj 
/* Each statement in LOOP_VINFO can be masked where necessary.  Check
   whether we can actually generate the masks required.  Return true if so,
   storing the type of the scalar IV in LOOP_VINFO_MASK_COMPARE_TYPE.
   Return false if no suitable comparison type exists or if no statement
   needs masking at all.  */

static bool
vect_verify_full_masking (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  unsigned int min_ni_width;

  /* Use a normal loop if there are no statements that need masking.
     This only happens in rare degenerate cases: it means that the loop
     has no loads, no stores, and no live-out values.  */
  if (LOOP_VINFO_MASKS (loop_vinfo).is_empty ())
    return false;

  /* Get the maximum number of iterations that is representable
     in the counter type.  */
  tree ni_type = TREE_TYPE (LOOP_VINFO_NITERSM1 (loop_vinfo));
  widest_int max_ni = wi::to_widest (TYPE_MAX_VALUE (ni_type)) + 1;

  /* Get a more refined estimate for the number of iterations.  */
  widest_int max_back_edges;
  if (max_loop_iterations (loop, &max_back_edges))
    max_ni = wi::smin (max_ni, max_back_edges + 1);

  /* Account for rgroup masks, in which each bit is replicated N times.  */
  max_ni *= vect_get_max_nscalars_per_iter (loop_vinfo);

  /* Work out how many bits we need to represent the limit.  */
  min_ni_width = wi::min_precision (max_ni, UNSIGNED);

  /* Find a scalar mode for which WHILE_ULT is supported.  Iterate from
     narrow to wide so that we keep the widest (most reusable) valid
     candidate found before reaching Pmode.  */
  opt_scalar_int_mode cmp_mode_iter;
  tree cmp_type = NULL_TREE;
  FOR_EACH_MODE_IN_CLASS (cmp_mode_iter, MODE_INT)
    {
      unsigned int cmp_bits = GET_MODE_BITSIZE (cmp_mode_iter.require ());
      if (cmp_bits >= min_ni_width
	  && targetm.scalar_mode_supported_p (cmp_mode_iter.require ()))
	{
	  /* An unsigned integer type with exactly CMP_BITS bits.  */
	  tree this_type = build_nonstandard_integer_type (cmp_bits, true);
	  if (this_type
	      && can_produce_all_loop_masks_p (loop_vinfo, this_type))
	    {
	      /* Although we could stop as soon as we find a valid mode,
		 it's often better to continue until we hit Pmode, since the
		 operands to the WHILE are more likely to be reusable in
		 address calculations.  */
	      cmp_type = this_type;
	      if (cmp_bits >= GET_MODE_BITSIZE (Pmode))
		break;
	    }
	}
    }

  if (!cmp_type)
    return false;

  LOOP_VINFO_MASK_COMPARE_TYPE (loop_vinfo) = cmp_type;
  return true;
}
138038fd1498Szrj 
/* Calculate the cost of one scalar iteration of the loop: record a cost
   entry per relevant statement in LOOP_VINFO_SCALAR_ITERATION_COST, then
   accumulate them through the target cost model into
   LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST.  */
static void
vect_compute_single_scalar_iteration_cost (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes, factor;
  int innerloop_iters, i;

  /* Gather costs for statements in the scalar loop.  */

  /* FORNOW.  */
  innerloop_iters = 1;
  if (loop->inner)
    innerloop_iters = 50; /* FIXME */

  for (i = 0; i < nbbs; i++)
    {
      gimple_stmt_iterator si;
      basic_block bb = bbs[i];

      /* Statements in the inner loop are weighted by the (estimated)
	 inner iteration count.  */
      if (bb->loop_father == loop->inner)
        factor = innerloop_iters;
      else
        factor = 1;

      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
	  gimple *stmt = gsi_stmt (si);
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

          if (!is_gimple_assign (stmt) && !is_gimple_call (stmt))
            continue;

          /* Skip stmts that are not vectorized inside the loop.  */
          if (stmt_info
              && !STMT_VINFO_RELEVANT_P (stmt_info)
              && (!STMT_VINFO_LIVE_P (stmt_info)
                  || !VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
	      && !STMT_VINFO_IN_PATTERN_P (stmt_info))
            continue;

	  /* NOTE(review): stmt_info is dereferenced unconditionally below,
	     although the skip test above tolerates a NULL stmt_info —
	     presumably every assign/call in the loop has a stmt_vec_info;
	     confirm.  */
	  vect_cost_for_stmt kind;
          if (STMT_VINFO_DATA_REF (stmt_info))
            {
              if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
               kind = scalar_load;
             else
               kind = scalar_store;
            }
          else
            kind = scalar_stmt;

	  record_stmt_cost (&LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
			    factor, kind, stmt_info, 0, vect_prologue);
        }
    }

  /* Now accumulate cost.  */
  void *target_cost_data = init_cost (loop);
  stmt_info_for_cost *si;
  int j;
  FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
		    j, si)
    {
      struct _stmt_vec_info *stmt_info
	= si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
      (void) add_stmt_cost (target_cost_data, si->count,
			    si->kind, stmt_info, si->misalign,
			    vect_body);
    }
  unsigned dummy, body_cost = 0;
  finish_cost (target_cost_data, &dummy, &body_cost, &dummy);
  destroy_cost_data (target_cost_data);
  LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST (loop_vinfo) = body_cost;
}
145738fd1498Szrj 
145838fd1498Szrj 
/* Function vect_analyze_loop_form_1.

   Verify that certain CFG restrictions hold, including:
   - the loop has a pre-header
   - the loop has a single entry and exit
   - the loop exit condition is simple enough
   - the number of iterations can be analyzed, i.e, a countable loop.  The
     niter could be analyzed under some assumptions.

   On success returns true and sets *LOOP_COND to the exit condition,
   *ASSUMPTIONS to the assumptions under which the niter analysis holds,
   *NUMBER_OF_ITERATIONSM1 / *NUMBER_OF_ITERATIONS to the iteration counts,
   and, when analyzing an outer loop, *INNER_LOOP_COND to the inner loop's
   exit condition.  */

bool
vect_analyze_loop_form_1 (struct loop *loop, gcond **loop_cond,
			  tree *assumptions, tree *number_of_iterationsm1,
			  tree *number_of_iterations, gcond **inner_loop_cond)
{
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "=== vect_analyze_loop_form ===\n");

  /* Different restrictions apply when we are considering an inner-most loop,
     vs. an outer (nested) loop.
     (FORNOW. May want to relax some of these restrictions in the future).  */

  if (!loop->inner)
    {
      /* Inner-most loop.  We currently require that the number of BBs is
	 exactly 2 (the header and latch).  Vectorizable inner-most loops
	 look like this:

                        (pre-header)
                           |
                          header <--------+
                           | |            |
                           | +--> latch --+
                           |
                        (exit-bb)  */

      if (loop->num_nodes != 2)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "not vectorized: control flow in loop.\n");
          return false;
        }

      if (empty_block_p (loop->header))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "not vectorized: empty loop.\n");
	  return false;
	}
    }
  else
    {
      struct loop *innerloop = loop->inner;
      edge entryedge;

      /* Nested loop. We currently require that the loop is doubly-nested,
	 contains a single inner loop, and the number of BBs is exactly 5.
	 Vectorizable outer-loops look like this:

			(pre-header)
			   |
			  header <---+
			   |         |
		          inner-loop |
			   |         |
			  tail ------+
			   |
		        (exit-bb)

	 The inner-loop has the properties expected of inner-most loops
	 as described above.  */

      if ((loop->inner)->inner || (loop->inner)->next)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "not vectorized: multiple nested loops.\n");
	  return false;
	}

      if (loop->num_nodes != 5)
        {
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "not vectorized: control flow in loop.\n");
	  return false;
        }

      /* The inner loop must be entered from the outer header and its single
	 exit must lead to the block feeding the outer latch (the "tail" in
	 the diagram above).  */
      entryedge = loop_preheader_edge (innerloop);
      if (entryedge->src != loop->header
	  || !single_exit (innerloop)
	  || single_exit (innerloop)->dest != EDGE_PRED (loop->latch, 0)->src)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "not vectorized: unsupported outerloop form.\n");
	  return false;
	}

      /* Analyze the inner-loop.  */
      tree inner_niterm1, inner_niter, inner_assumptions;
      if (! vect_analyze_loop_form_1 (loop->inner, inner_loop_cond,
				      &inner_assumptions, &inner_niterm1,
				      &inner_niter, NULL)
	  /* Don't support analyzing niter under assumptions for inner
	     loop.  */
	  || !integer_onep (inner_assumptions))
	{
	  if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "not vectorized: Bad inner loop.\n");
	  return false;
	}

      if (!expr_invariant_in_loop_p (loop, inner_niter))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "not vectorized: inner-loop count not"
                             " invariant.\n");
	  return false;
	}

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
			 "Considering outer-loop vectorization.\n");
    }

  /* Common checks for inner-most and outer loops alike.  */
  if (!single_exit (loop)
      || EDGE_COUNT (loop->header->preds) != 2)
    {
      if (dump_enabled_p ())
        {
          if (!single_exit (loop))
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "not vectorized: multiple exits.\n");
          else if (EDGE_COUNT (loop->header->preds) != 2)
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "not vectorized: too many incoming edges.\n");
        }
      return false;
    }

  /* We assume that the loop exit condition is at the end of the loop. i.e,
     that the loop is represented as a do-while (with a proper if-guard
     before the loop if needed), where the loop header contains all the
     executable statements, and the latch is empty.  */
  if (!empty_block_p (loop->latch)
      || !gimple_seq_empty_p (phi_nodes (loop->latch)))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: latch block not empty.\n");
      return false;
    }

  /* Make sure the exit is not abnormal.  */
  edge e = single_exit (loop);
  if (e->flags & EDGE_ABNORMAL)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: abnormal loop exit edge.\n");
      return false;
    }

  *loop_cond = vect_get_loop_niters (loop, assumptions, number_of_iterations,
				     number_of_iterationsm1);
  if (!*loop_cond)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: complicated exit condition.\n");
      return false;
    }

  if (integer_zerop (*assumptions)
      || !*number_of_iterations
      || chrec_contains_undetermined (*number_of_iterations))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: number of iterations cannot be "
			 "computed.\n");
      return false;
    }

  if (integer_zerop (*number_of_iterations))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: number of iterations = 0.\n");
      return false;
    }

  return true;
}
165838fd1498Szrj 
165938fd1498Szrj /* Analyze LOOP form and return a loop_vec_info if it is of suitable form.  */
166038fd1498Szrj 
166138fd1498Szrj loop_vec_info
vect_analyze_loop_form(struct loop * loop)166238fd1498Szrj vect_analyze_loop_form (struct loop *loop)
166338fd1498Szrj {
166438fd1498Szrj   tree assumptions, number_of_iterations, number_of_iterationsm1;
166538fd1498Szrj   gcond *loop_cond, *inner_loop_cond = NULL;
166638fd1498Szrj 
166738fd1498Szrj   if (! vect_analyze_loop_form_1 (loop, &loop_cond,
166838fd1498Szrj 				  &assumptions, &number_of_iterationsm1,
166938fd1498Szrj 				  &number_of_iterations, &inner_loop_cond))
167038fd1498Szrj     return NULL;
167138fd1498Szrj 
167238fd1498Szrj   loop_vec_info loop_vinfo = new _loop_vec_info (loop);
167338fd1498Szrj   LOOP_VINFO_NITERSM1 (loop_vinfo) = number_of_iterationsm1;
167438fd1498Szrj   LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
167538fd1498Szrj   LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = number_of_iterations;
167638fd1498Szrj   if (!integer_onep (assumptions))
167738fd1498Szrj     {
167838fd1498Szrj       /* We consider to vectorize this loop by versioning it under
167938fd1498Szrj 	 some assumptions.  In order to do this, we need to clear
168038fd1498Szrj 	 existing information computed by scev and niter analyzer.  */
168138fd1498Szrj       scev_reset_htab ();
168238fd1498Szrj       free_numbers_of_iterations_estimates (loop);
168338fd1498Szrj       /* Also set flag for this loop so that following scev and niter
168438fd1498Szrj 	 analysis are done under the assumptions.  */
168538fd1498Szrj       loop_constraint_set (loop, LOOP_C_FINITE);
168638fd1498Szrj       /* Also record the assumptions for versioning.  */
168738fd1498Szrj       LOOP_VINFO_NITERS_ASSUMPTIONS (loop_vinfo) = assumptions;
168838fd1498Szrj     }
168938fd1498Szrj 
169038fd1498Szrj   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
169138fd1498Szrj     {
169238fd1498Szrj       if (dump_enabled_p ())
169338fd1498Szrj         {
169438fd1498Szrj           dump_printf_loc (MSG_NOTE, vect_location,
169538fd1498Szrj 			   "Symbolic number of iterations is ");
169638fd1498Szrj 	  dump_generic_expr (MSG_NOTE, TDF_DETAILS, number_of_iterations);
169738fd1498Szrj           dump_printf (MSG_NOTE, "\n");
169838fd1498Szrj         }
169938fd1498Szrj     }
170038fd1498Szrj 
170138fd1498Szrj   STMT_VINFO_TYPE (vinfo_for_stmt (loop_cond)) = loop_exit_ctrl_vec_info_type;
170238fd1498Szrj   if (inner_loop_cond)
170338fd1498Szrj     STMT_VINFO_TYPE (vinfo_for_stmt (inner_loop_cond))
170438fd1498Szrj       = loop_exit_ctrl_vec_info_type;
170538fd1498Szrj 
170638fd1498Szrj   gcc_assert (!loop->aux);
170738fd1498Szrj   loop->aux = loop_vinfo;
170838fd1498Szrj   return loop_vinfo;
170938fd1498Szrj }
171038fd1498Szrj 
171138fd1498Szrj 
171238fd1498Szrj 
171338fd1498Szrj /* Scan the loop stmts and dependent on whether there are any (non-)SLP
171438fd1498Szrj    statements update the vectorization factor.  */
171538fd1498Szrj 
171638fd1498Szrj static void
vect_update_vf_for_slp(loop_vec_info loop_vinfo)171738fd1498Szrj vect_update_vf_for_slp (loop_vec_info loop_vinfo)
171838fd1498Szrj {
171938fd1498Szrj   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
172038fd1498Szrj   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
172138fd1498Szrj   int nbbs = loop->num_nodes;
172238fd1498Szrj   poly_uint64 vectorization_factor;
172338fd1498Szrj   int i;
172438fd1498Szrj 
172538fd1498Szrj   if (dump_enabled_p ())
172638fd1498Szrj     dump_printf_loc (MSG_NOTE, vect_location,
172738fd1498Szrj 		     "=== vect_update_vf_for_slp ===\n");
172838fd1498Szrj 
172938fd1498Szrj   vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
173038fd1498Szrj   gcc_assert (known_ne (vectorization_factor, 0U));
173138fd1498Szrj 
173238fd1498Szrj   /* If all the stmts in the loop can be SLPed, we perform only SLP, and
173338fd1498Szrj      vectorization factor of the loop is the unrolling factor required by
173438fd1498Szrj      the SLP instances.  If that unrolling factor is 1, we say, that we
173538fd1498Szrj      perform pure SLP on loop - cross iteration parallelism is not
173638fd1498Szrj      exploited.  */
173738fd1498Szrj   bool only_slp_in_loop = true;
173838fd1498Szrj   for (i = 0; i < nbbs; i++)
173938fd1498Szrj     {
174038fd1498Szrj       basic_block bb = bbs[i];
174138fd1498Szrj       for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);
174238fd1498Szrj 	   gsi_next (&si))
174338fd1498Szrj 	{
174438fd1498Szrj 	  gimple *stmt = gsi_stmt (si);
174538fd1498Szrj 	  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
174638fd1498Szrj 	  if (STMT_VINFO_IN_PATTERN_P (stmt_info)
174738fd1498Szrj 	      && STMT_VINFO_RELATED_STMT (stmt_info))
174838fd1498Szrj 	    {
174938fd1498Szrj 	      stmt = STMT_VINFO_RELATED_STMT (stmt_info);
175038fd1498Szrj 	      stmt_info = vinfo_for_stmt (stmt);
175138fd1498Szrj 	    }
175238fd1498Szrj 	  if ((STMT_VINFO_RELEVANT_P (stmt_info)
175338fd1498Szrj 	       || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
175438fd1498Szrj 	      && !PURE_SLP_STMT (stmt_info))
175538fd1498Szrj 	    /* STMT needs both SLP and loop-based vectorization.  */
175638fd1498Szrj 	    only_slp_in_loop = false;
175738fd1498Szrj 	}
175838fd1498Szrj     }
175938fd1498Szrj 
176038fd1498Szrj   if (only_slp_in_loop)
176138fd1498Szrj     {
176238fd1498Szrj       dump_printf_loc (MSG_NOTE, vect_location,
176338fd1498Szrj 		       "Loop contains only SLP stmts\n");
176438fd1498Szrj       vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
176538fd1498Szrj     }
176638fd1498Szrj   else
176738fd1498Szrj     {
176838fd1498Szrj       dump_printf_loc (MSG_NOTE, vect_location,
176938fd1498Szrj 		       "Loop contains SLP and non-SLP stmts\n");
177038fd1498Szrj       /* Both the vectorization factor and unroll factor have the form
177138fd1498Szrj 	 current_vector_size * X for some rational X, so they must have
177238fd1498Szrj 	 a common multiple.  */
177338fd1498Szrj       vectorization_factor
177438fd1498Szrj 	= force_common_multiple (vectorization_factor,
177538fd1498Szrj 				 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
177638fd1498Szrj     }
177738fd1498Szrj 
177838fd1498Szrj   LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
177938fd1498Szrj   if (dump_enabled_p ())
178038fd1498Szrj     {
178138fd1498Szrj       dump_printf_loc (MSG_NOTE, vect_location,
178238fd1498Szrj 		       "Updating vectorization factor to ");
178338fd1498Szrj       dump_dec (MSG_NOTE, vectorization_factor);
178438fd1498Szrj       dump_printf (MSG_NOTE, ".\n");
178538fd1498Szrj     }
178638fd1498Szrj }
178738fd1498Szrj 
178838fd1498Szrj /* Return true if STMT_INFO describes a double reduction phi and if
178938fd1498Szrj    the other phi in the reduction is also relevant for vectorization.
179038fd1498Szrj    This rejects cases such as:
179138fd1498Szrj 
179238fd1498Szrj       outer1:
179338fd1498Szrj 	x_1 = PHI <x_3(outer2), ...>;
179438fd1498Szrj 	...
179538fd1498Szrj 
179638fd1498Szrj       inner:
179738fd1498Szrj 	x_2 = ...;
179838fd1498Szrj 	...
179938fd1498Szrj 
180038fd1498Szrj       outer2:
180138fd1498Szrj 	x_3 = PHI <x_2(inner)>;
180238fd1498Szrj 
180338fd1498Szrj    if nothing in x_2 or elsewhere makes x_1 relevant.  */
180438fd1498Szrj 
180538fd1498Szrj static bool
vect_active_double_reduction_p(stmt_vec_info stmt_info)180638fd1498Szrj vect_active_double_reduction_p (stmt_vec_info stmt_info)
180738fd1498Szrj {
180838fd1498Szrj   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_double_reduction_def)
180938fd1498Szrj     return false;
181038fd1498Szrj 
181138fd1498Szrj   gimple *other_phi = STMT_VINFO_REDUC_DEF (stmt_info);
181238fd1498Szrj   return STMT_VINFO_RELEVANT_P (vinfo_for_stmt (other_phi));
181338fd1498Szrj }
181438fd1498Szrj 
/* Function vect_analyze_loop_operations.

   Scan the loop stmts and make sure they are all vectorizable.  Return
   false (and dump the reason) on the first statement that is not, or if
   nothing in the loop needs vectorizing at all.  */

static bool
vect_analyze_loop_operations (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  int i;
  stmt_vec_info stmt_info;
  bool need_to_vectorize = false;
  bool ok;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "=== vect_analyze_loop_operations ===\n");

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
	   gsi_next (&si))
        {
          gphi *phi = si.phi ();
	  /* Assume vectorizable; the checks below only assign OK for
	     relevant or live phis.  */
          ok = true;

          stmt_info = vinfo_for_stmt (phi);
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location, "examining phi: ");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
            }
	  if (virtual_operand_p (gimple_phi_result (phi)))
	    continue;

          /* Inner-loop loop-closed exit phi in outer-loop vectorization
             (i.e., a phi in the tail of the outer-loop).  */
          if (! is_loop_header_bb_p (bb))
            {
              /* FORNOW: we currently don't support the case that these phis
                 are not used in the outerloop (unless it is double reduction,
                 i.e., this phi is vect_reduction_def), cause this case
                 requires to actually do something here.  */
              if (STMT_VINFO_LIVE_P (stmt_info)
		  && !vect_active_double_reduction_p (stmt_info))
                {
                  if (dump_enabled_p ())
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "Unsupported loop-closed phi in "
				     "outer-loop.\n");
                  return false;
                }

              /* If PHI is used in the outer loop, we check that its operand
                 is defined in the inner loop.  */
              if (STMT_VINFO_RELEVANT_P (stmt_info))
                {
                  tree phi_op;
		  gimple *op_def_stmt;

		  /* A loop-closed phi has exactly one argument.  */
                  if (gimple_phi_num_args (phi) != 1)
                    return false;

                  phi_op = PHI_ARG_DEF (phi, 0);
                  if (TREE_CODE (phi_op) != SSA_NAME)
                    return false;

                  op_def_stmt = SSA_NAME_DEF_STMT (phi_op);
		  if (gimple_nop_p (op_def_stmt)
		      || !flow_bb_inside_loop_p (loop, gimple_bb (op_def_stmt))
		      || !vinfo_for_stmt (op_def_stmt))
                    return false;

		  /* The defining statement must itself be marked as used
		     in the outer loop (possibly by reduction).  */
                  if (STMT_VINFO_RELEVANT (vinfo_for_stmt (op_def_stmt))
                        != vect_used_in_outer
                      && STMT_VINFO_RELEVANT (vinfo_for_stmt (op_def_stmt))
                           != vect_used_in_outer_by_reduction)
                    return false;
                }

              continue;
            }

          gcc_assert (stmt_info);

          if ((STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope
               || STMT_VINFO_LIVE_P (stmt_info))
              && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def)
            {
              /* A scalar-dependence cycle that we don't support.  */
              if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "not vectorized: scalar dependence cycle.\n");
              return false;
            }

          if (STMT_VINFO_RELEVANT_P (stmt_info))
            {
              need_to_vectorize = true;
              if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def
		  && ! PURE_SLP_STMT (stmt_info))
                ok = vectorizable_induction (phi, NULL, NULL, NULL);
	      else if ((STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
			|| STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle)
		       && ! PURE_SLP_STMT (stmt_info))
		ok = vectorizable_reduction (phi, NULL, NULL, NULL, NULL);
            }

	  /* SLP PHIs are tested by vect_slp_analyze_node_operations.  */
	  if (ok
	      && STMT_VINFO_LIVE_P (stmt_info)
	      && !PURE_SLP_STMT (stmt_info))
	    ok = vectorizable_live_operation (phi, NULL, NULL, -1, NULL);

          if (!ok)
            {
              if (dump_enabled_p ())
                {
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "not vectorized: relevant phi not "
				   "supported: ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, phi, 0);
                }
	      return false;
            }
        }

      /* Now check all non-phi statements in the block.  */
      for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);
	   gsi_next (&si))
        {
	  gimple *stmt = gsi_stmt (si);
	  if (!gimple_clobber_p (stmt)
	      && !vect_analyze_stmt (stmt, &need_to_vectorize, NULL, NULL))
	    return false;
        }
    } /* bbs */

  /* All operations in the loop are either irrelevant (deal with loop
     control, or dead), or only used outside the loop and can be moved
     out of the loop (e.g. invariants, inductions).  The loop can be
     optimized away by scalar optimizations.  We're better off not
     touching this loop.  */
  if (!need_to_vectorize)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
			 "All the computation can be taken out of the loop.\n");
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: redundant loop. no profit to "
			 "vectorize.\n");
      return false;
    }

  return true;
}
197438fd1498Szrj 
197538fd1498Szrj /* Analyze the cost of the loop described by LOOP_VINFO.  Decide if it
197638fd1498Szrj    is worthwhile to vectorize.  Return 1 if definitely yes, 0 if
197738fd1498Szrj    definitely no, or -1 if it's worth retrying.  */
197838fd1498Szrj 
static int
vect_analyze_loop_costing (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  /* Vectorization factor used for costing; an estimate when the real
     VF is variable.  */
  unsigned int assumed_vf = vect_vf_for_cost (loop_vinfo);

  /* Only fully-masked loops can have iteration counts less than the
     vectorization factor.  */
  if (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
    {
      HOST_WIDE_INT max_niter;

      /* Prefer the exact iteration count when known; otherwise fall
	 back to the static upper bound on statement executions
	 (-1 when no bound is known).  */
      if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
	max_niter = LOOP_VINFO_INT_NITERS (loop_vinfo);
      else
	max_niter = max_stmt_executions_int (loop);

      /* Without masking, a loop that never reaches one full vector's
	 worth of iterations cannot be vectorized; definite no.  */
      if (max_niter != -1
	  && (unsigned HOST_WIDE_INT) max_niter < assumed_vf)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "not vectorized: iteration count smaller than "
			     "vectorization factor.\n");
	  return 0;
	}
    }

  /* Ask the target cost model for the break-even points:
     MIN_PROFITABLE_ITERS guards the runtime threshold below,
     MIN_PROFITABLE_ESTIMATE is compared against profile estimates.  */
  int min_profitable_iters, min_profitable_estimate;
  vect_estimate_min_profitable_iters (loop_vinfo, &min_profitable_iters,
				      &min_profitable_estimate);

  /* A negative break-even count means the vector version can never pay
     off with the current configuration; return -1 so the caller may
     retry the analysis (e.g. with SLP disabled).  */
  if (min_profitable_iters < 0)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: vectorization not profitable.\n");
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: vector version will never be "
			 "profitable.\n");
      return -1;
    }

  /* User-specified minimum scalar loop bound, expressed in scalar
     iterations by scaling with the vectorization factor.  */
  int min_scalar_loop_bound = (PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
			       * assumed_vf);

  /* Use the cost model only if it is more conservative than user specified
     threshold.  */
  unsigned int th = (unsigned) MAX (min_scalar_loop_bound,
				    min_profitable_iters);

  /* Record the threshold; it is later used when emitting the runtime
     profitability check.  */
  LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) = th;

  /* With a compile-time-known iteration count below the threshold the
     loop is definitely not worth vectorizing.  */
  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && LOOP_VINFO_INT_NITERS (loop_vinfo) < th)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: vectorization not profitable.\n");
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "not vectorized: iteration count smaller than user "
			 "specified loop bound parameter or minimum profitable "
			 "iterations (whichever is more conservative).\n");
      return 0;
    }

  /* Otherwise compare profile-based estimates (falling back to the
     likely upper bound when no estimate exists) against the threshold.
     Failure here is soft (-1): a retry with different parameters might
     still succeed.  */
  HOST_WIDE_INT estimated_niter = estimated_stmt_executions_int (loop);
  if (estimated_niter == -1)
    estimated_niter = likely_max_stmt_executions_int (loop);
  if (estimated_niter != -1
      && ((unsigned HOST_WIDE_INT) estimated_niter
	  < MAX (th, (unsigned) min_profitable_estimate)))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: estimated iteration count too "
			 "small.\n");
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "not vectorized: estimated iteration count smaller "
			 "than specified loop bound parameter or minimum "
			 "profitable iterations (whichever is more "
			 "conservative).\n");
      return -1;
    }

  return 1;
}
206938fd1498Szrj 
207038fd1498Szrj 
207138fd1498Szrj /* Function vect_analyze_loop_2.
207238fd1498Szrj 
207338fd1498Szrj    Apply a set of analyses on LOOP, and create a loop_vec_info struct
207438fd1498Szrj    for it.  The different analyses will record information in the
207538fd1498Szrj    loop_vec_info struct.  */
static bool
vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal)
{
  bool ok;
  int res;
  unsigned int max_vf = MAX_VECTORIZATION_FACTOR;
  poly_uint64 min_vf = 2;
  /* Number of real (non-debug) statements; passed to the SLP analysis
     below.  */
  unsigned int n_stmts = 0;

  /* The first group of checks is independent of the vector size.
     FATAL stays true while a failure could not be cured by retrying
     with a different vector size; presumably the caller uses it to
     decide whether another vector size is worth trying — confirm
     against vect_analyze_loop.  */
  fatal = true;

  /* Find all data references in the loop (which correspond to vdefs/vuses)
     and analyze their evolution in the loop.  */

  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);

  loop_p loop = LOOP_VINFO_LOOP (loop_vinfo);
  if (!find_loop_nest (loop, &LOOP_VINFO_LOOP_NEST (loop_vinfo)))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: loop nest containing two "
			 "or more consecutive inner loops cannot be "
			 "vectorized\n");
      return false;
    }

  /* Walk every statement once: count them and collect their data
     references.  A statement whose data references cannot be analyzed
     normally kills vectorization, with one exception below.  */
  for (unsigned i = 0; i < loop->num_nodes; i++)
    for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]);
	 !gsi_end_p (gsi); gsi_next (&gsi))
      {
	gimple *stmt = gsi_stmt (gsi);
	if (is_gimple_debug (stmt))
	  continue;
	++n_stmts;
	if (!find_data_references_in_stmt (loop, stmt,
					   &LOOP_VINFO_DATAREFS (loop_vinfo)))
	  {
	    /* Exception: inside a loop marked safe for SIMD, a call to
	       a function that has SIMD clones may be tolerated if
	       neither its arguments nor its lhs are memory references
	       (the call itself then carries no data references).  */
	    if (is_gimple_call (stmt) && loop->safelen)
	      {
		tree fndecl = gimple_call_fndecl (stmt), op;
		if (fndecl != NULL_TREE)
		  {
		    cgraph_node *node = cgraph_node::get (fndecl);
		    if (node != NULL && node->simd_clones != NULL)
		      {
			unsigned int j, n = gimple_call_num_args (stmt);
			/* Scan the arguments for anything that is (or
			   is based on) a memory object.  */
			for (j = 0; j < n; j++)
			  {
			    op = gimple_call_arg (stmt, j);
			    if (DECL_P (op)
				|| (REFERENCE_CLASS_P (op)
				    && get_base_address (op)))
			      break;
			  }
			op = gimple_call_lhs (stmt);
			/* Ignore #pragma omp declare simd functions
			   if they don't have data references in the
			   call stmt itself.  */
			if (j == n
			    && !(op
				 && (DECL_P (op)
				     || (REFERENCE_CLASS_P (op)
					 && get_base_address (op)))))
			  continue;
		      }
		  }
	      }
	    if (dump_enabled_p ())
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "not vectorized: loop contains function "
			       "calls or data references that cannot "
			       "be analyzed\n");
	    return false;
	  }
      }

  /* Analyze the data references and also adjust the minimal
     vectorization factor according to the loads and stores.  */

  ok = vect_analyze_data_refs (loop_vinfo, &min_vf);
  if (!ok)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "bad data references.\n");
      return false;
    }

  /* Classify all cross-iteration scalar data-flow cycles.
     Cross-iteration cycles caused by virtual phis are analyzed separately.  */
  vect_analyze_scalar_cycles (loop_vinfo);

  /* Recognize idioms (dot-product etc.) as pattern statements.  */
  vect_pattern_recog (loop_vinfo);

  /* Pattern recognition may have changed which statements form the
     scalar cycles classified above; fix that up.  */
  vect_fixup_scalar_cycles_with_patterns (loop_vinfo);

  /* Analyze the access patterns of the data-refs in the loop (consecutive,
     complex, etc.). FORNOW: Only handle consecutive access pattern.  */

  ok = vect_analyze_data_ref_accesses (loop_vinfo);
  if (!ok)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "bad data access.\n");
      return false;
    }

  /* Data-flow analysis to detect stmts that do not need to be vectorized.  */

  ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
  if (!ok)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "unexpected pattern.\n");
      return false;
    }

  /* While the rest of the analysis below depends on it in some way.  */
  fatal = false;

  /* Analyze data dependences between the data-refs in the loop
     and adjust the maximum vectorization factor according to
     the dependences.
     FORNOW: fail at the first data dependence that we encounter.  */

  ok = vect_analyze_data_ref_dependences (loop_vinfo, &max_vf);
  if (!ok
      || (max_vf != MAX_VECTORIZATION_FACTOR
	  && maybe_lt (max_vf, min_vf)))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "bad data dependence.\n");
      return false;
    }
  LOOP_VINFO_MAX_VECT_FACTOR (loop_vinfo) = max_vf;

  ok = vect_determine_vectorization_factor (loop_vinfo);
  if (!ok)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "can't determine vectorization factor.\n");
      return false;
    }
  /* Re-check the dependence-derived cap against the now-final VF.  */
  if (max_vf != MAX_VECTORIZATION_FACTOR
      && maybe_lt (max_vf, LOOP_VINFO_VECT_FACTOR (loop_vinfo)))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "bad data dependence.\n");
      return false;
    }

  /* Compute the scalar iteration cost.  */
  vect_compute_single_scalar_iteration_cost (loop_vinfo);

  /* Saved so the VF can be restored if we roll back and retry with
     SLP disabled (see the "again" label below).  */
  poly_uint64 saved_vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  unsigned th;

  /* Check the SLP opportunities in the loop, analyze and build SLP trees.  */
  ok = vect_analyze_slp (loop_vinfo, n_stmts);
  if (!ok)
    return false;

  /* If there are any SLP instances mark them as pure_slp.  */
  bool slp = vect_make_slp_decision (loop_vinfo);
  if (slp)
    {
      /* Find stmts that need to be both vectorized and SLPed.  */
      vect_detect_hybrid_slp (loop_vinfo);

      /* Update the vectorization factor based on the SLP decision.  */
      vect_update_vf_for_slp (loop_vinfo);
    }

  /* Saved for the same rollback purpose as the VF above.  */
  bool saved_can_fully_mask_p = LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo);

  /* We don't expect to have to roll back to anything other than an empty
     set of rgroups.  */
  gcc_assert (LOOP_VINFO_MASKS (loop_vinfo).is_empty ());

  /* This is the point where we can re-start analysis with SLP forced off.  */
start_over:

  /* Now the vectorization factor is final.  */
  poly_uint64 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  gcc_assert (known_ne (vectorization_factor, 0U));

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vectorization_factor = ");
      dump_dec (MSG_NOTE, vectorization_factor);
      dump_printf (MSG_NOTE, ", niters = " HOST_WIDE_INT_PRINT_DEC "\n",
		   LOOP_VINFO_INT_NITERS (loop_vinfo));
    }

  /* Likely upper bound on the iteration count; used when deciding
     whether an epilogue loop is needed (versioning check below).  */
  HOST_WIDE_INT max_niter
    = likely_max_stmt_executions_int (LOOP_VINFO_LOOP (loop_vinfo));

  /* Analyze the alignment of the data-refs in the loop.
     Fail if a data reference is found that cannot be vectorized.  */

  ok = vect_analyze_data_refs_alignment (loop_vinfo);
  if (!ok)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "bad data alignment.\n");
      return false;
    }

  /* Prune the list of ddrs to be tested at run-time by versioning for alias.
     It is important to call pruning after vect_analyze_data_ref_accesses,
     since we use grouping information gathered by interleaving analysis.  */
  ok = vect_prune_runtime_alias_test_list (loop_vinfo);
  if (!ok)
    return false;

  /* Do not invoke vect_enhance_data_refs_alignment for epilogue
     vectorization.  */
  if (!LOOP_VINFO_EPILOGUE_P (loop_vinfo))
    {
    /* This pass will decide on using loop versioning and/or loop peeling in
       order to enhance the alignment of data references in the loop.  */
    ok = vect_enhance_data_refs_alignment (loop_vinfo);
    if (!ok)
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "bad data alignment.\n");
        return false;
      }
    }

  if (slp)
    {
      /* Analyze operations in the SLP instances.  Note this may
	 remove unsupported SLP instances which makes the above
	 SLP kind detection invalid.  */
      unsigned old_size = LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length ();
      vect_slp_analyze_operations (loop_vinfo);
      if (LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length () != old_size)
	goto again;
    }

  /* Scan all the remaining operations in the loop that are not subject
     to SLP and make sure they are vectorizable.  */
  ok = vect_analyze_loop_operations (loop_vinfo);
  if (!ok)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "bad operation or unsupported loop bound.\n");
      return false;
    }

  /* Decide whether to use a fully-masked loop for this vectorization
     factor.  */
  LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
    = (LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)
       && vect_verify_full_masking (loop_vinfo));
  if (dump_enabled_p ())
    {
      if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
	dump_printf_loc (MSG_NOTE, vect_location,
			 "using a fully-masked loop.\n");
      else
	dump_printf_loc (MSG_NOTE, vect_location,
			 "not using a fully-masked loop.\n");
    }

  /* If epilog loop is required because of data accesses with gaps,
     one additional iteration needs to be peeled.  Check if there is
     enough iterations for vectorization.  */
  if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
      && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && !LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
    {
      poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
      tree scalar_niters = LOOP_VINFO_NITERSM1 (loop_vinfo);

      if (known_lt (wi::to_widest (scalar_niters), vf))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "loop has no enough iterations to support"
			     " peeling for gaps.\n");
	  return false;
	}
    }

  /* Check the costings of the loop make vectorizing worthwhile.
     res < 0 is a soft failure (retry without SLP); res == 0 is final.  */
  res = vect_analyze_loop_costing (loop_vinfo);
  if (res < 0)
    goto again;
  if (!res)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Loop costings not worthwhile.\n");
      return false;
    }

  /* Decide whether we need to create an epilogue loop to handle
     remaining scalar iterations.  */
  th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo);

  unsigned HOST_WIDE_INT const_vf;
  if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
    /* The main loop handles all iterations.  */
    LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = false;
  else if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
	   && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) >= 0)
    {
      /* Work out the (constant) number of iterations that need to be
	 peeled for reasons other than niters.  */
      unsigned int peel_niter = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
      if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
	peel_niter += 1;
      if (!multiple_p (LOOP_VINFO_INT_NITERS (loop_vinfo) - peel_niter,
		       LOOP_VINFO_VECT_FACTOR (loop_vinfo)))
	LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true;
    }
  else if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
	   /* ??? When peeling for gaps but not alignment, we could
	      try to check whether the (variable) niters is known to be
	      VF * N + 1.  That's something of a niche case though.  */
	   || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
	   || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&const_vf)
	   || ((tree_ctz (LOOP_VINFO_NITERS (loop_vinfo))
		< (unsigned) exact_log2 (const_vf))
	       /* In case of versioning, check if the maximum number of
		  iterations is greater than th.  If they are identical,
		  the epilogue is unnecessary.  */
	       && (!LOOP_REQUIRES_VERSIONING (loop_vinfo)
		   || ((unsigned HOST_WIDE_INT) max_niter
		       > (th / const_vf) * const_vf))))
    LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true;

  /* If an epilogue loop is required make sure we can create one.  */
  if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
      || LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "epilog loop required\n");
      if (!vect_can_advance_ivs_p (loop_vinfo)
	  || !slpeel_can_duplicate_loop_p (LOOP_VINFO_LOOP (loop_vinfo),
					   single_exit (LOOP_VINFO_LOOP
							 (loop_vinfo))))
        {
          if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "not vectorized: can't create required "
			     "epilog loop\n");
          goto again;
        }
    }

  /* During peeling, we need to check if number of loop iterations is
     enough for both peeled prolog loop and vector loop.  This check
     can be merged along with threshold check of loop versioning, so
     increase threshold for this case if necessary.  */
  if (LOOP_REQUIRES_VERSIONING (loop_vinfo))
    {
      poly_uint64 niters_th = 0;

      if (!vect_use_loop_mask_for_alignment_p (loop_vinfo))
	{
	  /* Niters for peeled prolog loop.  */
	  if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0)
	    {
	      /* A negative peel count means the prolog count is only
		 known at runtime; budget for the worst case, one short
		 of a full vector of the unaligned access.  */
	      struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
	      tree vectype
		= STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
	      niters_th += TYPE_VECTOR_SUBPARTS (vectype) - 1;
	    }
	  else
	    niters_th += LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
	}

      /* Niters for at least one iteration of vectorized loop.  */
      if (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
	niters_th += LOOP_VINFO_VECT_FACTOR (loop_vinfo);
      /* One additional iteration because of peeling for gap.  */
      if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
	niters_th += 1;
      LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo) = niters_th;
    }

  /* Nothing after start_over may change the vectorization factor.  */
  gcc_assert (known_eq (vectorization_factor,
			LOOP_VINFO_VECT_FACTOR (loop_vinfo)));

  /* Ok to vectorize!  */
  return true;

again:
  /* Try again with SLP forced off but if we didn't do any SLP there is
     no point in re-trying.  */
  if (!slp)
    return false;

  /* If there are reduction chains re-trying will fail anyway.  */
  if (! LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).is_empty ())
    return false;

  /* Likewise if the grouped loads or stores in the SLP cannot be handled
     via interleaving or lane instructions.  */
  slp_instance instance;
  slp_tree node;
  unsigned i, j;
  FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), i, instance)
    {
      stmt_vec_info vinfo;
      vinfo = vinfo_for_stmt
	  (SLP_TREE_SCALAR_STMTS (SLP_INSTANCE_TREE (instance))[0]);
      if (! STMT_VINFO_GROUPED_ACCESS (vinfo))
	continue;
      vinfo = vinfo_for_stmt (STMT_VINFO_GROUP_FIRST_ELEMENT (vinfo));
      unsigned int size = STMT_VINFO_GROUP_SIZE (vinfo);
      tree vectype = STMT_VINFO_VECTYPE (vinfo);
      if (! vect_store_lanes_supported (vectype, size, false)
	 && ! known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U)
	 && ! vect_grouped_store_supported (vectype, size))
       return false;
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), j, node)
	{
	  vinfo = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
	  vinfo = vinfo_for_stmt (STMT_VINFO_GROUP_FIRST_ELEMENT (vinfo));
	  bool single_element_p = !STMT_VINFO_GROUP_NEXT_ELEMENT (vinfo);
	  size = STMT_VINFO_GROUP_SIZE (vinfo);
	  vectype = STMT_VINFO_VECTYPE (vinfo);
	  if (! vect_load_lanes_supported (vectype, size, false)
	      && ! vect_grouped_load_supported (vectype, single_element_p,
						size))
	    return false;
	}
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "re-trying with SLP disabled\n");

  /* Roll back state appropriately.  No SLP this time.  */
  slp = false;
  /* Restore vectorization factor as it were without SLP.  */
  LOOP_VINFO_VECT_FACTOR (loop_vinfo) = saved_vectorization_factor;
  /* Free the SLP instances.  */
  FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), j, instance)
    vect_free_slp_instance (instance);
  LOOP_VINFO_SLP_INSTANCES (loop_vinfo).release ();
  /* Reset SLP type to loop_vect on all stmts.  */
  for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
    {
      basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
      for (gimple_stmt_iterator si = gsi_start_phis (bb);
	   !gsi_end_p (si); gsi_next (&si))
	{
	  stmt_vec_info stmt_info = vinfo_for_stmt (gsi_stmt (si));
	  STMT_SLP_TYPE (stmt_info) = loop_vect;
	}
      for (gimple_stmt_iterator si = gsi_start_bb (bb);
	   !gsi_end_p (si); gsi_next (&si))
	{
	  stmt_vec_info stmt_info = vinfo_for_stmt (gsi_stmt (si));
	  STMT_SLP_TYPE (stmt_info) = loop_vect;
	  /* A pattern statement's SLP type also lives on its related
	     statement and its pattern def sequence; reset those too.  */
	  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
	    {
	      stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
	      STMT_SLP_TYPE (stmt_info) = loop_vect;
	      for (gimple_stmt_iterator pi
		     = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
		   !gsi_end_p (pi); gsi_next (&pi))
		{
		  gimple *pstmt = gsi_stmt (pi);
		  STMT_SLP_TYPE (vinfo_for_stmt (pstmt)) = loop_vect;
		}
	    }
	}
    }
  /* Free optimized alias test DDRS.  */
  LOOP_VINFO_LOWER_BOUNDS (loop_vinfo).truncate (0);
  LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo).release ();
  LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).release ();
  /* Reset target cost data.  */
  destroy_cost_data (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo));
  LOOP_VINFO_TARGET_COST_DATA (loop_vinfo)
    = init_cost (LOOP_VINFO_LOOP (loop_vinfo));
  /* Reset accumulated rgroup information.  */
  release_vec_loop_masks (&LOOP_VINFO_MASKS (loop_vinfo));
  /* Reset assorted flags.  */
  LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = false;
  LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = false;
  LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) = 0;
  LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo) = 0;
  LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = saved_can_fully_mask_p;

  goto start_over;
}
258038fd1498Szrj 
258138fd1498Szrj /* Function vect_analyze_loop.
258238fd1498Szrj 
258338fd1498Szrj    Apply a set of analyses on LOOP, and create a loop_vec_info struct
258438fd1498Szrj    for it.  The different analyses will record information in the
258538fd1498Szrj    loop_vec_info struct.  If ORIG_LOOP_VINFO is not NULL epilogue must
258638fd1498Szrj    be vectorized.  */
258738fd1498Szrj loop_vec_info
vect_analyze_loop(struct loop * loop,loop_vec_info orig_loop_vinfo)258838fd1498Szrj vect_analyze_loop (struct loop *loop, loop_vec_info orig_loop_vinfo)
258938fd1498Szrj {
259038fd1498Szrj   loop_vec_info loop_vinfo;
259138fd1498Szrj   auto_vector_sizes vector_sizes;
259238fd1498Szrj 
259338fd1498Szrj   /* Autodetect first vector size we try.  */
259438fd1498Szrj   current_vector_size = 0;
259538fd1498Szrj   targetm.vectorize.autovectorize_vector_sizes (&vector_sizes);
259638fd1498Szrj   unsigned int next_size = 0;
259738fd1498Szrj 
259838fd1498Szrj   if (dump_enabled_p ())
259938fd1498Szrj     dump_printf_loc (MSG_NOTE, vect_location,
260038fd1498Szrj 		     "===== analyze_loop_nest =====\n");
260138fd1498Szrj 
260238fd1498Szrj   if (loop_outer (loop)
260338fd1498Szrj       && loop_vec_info_for_loop (loop_outer (loop))
260438fd1498Szrj       && LOOP_VINFO_VECTORIZABLE_P (loop_vec_info_for_loop (loop_outer (loop))))
260538fd1498Szrj     {
260638fd1498Szrj       if (dump_enabled_p ())
260738fd1498Szrj 	dump_printf_loc (MSG_NOTE, vect_location,
260838fd1498Szrj 			 "outer-loop already vectorized.\n");
260938fd1498Szrj       return NULL;
261038fd1498Szrj     }
261138fd1498Szrj 
261238fd1498Szrj   poly_uint64 autodetected_vector_size = 0;
261338fd1498Szrj   while (1)
261438fd1498Szrj     {
261538fd1498Szrj       /* Check the CFG characteristics of the loop (nesting, entry/exit).  */
261638fd1498Szrj       loop_vinfo = vect_analyze_loop_form (loop);
261738fd1498Szrj       if (!loop_vinfo)
261838fd1498Szrj 	{
261938fd1498Szrj 	  if (dump_enabled_p ())
262038fd1498Szrj 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
262138fd1498Szrj 			     "bad loop form.\n");
262238fd1498Szrj 	  return NULL;
262338fd1498Szrj 	}
262438fd1498Szrj 
262538fd1498Szrj       bool fatal = false;
262638fd1498Szrj 
262738fd1498Szrj       if (orig_loop_vinfo)
262838fd1498Szrj 	LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo) = orig_loop_vinfo;
262938fd1498Szrj 
263038fd1498Szrj       if (vect_analyze_loop_2 (loop_vinfo, fatal))
263138fd1498Szrj 	{
263238fd1498Szrj 	  LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
263338fd1498Szrj 
263438fd1498Szrj 	  return loop_vinfo;
263538fd1498Szrj 	}
263638fd1498Szrj 
263738fd1498Szrj       delete loop_vinfo;
263838fd1498Szrj 
263938fd1498Szrj       if (next_size == 0)
264038fd1498Szrj 	autodetected_vector_size = current_vector_size;
264138fd1498Szrj 
264238fd1498Szrj       if (next_size < vector_sizes.length ()
264338fd1498Szrj 	  && known_eq (vector_sizes[next_size], autodetected_vector_size))
264438fd1498Szrj 	next_size += 1;
264538fd1498Szrj 
264638fd1498Szrj       if (fatal
264738fd1498Szrj 	  || next_size == vector_sizes.length ()
264838fd1498Szrj 	  || known_eq (current_vector_size, 0U))
264938fd1498Szrj 	return NULL;
265038fd1498Szrj 
265138fd1498Szrj       /* Try the next biggest vector size.  */
265238fd1498Szrj       current_vector_size = vector_sizes[next_size++];
265338fd1498Szrj       if (dump_enabled_p ())
265438fd1498Szrj 	{
265538fd1498Szrj 	  dump_printf_loc (MSG_NOTE, vect_location,
265638fd1498Szrj 			   "***** Re-trying analysis with "
265738fd1498Szrj 			   "vector size ");
265838fd1498Szrj 	  dump_dec (MSG_NOTE, current_vector_size);
265938fd1498Szrj 	  dump_printf (MSG_NOTE, "\n");
266038fd1498Szrj 	}
266138fd1498Szrj     }
266238fd1498Szrj }
266338fd1498Szrj 
266438fd1498Szrj /* Return true if there is an in-order reduction function for CODE, storing
266538fd1498Szrj    it in *REDUC_FN if so.  */
266638fd1498Szrj 
266738fd1498Szrj static bool
fold_left_reduction_fn(tree_code code,internal_fn * reduc_fn)266838fd1498Szrj fold_left_reduction_fn (tree_code code, internal_fn *reduc_fn)
266938fd1498Szrj {
267038fd1498Szrj   switch (code)
267138fd1498Szrj     {
267238fd1498Szrj     case PLUS_EXPR:
267338fd1498Szrj       *reduc_fn = IFN_FOLD_LEFT_PLUS;
267438fd1498Szrj       return true;
267538fd1498Szrj 
267638fd1498Szrj     default:
267738fd1498Szrj       return false;
267838fd1498Szrj     }
267938fd1498Szrj }
268038fd1498Szrj 
268138fd1498Szrj /* Function reduction_fn_for_scalar_code
268238fd1498Szrj 
268338fd1498Szrj    Input:
268438fd1498Szrj    CODE - tree_code of a reduction operations.
268538fd1498Szrj 
268638fd1498Szrj    Output:
268738fd1498Szrj    REDUC_FN - the corresponding internal function to be used to reduce the
268838fd1498Szrj       vector of partial results into a single scalar result, or IFN_LAST
268938fd1498Szrj       if the operation is a supported reduction operation, but does not have
269038fd1498Szrj       such an internal function.
269138fd1498Szrj 
269238fd1498Szrj    Return FALSE if CODE currently cannot be vectorized as reduction.  */
269338fd1498Szrj 
269438fd1498Szrj static bool
reduction_fn_for_scalar_code(enum tree_code code,internal_fn * reduc_fn)269538fd1498Szrj reduction_fn_for_scalar_code (enum tree_code code, internal_fn *reduc_fn)
269638fd1498Szrj {
269738fd1498Szrj   switch (code)
269838fd1498Szrj     {
269938fd1498Szrj       case MAX_EXPR:
270038fd1498Szrj         *reduc_fn = IFN_REDUC_MAX;
270138fd1498Szrj         return true;
270238fd1498Szrj 
270338fd1498Szrj       case MIN_EXPR:
270438fd1498Szrj         *reduc_fn = IFN_REDUC_MIN;
270538fd1498Szrj         return true;
270638fd1498Szrj 
270738fd1498Szrj       case PLUS_EXPR:
270838fd1498Szrj         *reduc_fn = IFN_REDUC_PLUS;
270938fd1498Szrj         return true;
271038fd1498Szrj 
271138fd1498Szrj       case BIT_AND_EXPR:
271238fd1498Szrj 	*reduc_fn = IFN_REDUC_AND;
271338fd1498Szrj 	return true;
271438fd1498Szrj 
271538fd1498Szrj       case BIT_IOR_EXPR:
271638fd1498Szrj 	*reduc_fn = IFN_REDUC_IOR;
271738fd1498Szrj 	return true;
271838fd1498Szrj 
271938fd1498Szrj       case BIT_XOR_EXPR:
272038fd1498Szrj 	*reduc_fn = IFN_REDUC_XOR;
272138fd1498Szrj 	return true;
272238fd1498Szrj 
272338fd1498Szrj       case MULT_EXPR:
272438fd1498Szrj       case MINUS_EXPR:
272538fd1498Szrj         *reduc_fn = IFN_LAST;
272638fd1498Szrj         return true;
272738fd1498Szrj 
272838fd1498Szrj       default:
272938fd1498Szrj        return false;
273038fd1498Szrj     }
273138fd1498Szrj }
273238fd1498Szrj 
273338fd1498Szrj /* If there is a neutral value X such that SLP reduction NODE would not
273438fd1498Szrj    be affected by the introduction of additional X elements, return that X,
273538fd1498Szrj    otherwise return null.  CODE is the code of the reduction.  REDUC_CHAIN
273638fd1498Szrj    is true if the SLP statements perform a single reduction, false if each
273738fd1498Szrj    statement performs an independent reduction.  */
273838fd1498Szrj 
273938fd1498Szrj static tree
neutral_op_for_slp_reduction(slp_tree slp_node,tree_code code,bool reduc_chain)274038fd1498Szrj neutral_op_for_slp_reduction (slp_tree slp_node, tree_code code,
274138fd1498Szrj 			      bool reduc_chain)
274238fd1498Szrj {
274338fd1498Szrj   vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
274438fd1498Szrj   gimple *stmt = stmts[0];
274538fd1498Szrj   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
274638fd1498Szrj   tree vector_type = STMT_VINFO_VECTYPE (stmt_vinfo);
274738fd1498Szrj   tree scalar_type = TREE_TYPE (vector_type);
274838fd1498Szrj   struct loop *loop = gimple_bb (stmt)->loop_father;
274938fd1498Szrj   gcc_assert (loop);
275038fd1498Szrj 
275138fd1498Szrj   switch (code)
275238fd1498Szrj     {
275338fd1498Szrj     case WIDEN_SUM_EXPR:
275438fd1498Szrj     case DOT_PROD_EXPR:
275538fd1498Szrj     case SAD_EXPR:
275638fd1498Szrj     case PLUS_EXPR:
275738fd1498Szrj     case MINUS_EXPR:
275838fd1498Szrj     case BIT_IOR_EXPR:
275938fd1498Szrj     case BIT_XOR_EXPR:
276038fd1498Szrj       return build_zero_cst (scalar_type);
276138fd1498Szrj 
276238fd1498Szrj     case MULT_EXPR:
276338fd1498Szrj       return build_one_cst (scalar_type);
276438fd1498Szrj 
276538fd1498Szrj     case BIT_AND_EXPR:
276638fd1498Szrj       return build_all_ones_cst (scalar_type);
276738fd1498Szrj 
276838fd1498Szrj     case MAX_EXPR:
276938fd1498Szrj     case MIN_EXPR:
277038fd1498Szrj       /* For MIN/MAX the initial values are neutral.  A reduction chain
277138fd1498Szrj 	 has only a single initial value, so that value is neutral for
277238fd1498Szrj 	 all statements.  */
277338fd1498Szrj       if (reduc_chain)
277438fd1498Szrj 	return PHI_ARG_DEF_FROM_EDGE (stmt, loop_preheader_edge (loop));
277538fd1498Szrj       return NULL_TREE;
277638fd1498Szrj 
277738fd1498Szrj     default:
277838fd1498Szrj       return NULL_TREE;
277938fd1498Szrj     }
278038fd1498Szrj }
278138fd1498Szrj 
278238fd1498Szrj /* Error reporting helper for vect_is_simple_reduction below.  GIMPLE statement
278338fd1498Szrj    STMT is printed with a message MSG. */
278438fd1498Szrj 
278538fd1498Szrj static void
report_vect_op(dump_flags_t msg_type,gimple * stmt,const char * msg)278638fd1498Szrj report_vect_op (dump_flags_t msg_type, gimple *stmt, const char *msg)
278738fd1498Szrj {
  /* Emit MSG at the current vectorizer dump location, then dump STMT in
     condensed (TDF_SLIM) form after it.  */
278838fd1498Szrj   dump_printf_loc (msg_type, vect_location, "%s", msg);
278938fd1498Szrj   dump_gimple_stmt (msg_type, TDF_SLIM, stmt, 0);
279038fd1498Szrj }
279138fd1498Szrj 
279238fd1498Szrj 
279338fd1498Szrj /* Detect SLP reduction of the form:
279438fd1498Szrj 
279538fd1498Szrj    #a1 = phi <a5, a0>
279638fd1498Szrj    a2 = operation (a1)
279738fd1498Szrj    a3 = operation (a2)
279838fd1498Szrj    a4 = operation (a3)
279938fd1498Szrj    a5 = operation (a4)
280038fd1498Szrj 
280138fd1498Szrj    #a = phi <a5>
280238fd1498Szrj 
280338fd1498Szrj    PHI is the reduction phi node (#a1 = phi <a5, a0> above)
280438fd1498Szrj    FIRST_STMT is the first reduction stmt in the chain
280538fd1498Szrj    (a2 = operation (a1)).
280638fd1498Szrj 
280738fd1498Szrj    Return TRUE if a reduction chain was detected.  */
280838fd1498Szrj 
280938fd1498Szrj static bool
vect_is_slp_reduction(loop_vec_info loop_info,gimple * phi,gimple * first_stmt)281038fd1498Szrj vect_is_slp_reduction (loop_vec_info loop_info, gimple *phi,
281138fd1498Szrj 		       gimple *first_stmt)
281238fd1498Szrj {
281338fd1498Szrj   struct loop *loop = (gimple_bb (phi))->loop_father;
281438fd1498Szrj   struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
281538fd1498Szrj   enum tree_code code;
2816*58e805e6Szrj   gimple *loop_use_stmt = NULL;
2817*58e805e6Szrj   stmt_vec_info use_stmt_info;
281838fd1498Szrj   tree lhs;
281938fd1498Szrj   imm_use_iterator imm_iter;
282038fd1498Szrj   use_operand_p use_p;
282138fd1498Szrj   int nloop_uses, size = 0, n_out_of_loop_uses;
282238fd1498Szrj   bool found = false;
282338fd1498Szrj 
282438fd1498Szrj   if (loop != vect_loop)
282538fd1498Szrj     return false;
282638fd1498Szrj 
2827*58e805e6Szrj   auto_vec<stmt_vec_info, 8> reduc_chain;
282838fd1498Szrj   lhs = PHI_RESULT (phi);
282938fd1498Szrj   code = gimple_assign_rhs_code (first_stmt);
  /* Walk forward along the chain of in-loop uses starting from the PHI
     result.  The walk succeeds when it arrives back at the reduction PHI;
     every statement visited must use the same operation CODE.  */
283038fd1498Szrj   while (1)
283138fd1498Szrj     {
283238fd1498Szrj       nloop_uses = 0;
283338fd1498Szrj       n_out_of_loop_uses = 0;
283438fd1498Szrj       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
283538fd1498Szrj         {
283638fd1498Szrj 	  gimple *use_stmt = USE_STMT (use_p);
283738fd1498Szrj 	  if (is_gimple_debug (use_stmt))
283838fd1498Szrj 	    continue;
283938fd1498Szrj 
284038fd1498Szrj           /* Check if we got back to the reduction phi.  */
284138fd1498Szrj 	  if (use_stmt == phi)
284238fd1498Szrj             {
284338fd1498Szrj 	      loop_use_stmt = use_stmt;
284438fd1498Szrj               found = true;
284538fd1498Szrj               break;
284638fd1498Szrj             }
284738fd1498Szrj 
284838fd1498Szrj           if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
284938fd1498Szrj             {
285038fd1498Szrj 	      loop_use_stmt = use_stmt;
285138fd1498Szrj 	      nloop_uses++;
285238fd1498Szrj             }
285338fd1498Szrj            else
285438fd1498Szrj              n_out_of_loop_uses++;
285538fd1498Szrj 
285638fd1498Szrj            /* There can be either a single use in the loop or two uses in
285738fd1498Szrj               phi nodes.  */
285838fd1498Szrj            if (nloop_uses > 1 || (n_out_of_loop_uses && nloop_uses))
285938fd1498Szrj              return false;
286038fd1498Szrj         }
286138fd1498Szrj 
286238fd1498Szrj       if (found)
286338fd1498Szrj         break;
286438fd1498Szrj 
286538fd1498Szrj       /* We reached a statement with no loop uses.  */
286638fd1498Szrj       if (nloop_uses == 0)
286738fd1498Szrj 	return false;
286838fd1498Szrj 
286938fd1498Szrj       /* This is a loop exit phi, and we haven't reached the reduction phi.  */
287038fd1498Szrj       if (gimple_code (loop_use_stmt) == GIMPLE_PHI)
287138fd1498Szrj         return false;
287238fd1498Szrj 
287338fd1498Szrj       if (!is_gimple_assign (loop_use_stmt)
287438fd1498Szrj 	  || code != gimple_assign_rhs_code (loop_use_stmt)
287538fd1498Szrj 	  || !flow_bb_inside_loop_p (loop, gimple_bb (loop_use_stmt)))
287638fd1498Szrj         return false;
287738fd1498Szrj 
287838fd1498Szrj       /* Insert USE_STMT into reduction chain.  */
287938fd1498Szrj       use_stmt_info = vinfo_for_stmt (loop_use_stmt);
2880*58e805e6Szrj       reduc_chain.safe_push (use_stmt_info);
288138fd1498Szrj 
288238fd1498Szrj       lhs = gimple_assign_lhs (loop_use_stmt);
288338fd1498Szrj       size++;
288438fd1498Szrj    }
288538fd1498Szrj 
  /* The walk must have closed back on the reduction PHI and collected at
     least two statements.  */
288638fd1498Szrj   if (!found || loop_use_stmt != phi || size < 2)
288738fd1498Szrj     return false;
288838fd1498Szrj 
288938fd1498Szrj   /* Swap the operands, if needed, to make the reduction operand be the second
289038fd1498Szrj      operand.  */
289138fd1498Szrj   lhs = PHI_RESULT (phi);
2892*58e805e6Szrj   for (unsigned i = 0; i < reduc_chain.length (); ++i)
289338fd1498Szrj     {
2894*58e805e6Szrj       gassign *next_stmt = as_a <gassign *> (reduc_chain[i]->stmt);
289538fd1498Szrj       if (gimple_assign_rhs2 (next_stmt) == lhs)
289638fd1498Szrj 	{
289738fd1498Szrj 	  tree op = gimple_assign_rhs1 (next_stmt);
289838fd1498Szrj 	  gimple *def_stmt = NULL;
289938fd1498Szrj 
290038fd1498Szrj           if (TREE_CODE (op) == SSA_NAME)
290138fd1498Szrj             def_stmt = SSA_NAME_DEF_STMT (op);
290238fd1498Szrj 
290338fd1498Szrj 	  /* Check that the other def is either defined in the loop
290438fd1498Szrj 	     ("vect_internal_def"), or it's an induction (defined by a
290538fd1498Szrj 	     loop-header phi-node).  */
290638fd1498Szrj           if (def_stmt
290738fd1498Szrj               && gimple_bb (def_stmt)
290838fd1498Szrj 	      && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
290938fd1498Szrj               && (is_gimple_assign (def_stmt)
291038fd1498Szrj                   || is_gimple_call (def_stmt)
291138fd1498Szrj                   || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
291238fd1498Szrj                            == vect_induction_def
291338fd1498Szrj                   || (gimple_code (def_stmt) == GIMPLE_PHI
291438fd1498Szrj                       && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
291538fd1498Szrj                                   == vect_internal_def
291638fd1498Szrj                       && !is_loop_header_bb_p (gimple_bb (def_stmt)))))
291738fd1498Szrj 	    {
291838fd1498Szrj 	      lhs = gimple_assign_lhs (next_stmt);
291938fd1498Szrj  	      continue;
292038fd1498Szrj 	    }
292138fd1498Szrj 
292238fd1498Szrj 	  return false;
292338fd1498Szrj 	}
292438fd1498Szrj       else
292538fd1498Szrj 	{
  /* The reduction operand is in the first position: validate the other
     (second) operand's definition, then swap the operands.  */
292638fd1498Szrj           tree op = gimple_assign_rhs2 (next_stmt);
292738fd1498Szrj 	  gimple *def_stmt = NULL;
292838fd1498Szrj 
292938fd1498Szrj           if (TREE_CODE (op) == SSA_NAME)
293038fd1498Szrj             def_stmt = SSA_NAME_DEF_STMT (op);
293138fd1498Szrj 
293238fd1498Szrj           /* Check that the other def is either defined in the loop
293338fd1498Szrj             ("vect_internal_def"), or it's an induction (defined by a
293438fd1498Szrj             loop-header phi-node).  */
293538fd1498Szrj           if (def_stmt
293638fd1498Szrj               && gimple_bb (def_stmt)
293738fd1498Szrj 	      && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
293838fd1498Szrj               && (is_gimple_assign (def_stmt)
293938fd1498Szrj                   || is_gimple_call (def_stmt)
294038fd1498Szrj                   || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
294138fd1498Szrj                               == vect_induction_def
294238fd1498Szrj                   || (gimple_code (def_stmt) == GIMPLE_PHI
294338fd1498Szrj                       && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
294438fd1498Szrj                                   == vect_internal_def
294538fd1498Szrj                       && !is_loop_header_bb_p (gimple_bb (def_stmt)))))
294638fd1498Szrj   	    {
294738fd1498Szrj 	      if (dump_enabled_p ())
294838fd1498Szrj 		{
294938fd1498Szrj 		  dump_printf_loc (MSG_NOTE, vect_location, "swapping oprnds: ");
295038fd1498Szrj 		  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, next_stmt, 0);
295138fd1498Szrj 		}
295238fd1498Szrj 
295338fd1498Szrj 	      swap_ssa_operands (next_stmt,
295438fd1498Szrj 	 		         gimple_assign_rhs1_ptr (next_stmt),
295538fd1498Szrj                                  gimple_assign_rhs2_ptr (next_stmt))	;
295638fd1498Szrj 	      update_stmt (next_stmt);
295738fd1498Szrj 
	      /* Record that operands were swapped when this puts a constant
		 first, so later passes know the original order changed.  */
295838fd1498Szrj 	      if (CONSTANT_CLASS_P (gimple_assign_rhs1 (next_stmt)))
295938fd1498Szrj 		LOOP_VINFO_OPERANDS_SWAPPED (loop_info) = true;
296038fd1498Szrj 	    }
296138fd1498Szrj 	  else
296238fd1498Szrj 	    return false;
296338fd1498Szrj         }
296438fd1498Szrj 
296538fd1498Szrj       lhs = gimple_assign_lhs (next_stmt);
296638fd1498Szrj     }
296738fd1498Szrj 
2968*58e805e6Szrj   /* Build up the actual chain.  */
2969*58e805e6Szrj   for (unsigned i = 0; i < reduc_chain.length () - 1; ++i)
2970*58e805e6Szrj     {
2971*58e805e6Szrj       GROUP_FIRST_ELEMENT (reduc_chain[i]) = reduc_chain[0]->stmt;
2972*58e805e6Szrj       GROUP_NEXT_ELEMENT (reduc_chain[i]) = reduc_chain[i+1]->stmt;
2973*58e805e6Szrj     }
2974*58e805e6Szrj   GROUP_FIRST_ELEMENT (reduc_chain.last ()) = reduc_chain[0]->stmt;
2975*58e805e6Szrj   GROUP_NEXT_ELEMENT (reduc_chain.last ()) = NULL;
2976*58e805e6Szrj 
297738fd1498Szrj   /* Save the chain for further analysis in SLP detection.  */
2978*58e805e6Szrj   LOOP_VINFO_REDUCTION_CHAINS (loop_info).safe_push (reduc_chain[0]->stmt);
2979*58e805e6Szrj   GROUP_SIZE (reduc_chain[0]) = size;
298038fd1498Szrj 
298138fd1498Szrj   return true;
298238fd1498Szrj }
298338fd1498Szrj 
298438fd1498Szrj /* Return true if we need an in-order reduction for operation CODE
298538fd1498Szrj    on type TYPE.  NEED_WRAPPING_INTEGRAL_OVERFLOW is true if integer
298638fd1498Szrj    overflow must wrap.  */
298738fd1498Szrj 
298838fd1498Szrj static bool
needs_fold_left_reduction_p(tree type,tree_code code,bool need_wrapping_integral_overflow)298938fd1498Szrj needs_fold_left_reduction_p (tree type, tree_code code,
299038fd1498Szrj 			     bool need_wrapping_integral_overflow)
299138fd1498Szrj {
299238fd1498Szrj   /* CHECKME: check for !flag_finite_math_only too?  */
299338fd1498Szrj   if (SCALAR_FLOAT_TYPE_P (type))
299438fd1498Szrj     switch (code)
299538fd1498Szrj       {
299638fd1498Szrj       case MIN_EXPR:
299738fd1498Szrj       case MAX_EXPR:
299838fd1498Szrj 	return false;
299938fd1498Szrj 
300038fd1498Szrj       default:
300138fd1498Szrj 	return !flag_associative_math;
300238fd1498Szrj       }
300338fd1498Szrj 
300438fd1498Szrj   if (INTEGRAL_TYPE_P (type))
300538fd1498Szrj     {
300638fd1498Szrj       if (!operation_no_trapping_overflow (type, code))
300738fd1498Szrj 	return true;
300838fd1498Szrj       if (need_wrapping_integral_overflow
300938fd1498Szrj 	  && !TYPE_OVERFLOW_WRAPS (type)
301038fd1498Szrj 	  && operation_can_overflow (code))
301138fd1498Szrj 	return true;
301238fd1498Szrj       return false;
301338fd1498Szrj     }
301438fd1498Szrj 
301538fd1498Szrj   if (SAT_FIXED_POINT_TYPE_P (type))
301638fd1498Szrj     return true;
301738fd1498Szrj 
301838fd1498Szrj   return false;
301938fd1498Szrj }
302038fd1498Szrj 
302138fd1498Szrj /* Return true if the reduction PHI in LOOP with latch arg LOOP_ARG and
302238fd1498Szrj    reduction operation CODE has a handled computation expression.  */
302338fd1498Szrj 
302438fd1498Szrj bool
check_reduction_path(location_t loc,loop_p loop,gphi * phi,tree loop_arg,enum tree_code code)302538fd1498Szrj check_reduction_path (location_t loc, loop_p loop, gphi *phi, tree loop_arg,
302638fd1498Szrj 		      enum tree_code code)
302738fd1498Szrj {
302838fd1498Szrj   auto_vec<std::pair<ssa_op_iter, use_operand_p> > path;
302938fd1498Szrj   auto_bitmap visited;
303038fd1498Szrj   tree lookfor = PHI_RESULT (phi);
303138fd1498Szrj   ssa_op_iter curri;
303238fd1498Szrj   use_operand_p curr = op_iter_init_phiuse (&curri, phi, SSA_OP_USE);
  /* Position CURR on the PHI use that is the latch argument.  */
303338fd1498Szrj   while (USE_FROM_PTR (curr) != loop_arg)
303438fd1498Szrj     curr = op_iter_next_use (&curri);
  /* Exhaust the PHI's iterator so backtracking will not explore the
     PHI's other arguments.  */
303538fd1498Szrj   curri.i = curri.numops;
  /* Depth-first search through the use-def chains for a path from the
     latch argument back to the PHI result (LOOKFOR), recording the
     visited uses in PATH and backtracking via "pop".  */
303638fd1498Szrj   do
303738fd1498Szrj     {
303838fd1498Szrj       path.safe_push (std::make_pair (curri, curr));
303938fd1498Szrj       tree use = USE_FROM_PTR (curr);
304038fd1498Szrj       if (use == lookfor)
304138fd1498Szrj 	break;
304238fd1498Szrj       gimple *def = SSA_NAME_DEF_STMT (use);
304338fd1498Szrj       if (gimple_nop_p (def)
304438fd1498Szrj 	  || ! flow_bb_inside_loop_p (loop, gimple_bb (def)))
304538fd1498Szrj 	{
304638fd1498Szrj pop:
304738fd1498Szrj 	  do
304838fd1498Szrj 	    {
304938fd1498Szrj 	      std::pair<ssa_op_iter, use_operand_p> x = path.pop ();
305038fd1498Szrj 	      curri = x.first;
305138fd1498Szrj 	      curr = x.second;
305238fd1498Szrj 	      do
305338fd1498Szrj 		curr = op_iter_next_use (&curri);
305438fd1498Szrj 	      /* Skip already visited or non-SSA operands (from iterating
305538fd1498Szrj 	         over PHI args).  */
305638fd1498Szrj 	      while (curr != NULL_USE_OPERAND_P
305738fd1498Szrj 		     && (TREE_CODE (USE_FROM_PTR (curr)) != SSA_NAME
305838fd1498Szrj 			 || ! bitmap_set_bit (visited,
305938fd1498Szrj 					      SSA_NAME_VERSION
306038fd1498Szrj 					        (USE_FROM_PTR (curr)))));
306138fd1498Szrj 	    }
306238fd1498Szrj 	  while (curr == NULL_USE_OPERAND_P && ! path.is_empty ());
306338fd1498Szrj 	  if (curr == NULL_USE_OPERAND_P)
306438fd1498Szrj 	    break;
306538fd1498Szrj 	}
306638fd1498Szrj       else
306738fd1498Szrj 	{
  /* Descend into DEF's own uses, again skipping non-SSA and already
     visited operands.  */
306838fd1498Szrj 	  if (gimple_code (def) == GIMPLE_PHI)
306938fd1498Szrj 	    curr = op_iter_init_phiuse (&curri, as_a <gphi *>(def), SSA_OP_USE);
307038fd1498Szrj 	  else
307138fd1498Szrj 	    curr = op_iter_init_use (&curri, def, SSA_OP_USE);
307238fd1498Szrj 	  while (curr != NULL_USE_OPERAND_P
307338fd1498Szrj 		 && (TREE_CODE (USE_FROM_PTR (curr)) != SSA_NAME
307438fd1498Szrj 		     || ! bitmap_set_bit (visited,
307538fd1498Szrj 					  SSA_NAME_VERSION
307638fd1498Szrj 					    (USE_FROM_PTR (curr)))))
307738fd1498Szrj 	    curr = op_iter_next_use (&curri);
307838fd1498Szrj 	  if (curr == NULL_USE_OPERAND_P)
307938fd1498Szrj 	    goto pop;
308038fd1498Szrj 	}
308138fd1498Szrj     }
308238fd1498Szrj   while (1);
308338fd1498Szrj   if (dump_file && (dump_flags & TDF_DETAILS))
308438fd1498Szrj     {
308538fd1498Szrj       dump_printf_loc (MSG_NOTE, loc, "reduction path: ");
308638fd1498Szrj       unsigned i;
308738fd1498Szrj       std::pair<ssa_op_iter, use_operand_p> *x;
308838fd1498Szrj       FOR_EACH_VEC_ELT (path, i, x)
308938fd1498Szrj 	{
309038fd1498Szrj 	  dump_generic_expr (MSG_NOTE, TDF_SLIM, USE_FROM_PTR (x->second));
309138fd1498Szrj 	  dump_printf (MSG_NOTE, " ");
309238fd1498Szrj 	}
309338fd1498Szrj       dump_printf (MSG_NOTE, "\n");
309438fd1498Szrj     }
309538fd1498Szrj 
309638fd1498Szrj   /* Check whether the reduction path detected is valid.  */
309738fd1498Szrj   bool fail = path.length () == 0;
309838fd1498Szrj   bool neg = false;
309938fd1498Szrj   for (unsigned i = 1; i < path.length (); ++i)
310038fd1498Szrj     {
  /* Every statement on the path must have a single use and perform
     operation CODE (with a special case for PLUS/MINUS mixtures).  */
310138fd1498Szrj       gimple *use_stmt = USE_STMT (path[i].second);
310238fd1498Szrj       tree op = USE_FROM_PTR (path[i].second);
310338fd1498Szrj       if (! has_single_use (op)
310438fd1498Szrj 	  || ! is_gimple_assign (use_stmt))
310538fd1498Szrj 	{
310638fd1498Szrj 	  fail = true;
310738fd1498Szrj 	  break;
310838fd1498Szrj 	}
310938fd1498Szrj       if (gimple_assign_rhs_code (use_stmt) != code)
311038fd1498Szrj 	{
311138fd1498Szrj 	  if (code == PLUS_EXPR
311238fd1498Szrj 	      && gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
311338fd1498Szrj 	    {
311438fd1498Szrj 	      /* Track whether we negate the reduction value each iteration.  */
311538fd1498Szrj 	      if (gimple_assign_rhs2 (use_stmt) == op)
311638fd1498Szrj 		neg = ! neg;
311738fd1498Szrj 	    }
311838fd1498Szrj 	  else
311938fd1498Szrj 	    {
312038fd1498Szrj 	      fail = true;
312138fd1498Szrj 	      break;
312238fd1498Szrj 	    }
312338fd1498Szrj 	}
312438fd1498Szrj     }
312538fd1498Szrj   return ! fail && ! neg;
312638fd1498Szrj }
312738fd1498Szrj 
312838fd1498Szrj 
312938fd1498Szrj /* Function vect_is_simple_reduction
313038fd1498Szrj 
313138fd1498Szrj    (1) Detect a cross-iteration def-use cycle that represents a simple
313238fd1498Szrj    reduction computation.  We look for the following pattern:
313338fd1498Szrj 
313438fd1498Szrj    loop_header:
313538fd1498Szrj      a1 = phi < a0, a2 >
313638fd1498Szrj      a3 = ...
313738fd1498Szrj      a2 = operation (a3, a1)
313838fd1498Szrj 
313938fd1498Szrj    or
314038fd1498Szrj 
314138fd1498Szrj    a3 = ...
314238fd1498Szrj    loop_header:
314338fd1498Szrj      a1 = phi < a0, a2 >
314438fd1498Szrj      a2 = operation (a3, a1)
314538fd1498Szrj 
314638fd1498Szrj    such that:
314738fd1498Szrj    1. operation is commutative and associative and it is safe to
314838fd1498Szrj       change the order of the computation
314938fd1498Szrj    2. no uses for a2 in the loop (a2 is used out of the loop)
315038fd1498Szrj    3. no uses of a1 in the loop besides the reduction operation
315138fd1498Szrj    4. no uses of a1 outside the loop.
315238fd1498Szrj 
315338fd1498Szrj    Conditions 1,4 are tested here.
315438fd1498Szrj    Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized.
315538fd1498Szrj 
315638fd1498Szrj    (2) Detect a cross-iteration def-use cycle in nested loops, i.e.,
315738fd1498Szrj    nested cycles.
315838fd1498Szrj 
315938fd1498Szrj    (3) Detect cycles of phi nodes in outer-loop vectorization, i.e., double
316038fd1498Szrj    reductions:
316138fd1498Szrj 
316238fd1498Szrj      a1 = phi < a0, a2 >
316338fd1498Szrj      inner loop (def of a3)
316438fd1498Szrj      a2 = phi < a3 >
316538fd1498Szrj 
316638fd1498Szrj    (4) Detect condition expressions, ie:
316738fd1498Szrj      for (int i = 0; i < N; i++)
316838fd1498Szrj        if (a[i] < val)
316938fd1498Szrj 	ret_val = a[i];
317038fd1498Szrj 
317138fd1498Szrj */
317238fd1498Szrj 
317338fd1498Szrj static gimple *
vect_is_simple_reduction(loop_vec_info loop_info,gimple * phi,bool * double_reduc,bool need_wrapping_integral_overflow,enum vect_reduction_type * v_reduc_type)317438fd1498Szrj vect_is_simple_reduction (loop_vec_info loop_info, gimple *phi,
317538fd1498Szrj 			  bool *double_reduc,
317638fd1498Szrj 			  bool need_wrapping_integral_overflow,
317738fd1498Szrj 			  enum vect_reduction_type *v_reduc_type)
317838fd1498Szrj {
317938fd1498Szrj   struct loop *loop = (gimple_bb (phi))->loop_father;
318038fd1498Szrj   struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
318138fd1498Szrj   gimple *def_stmt, *def1 = NULL, *def2 = NULL, *phi_use_stmt = NULL;
318238fd1498Szrj   enum tree_code orig_code, code;
318338fd1498Szrj   tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE;
318438fd1498Szrj   tree type;
318538fd1498Szrj   int nloop_uses;
318638fd1498Szrj   tree name;
318738fd1498Szrj   imm_use_iterator imm_iter;
318838fd1498Szrj   use_operand_p use_p;
318938fd1498Szrj   bool phi_def;
319038fd1498Szrj 
319138fd1498Szrj   *double_reduc = false;
319238fd1498Szrj   *v_reduc_type = TREE_CODE_REDUCTION;
319338fd1498Szrj 
319438fd1498Szrj   tree phi_name = PHI_RESULT (phi);
319538fd1498Szrj   /* ???  If there are no uses of the PHI result the inner loop reduction
319638fd1498Szrj      won't be detected as possibly double-reduction by vectorizable_reduction
319738fd1498Szrj      because that tries to walk the PHI arg from the preheader edge which
319838fd1498Szrj      can be constant.  See PR60382.  */
319938fd1498Szrj   if (has_zero_uses (phi_name))
320038fd1498Szrj     return NULL;
320138fd1498Szrj   nloop_uses = 0;
320238fd1498Szrj   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, phi_name)
320338fd1498Szrj     {
320438fd1498Szrj       gimple *use_stmt = USE_STMT (use_p);
320538fd1498Szrj       if (is_gimple_debug (use_stmt))
320638fd1498Szrj 	continue;
320738fd1498Szrj 
320838fd1498Szrj       if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
320938fd1498Szrj         {
321038fd1498Szrj           if (dump_enabled_p ())
321138fd1498Szrj 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
321238fd1498Szrj 			     "intermediate value used outside loop.\n");
321338fd1498Szrj 
321438fd1498Szrj           return NULL;
321538fd1498Szrj         }
321638fd1498Szrj 
321738fd1498Szrj       nloop_uses++;
321838fd1498Szrj       if (nloop_uses > 1)
321938fd1498Szrj         {
322038fd1498Szrj           if (dump_enabled_p ())
322138fd1498Szrj 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
322238fd1498Szrj 			     "reduction value used in loop.\n");
322338fd1498Szrj           return NULL;
322438fd1498Szrj         }
322538fd1498Szrj 
322638fd1498Szrj       phi_use_stmt = use_stmt;
322738fd1498Szrj     }
322838fd1498Szrj 
322938fd1498Szrj   edge latch_e = loop_latch_edge (loop);
323038fd1498Szrj   tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
323138fd1498Szrj   if (TREE_CODE (loop_arg) != SSA_NAME)
323238fd1498Szrj     {
323338fd1498Szrj       if (dump_enabled_p ())
323438fd1498Szrj 	{
323538fd1498Szrj 	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
323638fd1498Szrj 			   "reduction: not ssa_name: ");
323738fd1498Szrj 	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, loop_arg);
323838fd1498Szrj           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
323938fd1498Szrj 	}
324038fd1498Szrj       return NULL;
324138fd1498Szrj     }
324238fd1498Szrj 
324338fd1498Szrj   def_stmt = SSA_NAME_DEF_STMT (loop_arg);
324438fd1498Szrj   if (is_gimple_assign (def_stmt))
324538fd1498Szrj     {
324638fd1498Szrj       name = gimple_assign_lhs (def_stmt);
324738fd1498Szrj       phi_def = false;
324838fd1498Szrj     }
324938fd1498Szrj   else if (gimple_code (def_stmt) == GIMPLE_PHI)
325038fd1498Szrj     {
325138fd1498Szrj       name = PHI_RESULT (def_stmt);
325238fd1498Szrj       phi_def = true;
325338fd1498Szrj     }
325438fd1498Szrj   else
325538fd1498Szrj     {
325638fd1498Szrj       if (dump_enabled_p ())
325738fd1498Szrj 	{
325838fd1498Szrj 	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
325938fd1498Szrj 			   "reduction: unhandled reduction operation: ");
326038fd1498Szrj 	  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, def_stmt, 0);
326138fd1498Szrj 	}
326238fd1498Szrj       return NULL;
326338fd1498Szrj     }
326438fd1498Szrj 
326538fd1498Szrj   if (! flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
326638fd1498Szrj     return NULL;
326738fd1498Szrj 
326838fd1498Szrj   nloop_uses = 0;
326938fd1498Szrj   auto_vec<gphi *, 3> lcphis;
327038fd1498Szrj   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
327138fd1498Szrj     {
327238fd1498Szrj       gimple *use_stmt = USE_STMT (use_p);
327338fd1498Szrj       if (is_gimple_debug (use_stmt))
327438fd1498Szrj 	continue;
327538fd1498Szrj       if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
327638fd1498Szrj 	nloop_uses++;
327738fd1498Szrj       else
327838fd1498Szrj 	/* We can have more than one loop-closed PHI.  */
327938fd1498Szrj 	lcphis.safe_push (as_a <gphi *> (use_stmt));
328038fd1498Szrj       if (nloop_uses > 1)
328138fd1498Szrj 	{
328238fd1498Szrj 	  if (dump_enabled_p ())
328338fd1498Szrj 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
328438fd1498Szrj 			     "reduction used in loop.\n");
328538fd1498Szrj 	  return NULL;
328638fd1498Szrj 	}
328738fd1498Szrj     }
328838fd1498Szrj 
328938fd1498Szrj   /* If DEF_STMT is a phi node itself, we expect it to have a single argument
329038fd1498Szrj      defined in the inner loop.  */
329138fd1498Szrj   if (phi_def)
329238fd1498Szrj     {
329338fd1498Szrj       op1 = PHI_ARG_DEF (def_stmt, 0);
329438fd1498Szrj 
329538fd1498Szrj       if (gimple_phi_num_args (def_stmt) != 1
329638fd1498Szrj           || TREE_CODE (op1) != SSA_NAME)
329738fd1498Szrj         {
329838fd1498Szrj           if (dump_enabled_p ())
329938fd1498Szrj 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
330038fd1498Szrj 			     "unsupported phi node definition.\n");
330138fd1498Szrj 
330238fd1498Szrj           return NULL;
330338fd1498Szrj         }
330438fd1498Szrj 
330538fd1498Szrj       def1 = SSA_NAME_DEF_STMT (op1);
330638fd1498Szrj       if (gimple_bb (def1)
330738fd1498Szrj 	  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
330838fd1498Szrj           && loop->inner
330938fd1498Szrj           && flow_bb_inside_loop_p (loop->inner, gimple_bb (def1))
331038fd1498Szrj           && is_gimple_assign (def1)
331138fd1498Szrj 	  && flow_bb_inside_loop_p (loop->inner, gimple_bb (phi_use_stmt)))
331238fd1498Szrj         {
331338fd1498Szrj           if (dump_enabled_p ())
331438fd1498Szrj             report_vect_op (MSG_NOTE, def_stmt,
331538fd1498Szrj 			    "detected double reduction: ");
331638fd1498Szrj 
331738fd1498Szrj           *double_reduc = true;
331838fd1498Szrj           return def_stmt;
331938fd1498Szrj         }
332038fd1498Szrj 
332138fd1498Szrj       return NULL;
332238fd1498Szrj     }
332338fd1498Szrj 
332438fd1498Szrj   /* If we are vectorizing an inner reduction we are executing that
332538fd1498Szrj      in the original order only in case we are not dealing with a
332638fd1498Szrj      double reduction.  */
332738fd1498Szrj   bool check_reduction = true;
332838fd1498Szrj   if (flow_loop_nested_p (vect_loop, loop))
332938fd1498Szrj     {
333038fd1498Szrj       gphi *lcphi;
333138fd1498Szrj       unsigned i;
333238fd1498Szrj       check_reduction = false;
333338fd1498Szrj       FOR_EACH_VEC_ELT (lcphis, i, lcphi)
333438fd1498Szrj 	FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_phi_result (lcphi))
333538fd1498Szrj 	  {
333638fd1498Szrj 	    gimple *use_stmt = USE_STMT (use_p);
333738fd1498Szrj 	    if (is_gimple_debug (use_stmt))
333838fd1498Szrj 	      continue;
333938fd1498Szrj 	    if (! flow_bb_inside_loop_p (vect_loop, gimple_bb (use_stmt)))
334038fd1498Szrj 	      check_reduction = true;
334138fd1498Szrj 	  }
334238fd1498Szrj     }
334338fd1498Szrj 
334438fd1498Szrj   bool nested_in_vect_loop = flow_loop_nested_p (vect_loop, loop);
334538fd1498Szrj   code = orig_code = gimple_assign_rhs_code (def_stmt);
334638fd1498Szrj 
334738fd1498Szrj   /* We can handle "res -= x[i]", which is non-associative by
334838fd1498Szrj      simply rewriting this into "res += -x[i]".  Avoid changing
334938fd1498Szrj      gimple instruction for the first simple tests and only do this
335038fd1498Szrj      if we're allowed to change code at all.  */
335138fd1498Szrj   if (code == MINUS_EXPR && gimple_assign_rhs2 (def_stmt) != phi_name)
335238fd1498Szrj     code = PLUS_EXPR;
335338fd1498Szrj 
335438fd1498Szrj   if (code == COND_EXPR)
335538fd1498Szrj     {
335638fd1498Szrj       if (! nested_in_vect_loop)
335738fd1498Szrj 	*v_reduc_type = COND_REDUCTION;
335838fd1498Szrj 
335938fd1498Szrj       op3 = gimple_assign_rhs1 (def_stmt);
336038fd1498Szrj       if (COMPARISON_CLASS_P (op3))
336138fd1498Szrj         {
336238fd1498Szrj           op4 = TREE_OPERAND (op3, 1);
336338fd1498Szrj           op3 = TREE_OPERAND (op3, 0);
336438fd1498Szrj         }
336538fd1498Szrj       if (op3 == phi_name || op4 == phi_name)
336638fd1498Szrj 	{
336738fd1498Szrj 	  if (dump_enabled_p ())
336838fd1498Szrj 	    report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
336938fd1498Szrj 			    "reduction: condition depends on previous"
337038fd1498Szrj 			    " iteration: ");
337138fd1498Szrj 	  return NULL;
337238fd1498Szrj 	}
337338fd1498Szrj 
337438fd1498Szrj       op1 = gimple_assign_rhs2 (def_stmt);
337538fd1498Szrj       op2 = gimple_assign_rhs3 (def_stmt);
337638fd1498Szrj     }
337738fd1498Szrj   else if (!commutative_tree_code (code) || !associative_tree_code (code))
337838fd1498Szrj     {
337938fd1498Szrj       if (dump_enabled_p ())
338038fd1498Szrj 	report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
338138fd1498Szrj 			"reduction: not commutative/associative: ");
338238fd1498Szrj       return NULL;
338338fd1498Szrj     }
338438fd1498Szrj   else if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
338538fd1498Szrj     {
338638fd1498Szrj       op1 = gimple_assign_rhs1 (def_stmt);
338738fd1498Szrj       op2 = gimple_assign_rhs2 (def_stmt);
338838fd1498Szrj     }
338938fd1498Szrj   else
339038fd1498Szrj     {
339138fd1498Szrj       if (dump_enabled_p ())
339238fd1498Szrj 	report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
339338fd1498Szrj 			"reduction: not handled operation: ");
339438fd1498Szrj       return NULL;
339538fd1498Szrj     }
339638fd1498Szrj 
339738fd1498Szrj   if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
339838fd1498Szrj     {
339938fd1498Szrj       if (dump_enabled_p ())
340038fd1498Szrj 	report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
340138fd1498Szrj 			"reduction: both uses not ssa_names: ");
340238fd1498Szrj 
340338fd1498Szrj       return NULL;
340438fd1498Szrj     }
340538fd1498Szrj 
340638fd1498Szrj   type = TREE_TYPE (gimple_assign_lhs (def_stmt));
340738fd1498Szrj   if ((TREE_CODE (op1) == SSA_NAME
340838fd1498Szrj        && !types_compatible_p (type,TREE_TYPE (op1)))
340938fd1498Szrj       || (TREE_CODE (op2) == SSA_NAME
341038fd1498Szrj           && !types_compatible_p (type, TREE_TYPE (op2)))
341138fd1498Szrj       || (op3 && TREE_CODE (op3) == SSA_NAME
341238fd1498Szrj           && !types_compatible_p (type, TREE_TYPE (op3)))
341338fd1498Szrj       || (op4 && TREE_CODE (op4) == SSA_NAME
341438fd1498Szrj           && !types_compatible_p (type, TREE_TYPE (op4))))
341538fd1498Szrj     {
341638fd1498Szrj       if (dump_enabled_p ())
341738fd1498Szrj         {
341838fd1498Szrj           dump_printf_loc (MSG_NOTE, vect_location,
341938fd1498Szrj 			   "reduction: multiple types: operation type: ");
342038fd1498Szrj           dump_generic_expr (MSG_NOTE, TDF_SLIM, type);
342138fd1498Szrj           dump_printf (MSG_NOTE, ", operands types: ");
342238fd1498Szrj           dump_generic_expr (MSG_NOTE, TDF_SLIM,
342338fd1498Szrj 			     TREE_TYPE (op1));
342438fd1498Szrj           dump_printf (MSG_NOTE, ",");
342538fd1498Szrj           dump_generic_expr (MSG_NOTE, TDF_SLIM,
342638fd1498Szrj 			     TREE_TYPE (op2));
342738fd1498Szrj           if (op3)
342838fd1498Szrj             {
342938fd1498Szrj               dump_printf (MSG_NOTE, ",");
343038fd1498Szrj               dump_generic_expr (MSG_NOTE, TDF_SLIM,
343138fd1498Szrj 				 TREE_TYPE (op3));
343238fd1498Szrj             }
343338fd1498Szrj 
343438fd1498Szrj           if (op4)
343538fd1498Szrj             {
343638fd1498Szrj               dump_printf (MSG_NOTE, ",");
343738fd1498Szrj               dump_generic_expr (MSG_NOTE, TDF_SLIM,
343838fd1498Szrj 				 TREE_TYPE (op4));
343938fd1498Szrj             }
344038fd1498Szrj           dump_printf (MSG_NOTE, "\n");
344138fd1498Szrj         }
344238fd1498Szrj 
344338fd1498Szrj       return NULL;
344438fd1498Szrj     }
344538fd1498Szrj 
344638fd1498Szrj   /* Check whether it's ok to change the order of the computation.
344738fd1498Szrj      Generally, when vectorizing a reduction we change the order of the
344838fd1498Szrj      computation.  This may change the behavior of the program in some
344938fd1498Szrj      cases, so we need to check that this is ok.  One exception is when
345038fd1498Szrj      vectorizing an outer-loop: the inner-loop is executed sequentially,
345138fd1498Szrj      and therefore vectorizing reductions in the inner-loop during
345238fd1498Szrj      outer-loop vectorization is safe.  */
345338fd1498Szrj   if (check_reduction
345438fd1498Szrj       && *v_reduc_type == TREE_CODE_REDUCTION
345538fd1498Szrj       && needs_fold_left_reduction_p (type, code,
345638fd1498Szrj 				      need_wrapping_integral_overflow))
345738fd1498Szrj     *v_reduc_type = FOLD_LEFT_REDUCTION;
345838fd1498Szrj 
345938fd1498Szrj   /* Reduction is safe. We're dealing with one of the following:
346038fd1498Szrj      1) integer arithmetic and no trapv
346138fd1498Szrj      2) floating point arithmetic, and special flags permit this optimization
346238fd1498Szrj      3) nested cycle (i.e., outer loop vectorization).  */
346338fd1498Szrj   if (TREE_CODE (op1) == SSA_NAME)
346438fd1498Szrj     def1 = SSA_NAME_DEF_STMT (op1);
346538fd1498Szrj 
346638fd1498Szrj   if (TREE_CODE (op2) == SSA_NAME)
346738fd1498Szrj     def2 = SSA_NAME_DEF_STMT (op2);
346838fd1498Szrj 
346938fd1498Szrj   if (code != COND_EXPR
347038fd1498Szrj       && ((!def1 || gimple_nop_p (def1)) && (!def2 || gimple_nop_p (def2))))
347138fd1498Szrj     {
347238fd1498Szrj       if (dump_enabled_p ())
347338fd1498Szrj 	report_vect_op (MSG_NOTE, def_stmt, "reduction: no defs for operands: ");
347438fd1498Szrj       return NULL;
347538fd1498Szrj     }
347638fd1498Szrj 
347738fd1498Szrj   /* Check that one def is the reduction def, defined by PHI,
347838fd1498Szrj      the other def is either defined in the loop ("vect_internal_def"),
347938fd1498Szrj      or it's an induction (defined by a loop-header phi-node).  */
348038fd1498Szrj 
348138fd1498Szrj   if (def2 && def2 == phi
348238fd1498Szrj       && (code == COND_EXPR
348338fd1498Szrj 	  || !def1 || gimple_nop_p (def1)
348438fd1498Szrj 	  || !flow_bb_inside_loop_p (loop, gimple_bb (def1))
348538fd1498Szrj           || (def1 && flow_bb_inside_loop_p (loop, gimple_bb (def1))
348638fd1498Szrj               && (is_gimple_assign (def1)
348738fd1498Szrj 		  || is_gimple_call (def1)
348838fd1498Szrj   	          || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1))
348938fd1498Szrj                       == vect_induction_def
349038fd1498Szrj    	          || (gimple_code (def1) == GIMPLE_PHI
349138fd1498Szrj 	              && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1))
349238fd1498Szrj                           == vect_internal_def
349338fd1498Szrj  	              && !is_loop_header_bb_p (gimple_bb (def1)))))))
349438fd1498Szrj     {
349538fd1498Szrj       if (dump_enabled_p ())
349638fd1498Szrj 	report_vect_op (MSG_NOTE, def_stmt, "detected reduction: ");
349738fd1498Szrj       return def_stmt;
349838fd1498Szrj     }
349938fd1498Szrj 
350038fd1498Szrj   if (def1 && def1 == phi
350138fd1498Szrj       && (code == COND_EXPR
350238fd1498Szrj 	  || !def2 || gimple_nop_p (def2)
350338fd1498Szrj 	  || !flow_bb_inside_loop_p (loop, gimple_bb (def2))
350438fd1498Szrj 	  || (def2 && flow_bb_inside_loop_p (loop, gimple_bb (def2))
350538fd1498Szrj 	      && (is_gimple_assign (def2)
350638fd1498Szrj 		  || is_gimple_call (def2)
350738fd1498Szrj 		  || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
350838fd1498Szrj 		       == vect_induction_def
350938fd1498Szrj 		  || (gimple_code (def2) == GIMPLE_PHI
351038fd1498Szrj 		      && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
351138fd1498Szrj 			   == vect_internal_def
351238fd1498Szrj 		      && !is_loop_header_bb_p (gimple_bb (def2)))))))
351338fd1498Szrj     {
351438fd1498Szrj       if (! nested_in_vect_loop && orig_code != MINUS_EXPR)
351538fd1498Szrj 	{
351638fd1498Szrj 	  /* Check if we can swap operands (just for simplicity - so that
351738fd1498Szrj 	     the rest of the code can assume that the reduction variable
351838fd1498Szrj 	     is always the last (second) argument).  */
351938fd1498Szrj 	  if (code == COND_EXPR)
352038fd1498Szrj 	    {
352138fd1498Szrj 	      /* Swap cond_expr by inverting the condition.  */
352238fd1498Szrj 	      tree cond_expr = gimple_assign_rhs1 (def_stmt);
352338fd1498Szrj 	      enum tree_code invert_code = ERROR_MARK;
352438fd1498Szrj 	      enum tree_code cond_code = TREE_CODE (cond_expr);
352538fd1498Szrj 
352638fd1498Szrj 	      if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
352738fd1498Szrj 		{
352838fd1498Szrj 		  bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
352938fd1498Szrj 		  invert_code = invert_tree_comparison (cond_code, honor_nans);
353038fd1498Szrj 		}
353138fd1498Szrj 	      if (invert_code != ERROR_MARK)
353238fd1498Szrj 		{
353338fd1498Szrj 		  TREE_SET_CODE (cond_expr, invert_code);
353438fd1498Szrj 		  swap_ssa_operands (def_stmt,
353538fd1498Szrj 				     gimple_assign_rhs2_ptr (def_stmt),
353638fd1498Szrj 				     gimple_assign_rhs3_ptr (def_stmt));
353738fd1498Szrj 		}
353838fd1498Szrj 	      else
353938fd1498Szrj 		{
354038fd1498Szrj 		  if (dump_enabled_p ())
354138fd1498Szrj 		    report_vect_op (MSG_NOTE, def_stmt,
354238fd1498Szrj 				    "detected reduction: cannot swap operands "
354338fd1498Szrj 				    "for cond_expr");
354438fd1498Szrj 		  return NULL;
354538fd1498Szrj 		}
354638fd1498Szrj 	    }
354738fd1498Szrj 	  else
354838fd1498Szrj 	    swap_ssa_operands (def_stmt, gimple_assign_rhs1_ptr (def_stmt),
354938fd1498Szrj 			       gimple_assign_rhs2_ptr (def_stmt));
355038fd1498Szrj 
355138fd1498Szrj 	  if (dump_enabled_p ())
355238fd1498Szrj 	    report_vect_op (MSG_NOTE, def_stmt,
355338fd1498Szrj 			    "detected reduction: need to swap operands: ");
355438fd1498Szrj 
355538fd1498Szrj 	  if (CONSTANT_CLASS_P (gimple_assign_rhs1 (def_stmt)))
355638fd1498Szrj 	    LOOP_VINFO_OPERANDS_SWAPPED (loop_info) = true;
355738fd1498Szrj         }
355838fd1498Szrj       else
355938fd1498Szrj         {
356038fd1498Szrj           if (dump_enabled_p ())
356138fd1498Szrj             report_vect_op (MSG_NOTE, def_stmt, "detected reduction: ");
356238fd1498Szrj         }
356338fd1498Szrj 
356438fd1498Szrj       return def_stmt;
356538fd1498Szrj     }
356638fd1498Szrj 
356738fd1498Szrj   /* Try to find SLP reduction chain.  */
356838fd1498Szrj   if (! nested_in_vect_loop
356938fd1498Szrj       && code != COND_EXPR
357038fd1498Szrj       && orig_code != MINUS_EXPR
357138fd1498Szrj       && vect_is_slp_reduction (loop_info, phi, def_stmt))
357238fd1498Szrj     {
357338fd1498Szrj       if (dump_enabled_p ())
357438fd1498Szrj         report_vect_op (MSG_NOTE, def_stmt,
357538fd1498Szrj 			"reduction: detected reduction chain: ");
357638fd1498Szrj 
357738fd1498Szrj       return def_stmt;
357838fd1498Szrj     }
357938fd1498Szrj 
358038fd1498Szrj   /* Dissolve group eventually half-built by vect_is_slp_reduction.  */
358138fd1498Szrj   gimple *first = GROUP_FIRST_ELEMENT (vinfo_for_stmt (def_stmt));
358238fd1498Szrj   while (first)
358338fd1498Szrj     {
358438fd1498Szrj       gimple *next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (first));
358538fd1498Szrj       GROUP_FIRST_ELEMENT (vinfo_for_stmt (first)) = NULL;
358638fd1498Szrj       GROUP_NEXT_ELEMENT (vinfo_for_stmt (first)) = NULL;
358738fd1498Szrj       first = next;
358838fd1498Szrj     }
358938fd1498Szrj 
359038fd1498Szrj   /* Look for the expression computing loop_arg from loop PHI result.  */
359138fd1498Szrj   if (check_reduction_path (vect_location, loop, as_a <gphi *> (phi), loop_arg,
359238fd1498Szrj 			    code))
359338fd1498Szrj     return def_stmt;
359438fd1498Szrj 
359538fd1498Szrj   if (dump_enabled_p ())
359638fd1498Szrj     {
359738fd1498Szrj       report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
359838fd1498Szrj 		      "reduction: unknown pattern: ");
359938fd1498Szrj     }
360038fd1498Szrj 
360138fd1498Szrj   return NULL;
360238fd1498Szrj }
360338fd1498Szrj 
360438fd1498Szrj /* Wrapper around vect_is_simple_reduction, which will modify code
360538fd1498Szrj    in-place if it enables detection of more reductions.  Arguments
360638fd1498Szrj    as there.  */
360738fd1498Szrj 
360838fd1498Szrj gimple *
vect_force_simple_reduction(loop_vec_info loop_info,gimple * phi,bool * double_reduc,bool need_wrapping_integral_overflow)360938fd1498Szrj vect_force_simple_reduction (loop_vec_info loop_info, gimple *phi,
361038fd1498Szrj 			     bool *double_reduc,
361138fd1498Szrj 			     bool need_wrapping_integral_overflow)
361238fd1498Szrj {
361338fd1498Szrj   enum vect_reduction_type v_reduc_type;
361438fd1498Szrj   gimple *def = vect_is_simple_reduction (loop_info, phi, double_reduc,
361538fd1498Szrj 					  need_wrapping_integral_overflow,
361638fd1498Szrj 					  &v_reduc_type);
361738fd1498Szrj   if (def)
361838fd1498Szrj     {
      /* A reduction was detected: record the detected reduction kind on
	 both the loop-header PHI and the reduction definition statement,
	 and cross-link the two statements through their REDUC_DEF
	 fields so each can be reached from the other later.  */
361938fd1498Szrj       stmt_vec_info reduc_def_info = vinfo_for_stmt (phi);
362038fd1498Szrj       STMT_VINFO_REDUC_TYPE (reduc_def_info) = v_reduc_type;
362138fd1498Szrj       STMT_VINFO_REDUC_DEF (reduc_def_info) = def;
362238fd1498Szrj       reduc_def_info = vinfo_for_stmt (def);
362338fd1498Szrj       STMT_VINFO_REDUC_TYPE (reduc_def_info) = v_reduc_type;
362438fd1498Szrj       STMT_VINFO_REDUC_DEF (reduc_def_info) = phi;
362538fd1498Szrj     }
  /* Returns NULL when no simple reduction was recognized.  */
362638fd1498Szrj   return def;
362738fd1498Szrj }
362838fd1498Szrj 
362938fd1498Szrj /* Calculate cost of peeling the loop PEEL_ITERS_PROLOGUE times.  */
/* Sets *PEEL_ITERS_EPILOGUE from the known iteration count, or to the
   vf/2 cost-model heuristic when the count is unknown at compile time.
   The scalar cost of one loop iteration is taken from SCALAR_COST_VEC
   and recorded, scaled by the respective peel counts, into
   PROLOGUE_COST_VEC and EPILOGUE_COST_VEC.  Returns the accumulated
   value reported by the record_stmt_cost calls made here.  */
363038fd1498Szrj int
vect_get_known_peeling_cost(loop_vec_info loop_vinfo,int peel_iters_prologue,int * peel_iters_epilogue,stmt_vector_for_cost * scalar_cost_vec,stmt_vector_for_cost * prologue_cost_vec,stmt_vector_for_cost * epilogue_cost_vec)363138fd1498Szrj vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue,
363238fd1498Szrj                              int *peel_iters_epilogue,
363338fd1498Szrj                              stmt_vector_for_cost *scalar_cost_vec,
363438fd1498Szrj 			     stmt_vector_for_cost *prologue_cost_vec,
363538fd1498Szrj 			     stmt_vector_for_cost *epilogue_cost_vec)
363638fd1498Szrj {
363738fd1498Szrj   int retval = 0;
363838fd1498Szrj   int assumed_vf = vect_vf_for_cost (loop_vinfo);
363938fd1498Szrj 
364038fd1498Szrj   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
364138fd1498Szrj     {
364238fd1498Szrj       *peel_iters_epilogue = assumed_vf / 2;
364338fd1498Szrj       if (dump_enabled_p ())
364438fd1498Szrj         dump_printf_loc (MSG_NOTE, vect_location,
364538fd1498Szrj 			 "cost model: epilogue peel iters set to vf/2 "
364638fd1498Szrj 			 "because loop iterations are unknown .\n");
364738fd1498Szrj 
364838fd1498Szrj       /* If peeled iterations are known but number of scalar loop
364938fd1498Szrj          iterations are unknown, count a taken branch per peeled loop.  */
365038fd1498Szrj       retval = record_stmt_cost (prologue_cost_vec, 1, cond_branch_taken,
365138fd1498Szrj 				 NULL, 0, vect_prologue);
      /* Accumulate (do not overwrite) the epilogue branch cost into
	 RETVAL, and record it into the epilogue cost vector so it
	 matches its vect_epilogue location.  Previously this assignment
	 clobbered the prologue branch cost just computed above and
	 stored the entry in PROLOGUE_COST_VEC.  */
365238fd1498Szrj       retval += record_stmt_cost (epilogue_cost_vec, 1, cond_branch_taken,
365338fd1498Szrj 				  NULL, 0, vect_epilogue);
365438fd1498Szrj     }
365538fd1498Szrj   else
365638fd1498Szrj     {
365738fd1498Szrj       int niters = LOOP_VINFO_INT_NITERS (loop_vinfo);
      /* The prologue cannot peel more iterations than the loop runs.  */
365838fd1498Szrj       peel_iters_prologue = niters < peel_iters_prologue ?
365938fd1498Szrj                             niters : peel_iters_prologue;
366038fd1498Szrj       *peel_iters_epilogue = (niters - peel_iters_prologue) % assumed_vf;
366138fd1498Szrj       /* If we need to peel for gaps, but no peeling is required, we have to
366238fd1498Szrj 	 peel VF iterations.  */
366338fd1498Szrj       if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) && !*peel_iters_epilogue)
366438fd1498Szrj 	*peel_iters_epilogue = assumed_vf;
366538fd1498Szrj     }
366638fd1498Szrj 
366738fd1498Szrj   stmt_info_for_cost *si;
366838fd1498Szrj   int j;
  /* Charge PEEL_ITERS_PROLOGUE copies of the scalar loop body to the
     prologue and *PEEL_ITERS_EPILOGUE copies to the epilogue.  */
366938fd1498Szrj   if (peel_iters_prologue)
367038fd1498Szrj     FOR_EACH_VEC_ELT (*scalar_cost_vec, j, si)
367138fd1498Szrj 	{
367238fd1498Szrj 	  stmt_vec_info stmt_info
367338fd1498Szrj 	    = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
367438fd1498Szrj 	  retval += record_stmt_cost (prologue_cost_vec,
367538fd1498Szrj 				      si->count * peel_iters_prologue,
367638fd1498Szrj 				      si->kind, stmt_info, si->misalign,
367738fd1498Szrj 				      vect_prologue);
367838fd1498Szrj 	}
367938fd1498Szrj   if (*peel_iters_epilogue)
368038fd1498Szrj     FOR_EACH_VEC_ELT (*scalar_cost_vec, j, si)
368138fd1498Szrj 	{
368238fd1498Szrj 	  stmt_vec_info stmt_info
368338fd1498Szrj 	    = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
368438fd1498Szrj 	  retval += record_stmt_cost (epilogue_cost_vec,
368538fd1498Szrj 				      si->count * *peel_iters_epilogue,
368638fd1498Szrj 				      si->kind, stmt_info, si->misalign,
368738fd1498Szrj 				      vect_epilogue);
368838fd1498Szrj 	}
368938fd1498Szrj 
369038fd1498Szrj   return retval;
369138fd1498Szrj }
369238fd1498Szrj 
369338fd1498Szrj /* Function vect_estimate_min_profitable_iters
369438fd1498Szrj 
369538fd1498Szrj    Return the number of iterations required for the vector version of the
369638fd1498Szrj    loop to be profitable relative to the cost of the scalar version of the
369738fd1498Szrj    loop.
369838fd1498Szrj 
369938fd1498Szrj    *RET_MIN_PROFITABLE_NITERS is a cost model profitability threshold
370038fd1498Szrj    of iterations for vectorization.  -1 value means loop vectorization
370138fd1498Szrj    is not profitable.  This returned value may be used for dynamic
370238fd1498Szrj    profitability check.
370338fd1498Szrj 
370438fd1498Szrj    *RET_MIN_PROFITABLE_ESTIMATE is a profitability threshold to be used
370538fd1498Szrj    for static check against estimated number of iterations.  */
370638fd1498Szrj 
370738fd1498Szrj static void
vect_estimate_min_profitable_iters(loop_vec_info loop_vinfo,int * ret_min_profitable_niters,int * ret_min_profitable_estimate)370838fd1498Szrj vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
370938fd1498Szrj 				    int *ret_min_profitable_niters,
371038fd1498Szrj 				    int *ret_min_profitable_estimate)
371138fd1498Szrj {
371238fd1498Szrj   int min_profitable_iters;
371338fd1498Szrj   int min_profitable_estimate;
371438fd1498Szrj   int peel_iters_prologue;
371538fd1498Szrj   int peel_iters_epilogue;
371638fd1498Szrj   unsigned vec_inside_cost = 0;
371738fd1498Szrj   int vec_outside_cost = 0;
371838fd1498Szrj   unsigned vec_prologue_cost = 0;
371938fd1498Szrj   unsigned vec_epilogue_cost = 0;
372038fd1498Szrj   int scalar_single_iter_cost = 0;
372138fd1498Szrj   int scalar_outside_cost = 0;
372238fd1498Szrj   int assumed_vf = vect_vf_for_cost (loop_vinfo);
372338fd1498Szrj   int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
372438fd1498Szrj   void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
372538fd1498Szrj 
372638fd1498Szrj   /* Cost model disabled.  */
372738fd1498Szrj   if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
372838fd1498Szrj     {
372938fd1498Szrj       dump_printf_loc (MSG_NOTE, vect_location, "cost model disabled.\n");
373038fd1498Szrj       *ret_min_profitable_niters = 0;
373138fd1498Szrj       *ret_min_profitable_estimate = 0;
373238fd1498Szrj       return;
373338fd1498Szrj     }
373438fd1498Szrj 
373538fd1498Szrj   /* Requires loop versioning tests to handle misalignment.  */
373638fd1498Szrj   if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
373738fd1498Szrj     {
373838fd1498Szrj       /*  FIXME: Make cost depend on complexity of individual check.  */
373938fd1498Szrj       unsigned len = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).length ();
374038fd1498Szrj       (void) add_stmt_cost (target_cost_data, len, vector_stmt, NULL, 0,
374138fd1498Szrj 			    vect_prologue);
374238fd1498Szrj       dump_printf (MSG_NOTE,
374338fd1498Szrj                    "cost model: Adding cost of checks for loop "
374438fd1498Szrj                    "versioning to treat misalignment.\n");
374538fd1498Szrj     }
374638fd1498Szrj 
374738fd1498Szrj   /* Requires loop versioning with alias checks.  */
374838fd1498Szrj   if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
374938fd1498Szrj     {
375038fd1498Szrj       /*  FIXME: Make cost depend on complexity of individual check.  */
375138fd1498Szrj       unsigned len = LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo).length ();
375238fd1498Szrj       (void) add_stmt_cost (target_cost_data, len, vector_stmt, NULL, 0,
375338fd1498Szrj 			    vect_prologue);
375438fd1498Szrj       len = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).length ();
375538fd1498Szrj       if (len)
375638fd1498Szrj 	/* Count LEN - 1 ANDs and LEN comparisons.  */
375738fd1498Szrj 	(void) add_stmt_cost (target_cost_data, len * 2 - 1, scalar_stmt,
375838fd1498Szrj 			      NULL, 0, vect_prologue);
375938fd1498Szrj       len = LOOP_VINFO_LOWER_BOUNDS (loop_vinfo).length ();
376038fd1498Szrj       if (len)
376138fd1498Szrj 	{
376238fd1498Szrj 	  /* Count LEN - 1 ANDs and LEN comparisons.  */
376338fd1498Szrj 	  unsigned int nstmts = len * 2 - 1;
376438fd1498Szrj 	  /* +1 for each bias that needs adding.  */
376538fd1498Szrj 	  for (unsigned int i = 0; i < len; ++i)
376638fd1498Szrj 	    if (!LOOP_VINFO_LOWER_BOUNDS (loop_vinfo)[i].unsigned_p)
376738fd1498Szrj 	      nstmts += 1;
376838fd1498Szrj 	  (void) add_stmt_cost (target_cost_data, nstmts, scalar_stmt,
376938fd1498Szrj 				NULL, 0, vect_prologue);
377038fd1498Szrj 	}
377138fd1498Szrj       dump_printf (MSG_NOTE,
377238fd1498Szrj                    "cost model: Adding cost of checks for loop "
377338fd1498Szrj                    "versioning aliasing.\n");
377438fd1498Szrj     }
377538fd1498Szrj 
377638fd1498Szrj   /* Requires loop versioning with niter checks.  */
377738fd1498Szrj   if (LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo))
377838fd1498Szrj     {
377938fd1498Szrj       /*  FIXME: Make cost depend on complexity of individual check.  */
378038fd1498Szrj       (void) add_stmt_cost (target_cost_data, 1, vector_stmt, NULL, 0,
378138fd1498Szrj 			    vect_prologue);
378238fd1498Szrj       dump_printf (MSG_NOTE,
378338fd1498Szrj 		   "cost model: Adding cost of checks for loop "
378438fd1498Szrj 		   "versioning niters.\n");
378538fd1498Szrj     }
378638fd1498Szrj 
378738fd1498Szrj   if (LOOP_REQUIRES_VERSIONING (loop_vinfo))
378838fd1498Szrj     (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken, NULL, 0,
378938fd1498Szrj 			  vect_prologue);
379038fd1498Szrj 
379138fd1498Szrj   /* Count statements in scalar loop.  Using this as scalar cost for a single
379238fd1498Szrj      iteration for now.
379338fd1498Szrj 
379438fd1498Szrj      TODO: Add outer loop support.
379538fd1498Szrj 
379638fd1498Szrj      TODO: Consider assigning different costs to different scalar
379738fd1498Szrj      statements.  */
379838fd1498Szrj 
379938fd1498Szrj   scalar_single_iter_cost
380038fd1498Szrj     = LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST (loop_vinfo);
380138fd1498Szrj 
380238fd1498Szrj   /* Add additional cost for the peeled instructions in prologue and epilogue
380338fd1498Szrj      loop.  (For fully-masked loops there will be no peeling.)
380438fd1498Szrj 
380538fd1498Szrj      FORNOW: If we don't know the value of peel_iters for prologue or epilogue
380638fd1498Szrj      at compile-time - we assume it's vf/2 (the worst would be vf-1).
380738fd1498Szrj 
380838fd1498Szrj      TODO: Build an expression that represents peel_iters for prologue and
380938fd1498Szrj      epilogue to be used in a run-time test.  */
381038fd1498Szrj 
381138fd1498Szrj   if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
381238fd1498Szrj     {
381338fd1498Szrj       peel_iters_prologue = 0;
381438fd1498Szrj       peel_iters_epilogue = 0;
381538fd1498Szrj 
381638fd1498Szrj       if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
381738fd1498Szrj 	{
381838fd1498Szrj 	  /* We need to peel exactly one iteration.  */
381938fd1498Szrj 	  peel_iters_epilogue += 1;
382038fd1498Szrj 	  stmt_info_for_cost *si;
382138fd1498Szrj 	  int j;
382238fd1498Szrj 	  FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
382338fd1498Szrj 			    j, si)
382438fd1498Szrj 	    {
382538fd1498Szrj 	      struct _stmt_vec_info *stmt_info
382638fd1498Szrj 		= si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
382738fd1498Szrj 	      (void) add_stmt_cost (target_cost_data, si->count,
382838fd1498Szrj 				    si->kind, stmt_info, si->misalign,
382938fd1498Szrj 				    vect_epilogue);
383038fd1498Szrj 	    }
383138fd1498Szrj 	}
383238fd1498Szrj     }
383338fd1498Szrj   else if (npeel < 0)
383438fd1498Szrj     {
383538fd1498Szrj       peel_iters_prologue = assumed_vf / 2;
383638fd1498Szrj       dump_printf (MSG_NOTE, "cost model: "
383738fd1498Szrj                    "prologue peel iters set to vf/2.\n");
383838fd1498Szrj 
383938fd1498Szrj       /* If peeling for alignment is unknown, loop bound of main loop becomes
384038fd1498Szrj          unknown.  */
384138fd1498Szrj       peel_iters_epilogue = assumed_vf / 2;
384238fd1498Szrj       dump_printf (MSG_NOTE, "cost model: "
384338fd1498Szrj                    "epilogue peel iters set to vf/2 because "
384438fd1498Szrj                    "peeling for alignment is unknown.\n");
384538fd1498Szrj 
384638fd1498Szrj       /* If peeled iterations are unknown, count a taken branch and a not taken
384738fd1498Szrj          branch per peeled loop. Even if scalar loop iterations are known,
384838fd1498Szrj          vector iterations are not known since peeled prologue iterations are
384938fd1498Szrj          not known. Hence guards remain the same.  */
385038fd1498Szrj       (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken,
385138fd1498Szrj 			    NULL, 0, vect_prologue);
385238fd1498Szrj       (void) add_stmt_cost (target_cost_data, 1, cond_branch_not_taken,
385338fd1498Szrj 			    NULL, 0, vect_prologue);
385438fd1498Szrj       (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken,
385538fd1498Szrj 			    NULL, 0, vect_epilogue);
385638fd1498Szrj       (void) add_stmt_cost (target_cost_data, 1, cond_branch_not_taken,
385738fd1498Szrj 			    NULL, 0, vect_epilogue);
385838fd1498Szrj       stmt_info_for_cost *si;
385938fd1498Szrj       int j;
386038fd1498Szrj       FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo), j, si)
386138fd1498Szrj 	{
386238fd1498Szrj 	  struct _stmt_vec_info *stmt_info
386338fd1498Szrj 	    = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
386438fd1498Szrj 	  (void) add_stmt_cost (target_cost_data,
386538fd1498Szrj 				si->count * peel_iters_prologue,
386638fd1498Szrj 				si->kind, stmt_info, si->misalign,
386738fd1498Szrj 				vect_prologue);
386838fd1498Szrj 	  (void) add_stmt_cost (target_cost_data,
386938fd1498Szrj 				si->count * peel_iters_epilogue,
387038fd1498Szrj 				si->kind, stmt_info, si->misalign,
387138fd1498Szrj 				vect_epilogue);
387238fd1498Szrj 	}
387338fd1498Szrj     }
387438fd1498Szrj   else
387538fd1498Szrj     {
387638fd1498Szrj       stmt_vector_for_cost prologue_cost_vec, epilogue_cost_vec;
387738fd1498Szrj       stmt_info_for_cost *si;
387838fd1498Szrj       int j;
387938fd1498Szrj       void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
388038fd1498Szrj 
388138fd1498Szrj       prologue_cost_vec.create (2);
388238fd1498Szrj       epilogue_cost_vec.create (2);
388338fd1498Szrj       peel_iters_prologue = npeel;
388438fd1498Szrj 
388538fd1498Szrj       (void) vect_get_known_peeling_cost (loop_vinfo, peel_iters_prologue,
388638fd1498Szrj 					  &peel_iters_epilogue,
388738fd1498Szrj 					  &LOOP_VINFO_SCALAR_ITERATION_COST
388838fd1498Szrj 					    (loop_vinfo),
388938fd1498Szrj 					  &prologue_cost_vec,
389038fd1498Szrj 					  &epilogue_cost_vec);
389138fd1498Szrj 
389238fd1498Szrj       FOR_EACH_VEC_ELT (prologue_cost_vec, j, si)
389338fd1498Szrj 	{
389438fd1498Szrj 	  struct _stmt_vec_info *stmt_info
389538fd1498Szrj 	    = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
389638fd1498Szrj 	  (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
389738fd1498Szrj 				si->misalign, vect_prologue);
389838fd1498Szrj 	}
389938fd1498Szrj 
390038fd1498Szrj       FOR_EACH_VEC_ELT (epilogue_cost_vec, j, si)
390138fd1498Szrj 	{
390238fd1498Szrj 	  struct _stmt_vec_info *stmt_info
390338fd1498Szrj 	    = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
390438fd1498Szrj 	  (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
390538fd1498Szrj 				si->misalign, vect_epilogue);
390638fd1498Szrj 	}
390738fd1498Szrj 
390838fd1498Szrj       prologue_cost_vec.release ();
390938fd1498Szrj       epilogue_cost_vec.release ();
391038fd1498Szrj     }
391138fd1498Szrj 
391238fd1498Szrj   /* FORNOW: The scalar outside cost is incremented in one of the
391338fd1498Szrj      following ways:
391438fd1498Szrj 
391538fd1498Szrj      1. The vectorizer checks for alignment and aliasing and generates
391638fd1498Szrj      a condition that allows dynamic vectorization.  A cost model
391738fd1498Szrj      check is ANDED with the versioning condition.  Hence scalar code
391838fd1498Szrj      path now has the added cost of the versioning check.
391938fd1498Szrj 
392038fd1498Szrj        if (cost > th & versioning_check)
392138fd1498Szrj          jmp to vector code
392238fd1498Szrj 
392338fd1498Szrj      Hence run-time scalar is incremented by not-taken branch cost.
392438fd1498Szrj 
392538fd1498Szrj      2. The vectorizer then checks if a prologue is required.  If the
392638fd1498Szrj      cost model check was not done before during versioning, it has to
392738fd1498Szrj      be done before the prologue check.
392838fd1498Szrj 
392938fd1498Szrj        if (cost <= th)
393038fd1498Szrj          prologue = scalar_iters
393138fd1498Szrj        if (prologue == 0)
393238fd1498Szrj          jmp to vector code
393338fd1498Szrj        else
393438fd1498Szrj          execute prologue
393538fd1498Szrj        if (prologue == num_iters)
393638fd1498Szrj 	 go to exit
393738fd1498Szrj 
393838fd1498Szrj      Hence the run-time scalar cost is incremented by a taken branch,
393938fd1498Szrj      plus a not-taken branch, plus a taken branch cost.
394038fd1498Szrj 
394138fd1498Szrj      3. The vectorizer then checks if an epilogue is required.  If the
394238fd1498Szrj      cost model check was not done before during prologue check, it
394338fd1498Szrj      has to be done with the epilogue check.
394438fd1498Szrj 
394538fd1498Szrj        if (prologue == 0)
394638fd1498Szrj          jmp to vector code
394738fd1498Szrj        else
394838fd1498Szrj          execute prologue
394938fd1498Szrj        if (prologue == num_iters)
395038fd1498Szrj 	 go to exit
395138fd1498Szrj        vector code:
395238fd1498Szrj          if ((cost <= th) | (scalar_iters-prologue-epilogue == 0))
395338fd1498Szrj            jmp to epilogue
395438fd1498Szrj 
395538fd1498Szrj      Hence the run-time scalar cost should be incremented by 2 taken
395638fd1498Szrj      branches.
395738fd1498Szrj 
395838fd1498Szrj      TODO: The back end may reorder the BBS's differently and reverse
395938fd1498Szrj      conditions/branch directions.  Change the estimates below to
396038fd1498Szrj      something more reasonable.  */
396138fd1498Szrj 
396238fd1498Szrj   /* If the number of iterations is known and we do not do versioning, we can
396338fd1498Szrj      decide whether to vectorize at compile time.  Hence the scalar version
396438fd1498Szrj      do not carry cost model guard costs.  */
396538fd1498Szrj   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
396638fd1498Szrj       || LOOP_REQUIRES_VERSIONING (loop_vinfo))
396738fd1498Szrj     {
396838fd1498Szrj       /* Cost model check occurs at versioning.  */
396938fd1498Szrj       if (LOOP_REQUIRES_VERSIONING (loop_vinfo))
397038fd1498Szrj 	scalar_outside_cost += vect_get_stmt_cost (cond_branch_not_taken);
397138fd1498Szrj       else
397238fd1498Szrj 	{
397338fd1498Szrj 	  /* Cost model check occurs at prologue generation.  */
397438fd1498Szrj 	  if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0)
397538fd1498Szrj 	    scalar_outside_cost += 2 * vect_get_stmt_cost (cond_branch_taken)
397638fd1498Szrj 	      + vect_get_stmt_cost (cond_branch_not_taken);
397738fd1498Szrj 	  /* Cost model check occurs at epilogue generation.  */
397838fd1498Szrj 	  else
397938fd1498Szrj 	    scalar_outside_cost += 2 * vect_get_stmt_cost (cond_branch_taken);
398038fd1498Szrj 	}
398138fd1498Szrj     }
398238fd1498Szrj 
398338fd1498Szrj   /* Complete the target-specific cost calculations.  */
398438fd1498Szrj   finish_cost (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo), &vec_prologue_cost,
398538fd1498Szrj 	       &vec_inside_cost, &vec_epilogue_cost);
398638fd1498Szrj 
398738fd1498Szrj   vec_outside_cost = (int)(vec_prologue_cost + vec_epilogue_cost);
398838fd1498Szrj 
398938fd1498Szrj   if (dump_enabled_p ())
399038fd1498Szrj     {
399138fd1498Szrj       dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
399238fd1498Szrj       dump_printf (MSG_NOTE, "  Vector inside of loop cost: %d\n",
399338fd1498Szrj                    vec_inside_cost);
399438fd1498Szrj       dump_printf (MSG_NOTE, "  Vector prologue cost: %d\n",
399538fd1498Szrj                    vec_prologue_cost);
399638fd1498Szrj       dump_printf (MSG_NOTE, "  Vector epilogue cost: %d\n",
399738fd1498Szrj                    vec_epilogue_cost);
399838fd1498Szrj       dump_printf (MSG_NOTE, "  Scalar iteration cost: %d\n",
399938fd1498Szrj                    scalar_single_iter_cost);
400038fd1498Szrj       dump_printf (MSG_NOTE, "  Scalar outside cost: %d\n",
400138fd1498Szrj                    scalar_outside_cost);
400238fd1498Szrj       dump_printf (MSG_NOTE, "  Vector outside cost: %d\n",
400338fd1498Szrj                    vec_outside_cost);
400438fd1498Szrj       dump_printf (MSG_NOTE, "  prologue iterations: %d\n",
400538fd1498Szrj                    peel_iters_prologue);
400638fd1498Szrj       dump_printf (MSG_NOTE, "  epilogue iterations: %d\n",
400738fd1498Szrj                    peel_iters_epilogue);
400838fd1498Szrj     }
400938fd1498Szrj 
401038fd1498Szrj   /* Calculate number of iterations required to make the vector version
401138fd1498Szrj      profitable, relative to the loop bodies only.  The following condition
401238fd1498Szrj      must hold true:
401338fd1498Szrj      SIC * niters + SOC > VIC * ((niters-PL_ITERS-EP_ITERS)/VF) + VOC
401438fd1498Szrj      where
401538fd1498Szrj      SIC = scalar iteration cost, VIC = vector iteration cost,
401638fd1498Szrj      VOC = vector outside cost, VF = vectorization factor,
401738fd1498Szrj      PL_ITERS = prologue iterations, EP_ITERS= epilogue iterations
401838fd1498Szrj      SOC = scalar outside cost for run time cost model check.  */
401938fd1498Szrj 
402038fd1498Szrj   if ((scalar_single_iter_cost * assumed_vf) > (int) vec_inside_cost)
402138fd1498Szrj     {
402238fd1498Szrj       min_profitable_iters = ((vec_outside_cost - scalar_outside_cost)
402338fd1498Szrj 			      * assumed_vf
402438fd1498Szrj 			      - vec_inside_cost * peel_iters_prologue
402538fd1498Szrj 			      - vec_inside_cost * peel_iters_epilogue);
402638fd1498Szrj       if (min_profitable_iters <= 0)
402738fd1498Szrj         min_profitable_iters = 0;
402838fd1498Szrj       else
402938fd1498Szrj 	{
403038fd1498Szrj 	  min_profitable_iters /= ((scalar_single_iter_cost * assumed_vf)
403138fd1498Szrj 				   - vec_inside_cost);
403238fd1498Szrj 
403338fd1498Szrj 	  if ((scalar_single_iter_cost * assumed_vf * min_profitable_iters)
403438fd1498Szrj 	      <= (((int) vec_inside_cost * min_profitable_iters)
403538fd1498Szrj 		  + (((int) vec_outside_cost - scalar_outside_cost)
403638fd1498Szrj 		     * assumed_vf)))
403738fd1498Szrj 	    min_profitable_iters++;
403838fd1498Szrj 	}
403938fd1498Szrj     }
404038fd1498Szrj   /* vector version will never be profitable.  */
404138fd1498Szrj   else
404238fd1498Szrj     {
404338fd1498Szrj       if (LOOP_VINFO_LOOP (loop_vinfo)->force_vectorize)
404438fd1498Szrj 	warning_at (vect_location, OPT_Wopenmp_simd, "vectorization "
404538fd1498Szrj 		    "did not happen for a simd loop");
404638fd1498Szrj 
404738fd1498Szrj       if (dump_enabled_p ())
404838fd1498Szrj         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
404938fd1498Szrj 			 "cost model: the vector iteration cost = %d "
405038fd1498Szrj 			 "divided by the scalar iteration cost = %d "
405138fd1498Szrj 			 "is greater or equal to the vectorization factor = %d"
405238fd1498Szrj                          ".\n",
405338fd1498Szrj 			 vec_inside_cost, scalar_single_iter_cost, assumed_vf);
405438fd1498Szrj       *ret_min_profitable_niters = -1;
405538fd1498Szrj       *ret_min_profitable_estimate = -1;
405638fd1498Szrj       return;
405738fd1498Szrj     }
405838fd1498Szrj 
405938fd1498Szrj   dump_printf (MSG_NOTE,
406038fd1498Szrj 	       "  Calculated minimum iters for profitability: %d\n",
406138fd1498Szrj 	       min_profitable_iters);
406238fd1498Szrj 
406338fd1498Szrj   if (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
406438fd1498Szrj       && min_profitable_iters < (assumed_vf + peel_iters_prologue))
406538fd1498Szrj     /* We want the vectorized loop to execute at least once.  */
406638fd1498Szrj     min_profitable_iters = assumed_vf + peel_iters_prologue;
406738fd1498Szrj 
406838fd1498Szrj   if (dump_enabled_p ())
406938fd1498Szrj     dump_printf_loc (MSG_NOTE, vect_location,
407038fd1498Szrj                      "  Runtime profitability threshold = %d\n",
407138fd1498Szrj                      min_profitable_iters);
407238fd1498Szrj 
407338fd1498Szrj   *ret_min_profitable_niters = min_profitable_iters;
407438fd1498Szrj 
407538fd1498Szrj   /* Calculate number of iterations required to make the vector version
407638fd1498Szrj      profitable, relative to the loop bodies only.
407738fd1498Szrj 
407838fd1498Szrj      Non-vectorized variant is SIC * niters and it must win over vector
407938fd1498Szrj      variant on the expected loop trip count.  The following condition must hold true:
408038fd1498Szrj      SIC * niters > VIC * ((niters-PL_ITERS-EP_ITERS)/VF) + VOC + SOC  */
408138fd1498Szrj 
408238fd1498Szrj   if (vec_outside_cost <= 0)
408338fd1498Szrj     min_profitable_estimate = 0;
408438fd1498Szrj   else
408538fd1498Szrj     {
408638fd1498Szrj       min_profitable_estimate = ((vec_outside_cost + scalar_outside_cost)
408738fd1498Szrj 				 * assumed_vf
408838fd1498Szrj 				 - vec_inside_cost * peel_iters_prologue
408938fd1498Szrj 				 - vec_inside_cost * peel_iters_epilogue)
409038fd1498Szrj 				 / ((scalar_single_iter_cost * assumed_vf)
409138fd1498Szrj 				   - vec_inside_cost);
409238fd1498Szrj     }
409338fd1498Szrj   min_profitable_estimate = MAX (min_profitable_estimate, min_profitable_iters);
409438fd1498Szrj   if (dump_enabled_p ())
409538fd1498Szrj     dump_printf_loc (MSG_NOTE, vect_location,
409638fd1498Szrj 		     "  Static estimate profitability threshold = %d\n",
409738fd1498Szrj 		     min_profitable_estimate);
409838fd1498Szrj 
409938fd1498Szrj   *ret_min_profitable_estimate = min_profitable_estimate;
410038fd1498Szrj }
410138fd1498Szrj 
410238fd1498Szrj /* Writes into SEL a mask for a vec_perm, equivalent to a vec_shr by OFFSET
410338fd1498Szrj    vector elements (not bits) for a vector with NELT elements.  */
410438fd1498Szrj static void
calc_vec_perm_mask_for_shift(unsigned int offset,unsigned int nelt,vec_perm_builder * sel)410538fd1498Szrj calc_vec_perm_mask_for_shift (unsigned int offset, unsigned int nelt,
410638fd1498Szrj 			      vec_perm_builder *sel)
410738fd1498Szrj {
410838fd1498Szrj   /* The encoding is a single stepped pattern.  Any wrap-around is handled
410938fd1498Szrj      by vec_perm_indices.  */
411038fd1498Szrj   sel->new_vector (nelt, 1, 3);
411138fd1498Szrj   for (unsigned int i = 0; i < 3; i++)
411238fd1498Szrj     sel->quick_push (i + offset);
411338fd1498Szrj }
411438fd1498Szrj 
411538fd1498Szrj /* Checks whether the target supports whole-vector shifts for vectors of mode
411638fd1498Szrj    MODE.  This is the case if _either_ the platform handles vec_shr_optab, _or_
411738fd1498Szrj    it supports vec_perm_const with masks for all necessary shift amounts.  */
411838fd1498Szrj static bool
have_whole_vector_shift(machine_mode mode)411938fd1498Szrj have_whole_vector_shift (machine_mode mode)
412038fd1498Szrj {
412138fd1498Szrj   if (optab_handler (vec_shr_optab, mode) != CODE_FOR_nothing)
412238fd1498Szrj     return true;
412338fd1498Szrj 
412438fd1498Szrj   /* Variable-length vectors should be handled via the optab.  */
412538fd1498Szrj   unsigned int nelt;
412638fd1498Szrj   if (!GET_MODE_NUNITS (mode).is_constant (&nelt))
412738fd1498Szrj     return false;
412838fd1498Szrj 
412938fd1498Szrj   vec_perm_builder sel;
413038fd1498Szrj   vec_perm_indices indices;
413138fd1498Szrj   for (unsigned int i = nelt / 2; i >= 1; i /= 2)
413238fd1498Szrj     {
413338fd1498Szrj       calc_vec_perm_mask_for_shift (i, nelt, &sel);
413438fd1498Szrj       indices.new_vector (sel, 2, nelt);
413538fd1498Szrj       if (!can_vec_perm_const_p (mode, indices, false))
413638fd1498Szrj 	return false;
413738fd1498Szrj     }
413838fd1498Szrj   return true;
413938fd1498Szrj }
414038fd1498Szrj 
414138fd1498Szrj /* TODO: Close dependency between vect_model_*_cost and vectorizable_*
414238fd1498Szrj    functions. Design better to avoid maintenance issues.  */
414338fd1498Szrj 
/* Function vect_model_reduction_cost.

   Models cost for a reduction operation, including the vector ops
   generated within the strip-mine loop, the initial definition before
   the loop, and the epilogue code that must be generated.

   STMT_INFO describes the reduction statement, REDUC_FN is the internal
   function that implements the reduction (IFN_LAST if none is available),
   and NCOPIES is the number of vector statements generated per scalar
   statement.  Costs are recorded via add_stmt_cost against the target's
   cost-model data; nothing is returned.  */

static void
vect_model_reduction_cost (stmt_vec_info stmt_info, internal_fn reduc_fn,
			   int ncopies)
{
  int prologue_cost = 0, epilogue_cost = 0, inside_cost;
  enum tree_code code;
  optab optab;
  tree vectype;
  gimple *orig_stmt;
  machine_mode mode;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = NULL;
  void *target_cost_data;

  /* Costs accumulate into the loop's cost data when vectorizing a loop,
     otherwise into the enclosing basic block's (SLP) cost data.  */
  if (loop_vinfo)
    {
      loop = LOOP_VINFO_LOOP (loop_vinfo);
      target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
    }
  else
    target_cost_data = BB_VINFO_TARGET_COST_DATA (STMT_VINFO_BB_VINFO (stmt_info));

  /* Condition reductions generate two reductions in the loop.  */
  vect_reduction_type reduction_type
    = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info);
  if (reduction_type == COND_REDUCTION)
    ncopies *= 2;

  vectype = STMT_VINFO_VECTYPE (stmt_info);
  mode = TYPE_MODE (vectype);
  orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

  /* For pattern statements cost the original scalar statement;
     otherwise cost the statement itself.  */
  if (!orig_stmt)
    orig_stmt = STMT_VINFO_STMT (stmt_info);

  code = gimple_assign_rhs_code (orig_stmt);

  if (reduction_type == EXTRACT_LAST_REDUCTION
      || reduction_type == FOLD_LEFT_REDUCTION)
    {
      /* No extra instructions needed in the prologue.  */
      prologue_cost = 0;

      if (reduction_type == EXTRACT_LAST_REDUCTION || reduc_fn != IFN_LAST)
	/* Count one reduction-like operation per vector.  */
	inside_cost = add_stmt_cost (target_cost_data, ncopies, vec_to_scalar,
				     stmt_info, 0, vect_body);
      else
	{
	  /* Use NELEMENTS extracts and NELEMENTS scalar ops.  */
	  unsigned int nelements = ncopies * vect_nunits_for_cost (vectype);
	  inside_cost = add_stmt_cost (target_cost_data,  nelements,
				       vec_to_scalar, stmt_info, 0,
				       vect_body);
	  inside_cost += add_stmt_cost (target_cost_data,  nelements,
					scalar_stmt, stmt_info, 0,
					vect_body);
	}
    }
  else
    {
      /* Add in cost for initial definition.
	 For cond reduction we have four vectors: initial index, step,
	 initial result of the data reduction, initial value of the index
	 reduction.  */
      int prologue_stmts = reduction_type == COND_REDUCTION ? 4 : 1;
      prologue_cost += add_stmt_cost (target_cost_data, prologue_stmts,
				      scalar_to_vec, stmt_info, 0,
				      vect_prologue);

      /* Cost of reduction op inside loop.  */
      inside_cost = add_stmt_cost (target_cost_data, ncopies, vector_stmt,
				   stmt_info, 0, vect_body);
    }

  /* Determine cost of epilogue code.

     We have a reduction operator that will reduce the vector in one statement.
     Also requires scalar extract.

     NOTE(review): for reductions nested inside the vectorized loop no
     epilogue is costed here — presumably the epilogue is generated (and
     costed) by the outer-loop handling; confirm against
     vect_create_epilog_for_reduction.  */

  if (!loop || !nested_in_vect_loop_p (loop, orig_stmt))
    {
      if (reduc_fn != IFN_LAST)
	{
	  if (reduction_type == COND_REDUCTION)
	    {
	      /* An EQ stmt and an COND_EXPR stmt.  */
	      epilogue_cost += add_stmt_cost (target_cost_data, 2,
					      vector_stmt, stmt_info, 0,
					      vect_epilogue);
	      /* Reduction of the max index and a reduction of the found
		 values.  */
	      epilogue_cost += add_stmt_cost (target_cost_data, 2,
					      vec_to_scalar, stmt_info, 0,
					      vect_epilogue);
	      /* A broadcast of the max value.  */
	      epilogue_cost += add_stmt_cost (target_cost_data, 1,
					      scalar_to_vec, stmt_info, 0,
					      vect_epilogue);
	    }
	  else
	    {
	      /* One reduction operation on the whole vector, plus one
		 extract of the scalar result.  */
	      epilogue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
					      stmt_info, 0, vect_epilogue);
	      epilogue_cost += add_stmt_cost (target_cost_data, 1,
					      vec_to_scalar, stmt_info, 0,
					      vect_epilogue);
	    }
	}
      else if (reduction_type == COND_REDUCTION)
	{
	  unsigned estimated_nunits = vect_nunits_for_cost (vectype);
	  /* Extraction of scalar elements.  */
	  epilogue_cost += add_stmt_cost (target_cost_data,
					  2 * estimated_nunits,
					  vec_to_scalar, stmt_info, 0,
					  vect_epilogue);
	  /* Scalar max reductions via COND_EXPR / MAX_EXPR.  */
	  epilogue_cost += add_stmt_cost (target_cost_data,
					  2 * estimated_nunits - 3,
					  scalar_stmt, stmt_info, 0,
					  vect_epilogue);
	}
      else if (reduction_type == EXTRACT_LAST_REDUCTION
	       || reduction_type == FOLD_LEFT_REDUCTION)
	/* No extra instructions need in the epilogue.  */
	;
      else
	{
	  int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
	  tree bitsize =
	    TYPE_SIZE (TREE_TYPE (gimple_assign_lhs (orig_stmt)));
	  int element_bitsize = tree_to_uhwi (bitsize);
	  int nelements = vec_size_in_bits / element_bitsize;

	  /* COND_EXPR reductions are reduced with MAX_EXPR in the
	     epilogue, so cost that operation instead.  */
	  if (code == COND_EXPR)
	    code = MAX_EXPR;

	  optab = optab_for_tree_code (code, vectype, optab_default);

	  /* We have a whole vector shift available.  */
	  if (optab != unknown_optab
	      && VECTOR_MODE_P (mode)
	      && optab_handler (optab, mode) != CODE_FOR_nothing
	      && have_whole_vector_shift (mode))
	    {
	      /* Final reduction via vector shifts and the reduction operator.
		 Also requires scalar extract.  */
	      epilogue_cost += add_stmt_cost (target_cost_data,
					      exact_log2 (nelements) * 2,
					      vector_stmt, stmt_info, 0,
					      vect_epilogue);
	      epilogue_cost += add_stmt_cost (target_cost_data, 1,
					      vec_to_scalar, stmt_info, 0,
					      vect_epilogue);
	    }
	  else
	    /* Use extracts and reduction op for final reduction.  For N
	       elements, we have N extracts and N-1 reduction ops.  */
	    epilogue_cost += add_stmt_cost (target_cost_data,
					    nelements + nelements - 1,
					    vector_stmt, stmt_info, 0,
					    vect_epilogue);
	}
    }

  if (dump_enabled_p ())
    dump_printf (MSG_NOTE,
                 "vect_model_reduction_cost: inside_cost = %d, "
                 "prologue_cost = %d, epilogue_cost = %d .\n", inside_cost,
                 prologue_cost, epilogue_cost);
}
432238fd1498Szrj 
432338fd1498Szrj 
432438fd1498Szrj /* Function vect_model_induction_cost.
432538fd1498Szrj 
432638fd1498Szrj    Models cost for induction operations.  */
432738fd1498Szrj 
432838fd1498Szrj static void
vect_model_induction_cost(stmt_vec_info stmt_info,int ncopies)432938fd1498Szrj vect_model_induction_cost (stmt_vec_info stmt_info, int ncopies)
433038fd1498Szrj {
433138fd1498Szrj   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
433238fd1498Szrj   void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
433338fd1498Szrj   unsigned inside_cost, prologue_cost;
433438fd1498Szrj 
433538fd1498Szrj   if (PURE_SLP_STMT (stmt_info))
433638fd1498Szrj     return;
433738fd1498Szrj 
433838fd1498Szrj   /* loop cost for vec_loop.  */
433938fd1498Szrj   inside_cost = add_stmt_cost (target_cost_data, ncopies, vector_stmt,
434038fd1498Szrj 			       stmt_info, 0, vect_body);
434138fd1498Szrj 
434238fd1498Szrj   /* prologue cost for vec_init and vec_step.  */
434338fd1498Szrj   prologue_cost = add_stmt_cost (target_cost_data, 2, scalar_to_vec,
434438fd1498Szrj 				 stmt_info, 0, vect_prologue);
434538fd1498Szrj 
434638fd1498Szrj   if (dump_enabled_p ())
434738fd1498Szrj     dump_printf_loc (MSG_NOTE, vect_location,
434838fd1498Szrj                      "vect_model_induction_cost: inside_cost = %d, "
434938fd1498Szrj                      "prologue_cost = %d .\n", inside_cost, prologue_cost);
435038fd1498Szrj }
435138fd1498Szrj 
435238fd1498Szrj 
435338fd1498Szrj 
435438fd1498Szrj /* Function get_initial_def_for_reduction
435538fd1498Szrj 
435638fd1498Szrj    Input:
435738fd1498Szrj    STMT - a stmt that performs a reduction operation in the loop.
435838fd1498Szrj    INIT_VAL - the initial value of the reduction variable
435938fd1498Szrj 
436038fd1498Szrj    Output:
436138fd1498Szrj    ADJUSTMENT_DEF - a tree that holds a value to be added to the final result
436238fd1498Szrj         of the reduction (used for adjusting the epilog - see below).
436338fd1498Szrj    Return a vector variable, initialized according to the operation that STMT
436438fd1498Szrj         performs. This vector will be used as the initial value of the
436538fd1498Szrj         vector of partial results.
436638fd1498Szrj 
436738fd1498Szrj    Option1 (adjust in epilog): Initialize the vector as follows:
436838fd1498Szrj      add/bit or/xor:    [0,0,...,0,0]
436938fd1498Szrj      mult/bit and:      [1,1,...,1,1]
437038fd1498Szrj      min/max/cond_expr: [init_val,init_val,..,init_val,init_val]
437138fd1498Szrj    and when necessary (e.g. add/mult case) let the caller know
437238fd1498Szrj    that it needs to adjust the result by init_val.
437338fd1498Szrj 
437438fd1498Szrj    Option2: Initialize the vector as follows:
437538fd1498Szrj      add/bit or/xor:    [init_val,0,0,...,0]
437638fd1498Szrj      mult/bit and:      [init_val,1,1,...,1]
437738fd1498Szrj      min/max/cond_expr: [init_val,init_val,...,init_val]
437838fd1498Szrj    and no adjustments are needed.
437938fd1498Szrj 
438038fd1498Szrj    For example, for the following code:
438138fd1498Szrj 
438238fd1498Szrj    s = init_val;
438338fd1498Szrj    for (i=0;i<n;i++)
438438fd1498Szrj      s = s + a[i];
438538fd1498Szrj 
438638fd1498Szrj    STMT is 's = s + a[i]', and the reduction variable is 's'.
438738fd1498Szrj    For a vector of 4 units, we want to return either [0,0,0,init_val],
438838fd1498Szrj    or [0,0,0,0] and let the caller know that it needs to adjust
438938fd1498Szrj    the result at the end by 'init_val'.
439038fd1498Szrj 
439138fd1498Szrj    FORNOW, we are using the 'adjust in epilog' scheme, because this way the
439238fd1498Szrj    initialization vector is simpler (same element in all entries), if
439338fd1498Szrj    ADJUSTMENT_DEF is not NULL, and Option2 otherwise.
439438fd1498Szrj 
439538fd1498Szrj    A cost model should help decide between these two schemes.  */
439638fd1498Szrj 
tree
get_initial_def_for_reduction (gimple *stmt, tree init_val,
                               tree *adjustment_def)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree scalar_type = TREE_TYPE (init_val);
  tree vectype = get_vectype_for_scalar_type (scalar_type);
  /* The reduction code determines the neutral element (0, 1, -1, or
     INIT_VAL itself) used to fill the initial vector.  */
  enum tree_code code = gimple_assign_rhs_code (stmt);
  tree def_for_init;
  tree init_def;
  bool nested_in_vect_loop = false;
  REAL_VALUE_TYPE real_init_val = dconst0;
  int int_init_val = 0;
  gimple *def_stmt = NULL;
  /* Statements built below are emitted on the loop preheader edge at
     the end of this function.  */
  gimple_seq stmts = NULL;

  gcc_assert (vectype);

  gcc_assert (POINTER_TYPE_P (scalar_type) || INTEGRAL_TYPE_P (scalar_type)
	      || SCALAR_FLOAT_TYPE_P (scalar_type));

  if (nested_in_vect_loop_p (loop, stmt))
    nested_in_vect_loop = true;
  else
    gcc_assert (loop == (gimple_bb (stmt))->loop_father);

  /* In case of double reduction we only create a vector variable to be put
     in the reduction phi node.  The actual statement creation is done in
     vect_create_epilog_for_reduction.  */
  if (adjustment_def && nested_in_vect_loop
      && TREE_CODE (init_val) == SSA_NAME
      && (def_stmt = SSA_NAME_DEF_STMT (init_val))
      && gimple_code (def_stmt) == GIMPLE_PHI
      && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
      && vinfo_for_stmt (def_stmt)
      && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
          == vect_double_reduction_def)
    {
      *adjustment_def = NULL;
      return vect_create_destination_var (init_val, vectype);
    }

  vect_reduction_type reduction_type
    = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_vinfo);

  /* In case of a nested reduction do not use an adjustment def as
     that case is not supported by the epilogue generation correctly
     if ncopies is not one.  */
  if (adjustment_def && nested_in_vect_loop)
    {
      *adjustment_def = NULL;
      return vect_get_vec_def_for_operand (init_val, stmt);
    }

  switch (code)
    {
    case WIDEN_SUM_EXPR:
    case DOT_PROD_EXPR:
    case SAD_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
    case MULT_EXPR:
    case BIT_AND_EXPR:
      {
        /* ADJUSTMENT_DEF is NULL when called from
           vect_create_epilog_for_reduction to vectorize double reduction.  */
        if (adjustment_def)
	  *adjustment_def = init_val;

	/* The neutral element is 1 for multiplication...  */
        if (code == MULT_EXPR)
          {
            real_init_val = dconst1;
            int_init_val = 1;
          }

	/* ...all-ones for bitwise AND, and 0 (the defaults set above)
	   for all the other operations in this group.  */
        if (code == BIT_AND_EXPR)
          int_init_val = -1;

        if (SCALAR_FLOAT_TYPE_P (scalar_type))
          def_for_init = build_real (scalar_type, real_init_val);
        else
          def_for_init = build_int_cst (scalar_type, int_init_val);

	if (adjustment_def)
	  /* Option1: the first element is '0' or '1' as well.  */
	  init_def = gimple_build_vector_from_val (&stmts, vectype,
						   def_for_init);
	else if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ())
	  {
	    /* Option2 (variable length): the first element is INIT_VAL.
	       Build a neutral-element splat and shift INIT_VAL into it.  */
	    init_def = build_vector_from_val (vectype, def_for_init);
	    gcall *call = gimple_build_call_internal (IFN_VEC_SHL_INSERT,
						      2, init_def, init_val);
	    init_def = make_ssa_name (vectype);
	    gimple_call_set_lhs (call, init_def);
	    gimple_seq_add_stmt (&stmts, call);
	  }
	else
	  {
	    /* Option2: the first element is INIT_VAL.  The encoding
	       (1 pattern, 2 elements per pattern) replicates
	       DEF_FOR_INIT into the remaining lanes.  */
	    tree_vector_builder elts (vectype, 1, 2);
	    elts.quick_push (init_val);
	    elts.quick_push (def_for_init);
	    init_def = gimple_build_vector (&stmts, &elts);
	  }
      }
      break;

    case MIN_EXPR:
    case MAX_EXPR:
    case COND_EXPR:
      {
	/* For min/max/cond_expr the neutral element is INIT_VAL itself,
	   so no epilogue adjustment is ever required.  */
	if (adjustment_def)
          {
	    *adjustment_def = NULL_TREE;
	    if (reduction_type != COND_REDUCTION
		&& reduction_type != EXTRACT_LAST_REDUCTION)
	      {
		init_def = vect_get_vec_def_for_operand (init_val, stmt);
		break;
	      }
	  }
	init_val = gimple_convert (&stmts, TREE_TYPE (vectype), init_val);
	init_def = gimple_build_vector_from_val (&stmts, vectype, init_val);
      }
      break;

    default:
      gcc_unreachable ();
    }

  /* Materialize any statements built above before the loop.  */
  if (stmts)
    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
  return init_def;
}
453638fd1498Szrj 
/* Get at the initial defs for the reduction PHIs in SLP_NODE.
   NUMBER_OF_VECTORS is the number of vector defs to create.
   REDUC_CHAIN is true if the group of scalar stmts forms a reduction
   chain (a single running reduction value threaded through all stmts),
   in which case only the first lane carries the live-in initial value
   and NEUTRAL_OP must be nonnull.
   If NEUTRAL_OP is nonnull, introducing extra elements of that
   value will not change the result.

   The created vector defs are pushed onto VEC_OPRNDS; any statements
   needed to build them are inserted on the loop preheader edge.  */

static void
get_initial_defs_for_reduction (slp_tree slp_node,
				vec<tree> *vec_oprnds,
				unsigned int number_of_vectors,
				bool reduc_chain, tree neutral_op)
{
  vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
  gimple *stmt = stmts[0];
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  unsigned HOST_WIDE_INT nunits;
  unsigned j, number_of_places_left_in_vector;
  tree vector_type;
  tree vop;
  int group_size = stmts.length ();
  unsigned int vec_num, i;
  unsigned number_of_copies = 1;
  /* Vector defs accumulated in creation order (which is the reverse of
     the order callers expect); inverted into VEC_OPRNDS below.  */
  vec<tree> voprnds;
  voprnds.create (number_of_vectors);
  struct loop *loop;
  /* Lazily-populated results of duplicate_and_interleave, used when the
     lanes cannot be placed directly into single vectors.  */
  auto_vec<tree, 16> permute_results;

  vector_type = STMT_VINFO_VECTYPE (stmt_vinfo);

  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def);

  loop = (gimple_bb (stmt))->loop_father;
  gcc_assert (loop);
  edge pe = loop_preheader_edge (loop);

  /* A reduction chain has a single live-in value, so the remaining
     lanes can only be filled with a neutral element.  */
  gcc_assert (!reduc_chain || neutral_op);

  /* NUMBER_OF_COPIES is the number of times we need to use the same values in
     created vectors. It is greater than 1 if unrolling is performed.

     For example, we have two scalar operands, s1 and s2 (e.g., group of
     strided accesses of size two), while NUNITS is four (i.e., four scalars
     of this type can be packed in a vector).  The output vector will contain
     two copies of each scalar operand: {s1, s2, s1, s2}.  (NUMBER_OF_COPIES
     will be 2).

     If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
     containing the operands.

     For example, NUNITS is four as before, and the group size is 8
     (s1, s2, ..., s8).  We will create two vectors {s1, s2, s3, s4} and
     {s5, s6, s7, s8}.  */

  /* For variable-length vectors the element count is not a compile-time
     constant; build GROUP_SIZE elements at a time instead and rely on
     the shift-insert / duplicate_and_interleave paths below.  */
  if (!TYPE_VECTOR_SUBPARTS (vector_type).is_constant (&nunits))
    nunits = group_size;

  number_of_copies = nunits * number_of_vectors / group_size;

  number_of_places_left_in_vector = nunits;
  /* Track whether every lane filled so far is a constant, which allows
     building the vector as a VECTOR_CST instead of a constructor.  */
  bool constant_p = true;
  tree_vector_builder elts (vector_type, nunits, 1);
  elts.quick_grow (nunits);
  /* Fill lanes from the highest index downwards, iterating the scalar
     stmts in reverse, so each completed vector has the group's values
     in their original order.  */
  for (j = 0; j < number_of_copies; j++)
    {
      for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
        {
	  tree op;
	  /* Get the def before the loop.  In reduction chain we have only
	     one initial value.  Every lane other than the "real" one
	     (last copy, and for chains lane 0 only) gets the neutral
	     value, so extra accumulator lanes don't perturb the result.  */
	  if ((j != (number_of_copies - 1)
	       || (reduc_chain && i != 0))
	      && neutral_op)
	    op = neutral_op;
	  else
	    op = PHI_ARG_DEF_FROM_EDGE (stmt, pe);

          /* Create 'vect_ = {op0,op1,...,opn}'.  */
          number_of_places_left_in_vector--;
	  elts[number_of_places_left_in_vector] = op;
	  if (!CONSTANT_CLASS_P (op))
	    constant_p = false;

          if (number_of_places_left_in_vector == 0)
            {
	      gimple_seq ctor_seq = NULL;
	      tree init;
	      /* A constant all-lanes build works whenever NUNITS divides
	         the (possibly variable) vector length; a constructor of
	         SSA names needs the lengths to match exactly.  */
	      if (constant_p && !neutral_op
		  ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type), nunits)
		  : known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits))
		/* Build the vector directly from ELTS.  */
		init = gimple_build_vector (&ctor_seq, &elts);
	      else if (neutral_op)
		{
		  /* Build a vector of the neutral value and shift the
		     other elements into place.  */
		  init = gimple_build_vector_from_val (&ctor_seq, vector_type,
						       neutral_op);
		  int k = nunits;
		  /* Trailing neutral lanes are already correct; skip them.  */
		  while (k > 0 && elts[k - 1] == neutral_op)
		    k -= 1;
		  /* Shift each remaining element in from the low end,
		     highest-indexed first, via IFN_VEC_SHL_INSERT.  */
		  while (k > 0)
		    {
		      k -= 1;
		      gcall *call = gimple_build_call_internal
			(IFN_VEC_SHL_INSERT, 2, init, elts[k]);
		      init = make_ssa_name (vector_type);
		      gimple_call_set_lhs (call, init);
		      gimple_seq_add_stmt (&ctor_seq, call);
		    }
		}
	      else
		{
		  /* First time round, duplicate ELTS to fill the
		     required number of vectors, then cherry pick the
		     appropriate result for each iteration.  */
		  if (vec_oprnds->is_empty ())
		    duplicate_and_interleave (&ctor_seq, vector_type, elts,
					      number_of_vectors,
					      permute_results);
		  /* Index from the back: vectors are produced in reverse.  */
		  init = permute_results[number_of_vectors - j - 1];
		}
	      if (ctor_seq != NULL)
		gsi_insert_seq_on_edge_immediate (pe, ctor_seq);
	      voprnds.quick_push (init);

	      /* Reset the builder for the next vector's worth of lanes.  */
              number_of_places_left_in_vector = nunits;
	      elts.new_vector (vector_type, nunits, 1);
	      elts.quick_grow (nunits);
	      constant_p = true;
            }
        }
    }

  /* Since the vectors are created in the reverse order, we should invert
     them.  */
  vec_num = voprnds.length ();
  for (j = vec_num; j != 0; j--)
    {
      vop = voprnds[j - 1];
      vec_oprnds->quick_push (vop);
    }

  voprnds.release ();

  /* In case that VF is greater than the unrolling factor needed for the SLP
     group of stmts, NUMBER_OF_VECTORS to be created is greater than
     NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
     to replicate the vectors.  */
  tree neutral_vec = NULL;
  while (number_of_vectors > vec_oprnds->length ())
    {
      if (neutral_op)
        {
	  /* Pad with a cached all-neutral vector (built at most once).  */
          if (!neutral_vec)
	    {
	      gimple_seq ctor_seq = NULL;
	      neutral_vec = gimple_build_vector_from_val
		(&ctor_seq, vector_type, neutral_op);
	      if (ctor_seq != NULL)
		gsi_insert_seq_on_edge_immediate (pe, ctor_seq);
	    }
          vec_oprnds->quick_push (neutral_vec);
        }
      else
        {
	  /* No neutral value available: repeat the first VEC_NUM defs.  */
          for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
            vec_oprnds->quick_push (vop);
        }
    }
}
470638fd1498Szrj 
470738fd1498Szrj 
470838fd1498Szrj /* Function vect_create_epilog_for_reduction
470938fd1498Szrj 
471038fd1498Szrj    Create code at the loop-epilog to finalize the result of a reduction
471138fd1498Szrj    computation.
471238fd1498Szrj 
471338fd1498Szrj    VECT_DEFS is list of vector of partial results, i.e., the lhs's of vector
471438fd1498Szrj      reduction statements.
471538fd1498Szrj    STMT is the scalar reduction stmt that is being vectorized.
471638fd1498Szrj    NCOPIES is > 1 in case the vectorization factor (VF) is bigger than the
471738fd1498Szrj      number of elements that we can fit in a vectype (nunits).  In this case
471838fd1498Szrj      we have to generate more than one vector stmt - i.e - we need to "unroll"
471938fd1498Szrj      the vector stmt by a factor VF/nunits.  For more details see documentation
472038fd1498Szrj      in vectorizable_operation.
472138fd1498Szrj    REDUC_FN is the internal function for the epilog reduction.
472238fd1498Szrj    REDUCTION_PHIS is a list of the phi-nodes that carry the reduction
472338fd1498Szrj      computation.
472438fd1498Szrj    REDUC_INDEX is the index of the operand in the right hand side of the
472538fd1498Szrj      statement that is defined by REDUCTION_PHI.
472638fd1498Szrj    DOUBLE_REDUC is TRUE if double reduction phi nodes should be handled.
472738fd1498Szrj    SLP_NODE is an SLP node containing a group of reduction statements. The
472838fd1498Szrj      first one in this group is STMT.
472938fd1498Szrj    INDUC_VAL is for INTEGER_INDUC_COND_REDUCTION the value to use for the case
473038fd1498Szrj      when the COND_EXPR is never true in the loop.  For MAX_EXPR, it needs to
473138fd1498Szrj      be smaller than any value of the IV in the loop, for MIN_EXPR larger than
473238fd1498Szrj      any value of the IV in the loop.
473338fd1498Szrj    INDUC_CODE is the code for epilog reduction if INTEGER_INDUC_COND_REDUCTION.
473438fd1498Szrj    NEUTRAL_OP is the value given by neutral_op_for_slp_reduction; it is
473538fd1498Szrj      null if this is not an SLP reduction
473638fd1498Szrj 
473738fd1498Szrj    This function:
473838fd1498Szrj    1. Creates the reduction def-use cycles: sets the arguments for
473938fd1498Szrj       REDUCTION_PHIS:
474038fd1498Szrj       The loop-entry argument is the vectorized initial-value of the reduction.
474138fd1498Szrj       The loop-latch argument is taken from VECT_DEFS - the vector of partial
474238fd1498Szrj       sums.
474338fd1498Szrj    2. "Reduces" each vector of partial results VECT_DEFS into a single result,
474438fd1498Szrj       by calling the function specified by REDUC_FN if available, or by
474538fd1498Szrj       other means (whole-vector shifts or a scalar loop).
474638fd1498Szrj       The function also creates a new phi node at the loop exit to preserve
474738fd1498Szrj       loop-closed form, as illustrated below.
474838fd1498Szrj 
474938fd1498Szrj      The flow at the entry to this function:
475038fd1498Szrj 
475138fd1498Szrj         loop:
475238fd1498Szrj           vec_def = phi <null, null>            # REDUCTION_PHI
475338fd1498Szrj           VECT_DEF = vector_stmt                # vectorized form of STMT
475438fd1498Szrj           s_loop = scalar_stmt                  # (scalar) STMT
475538fd1498Szrj         loop_exit:
475638fd1498Szrj           s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
475738fd1498Szrj           use <s_out0>
475838fd1498Szrj           use <s_out0>
475938fd1498Szrj 
476038fd1498Szrj      The above is transformed by this function into:
476138fd1498Szrj 
476238fd1498Szrj         loop:
476338fd1498Szrj           vec_def = phi <vec_init, VECT_DEF>    # REDUCTION_PHI
476438fd1498Szrj           VECT_DEF = vector_stmt                # vectorized form of STMT
476538fd1498Szrj           s_loop = scalar_stmt                  # (scalar) STMT
476638fd1498Szrj         loop_exit:
476738fd1498Szrj           s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
476838fd1498Szrj           v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
476938fd1498Szrj           v_out2 = reduce <v_out1>
477038fd1498Szrj           s_out3 = extract_field <v_out2, 0>
477138fd1498Szrj           s_out4 = adjust_result <s_out3>
477238fd1498Szrj           use <s_out4>
477338fd1498Szrj           use <s_out4>
477438fd1498Szrj */
477538fd1498Szrj 
477638fd1498Szrj static void
vect_create_epilog_for_reduction(vec<tree> vect_defs,gimple * stmt,gimple * reduc_def_stmt,int ncopies,internal_fn reduc_fn,vec<gimple * > reduction_phis,bool double_reduc,slp_tree slp_node,slp_instance slp_node_instance,tree induc_val,enum tree_code induc_code,tree neutral_op)477738fd1498Szrj vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple *stmt,
477838fd1498Szrj 				  gimple *reduc_def_stmt,
477938fd1498Szrj 				  int ncopies, internal_fn reduc_fn,
478038fd1498Szrj 				  vec<gimple *> reduction_phis,
478138fd1498Szrj                                   bool double_reduc,
478238fd1498Szrj 				  slp_tree slp_node,
478338fd1498Szrj 				  slp_instance slp_node_instance,
478438fd1498Szrj 				  tree induc_val, enum tree_code induc_code,
478538fd1498Szrj 				  tree neutral_op)
478638fd1498Szrj {
478738fd1498Szrj   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
478838fd1498Szrj   stmt_vec_info prev_phi_info;
478938fd1498Szrj   tree vectype;
479038fd1498Szrj   machine_mode mode;
479138fd1498Szrj   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
479238fd1498Szrj   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *outer_loop = NULL;
479338fd1498Szrj   basic_block exit_bb;
479438fd1498Szrj   tree scalar_dest;
479538fd1498Szrj   tree scalar_type;
479638fd1498Szrj   gimple *new_phi = NULL, *phi;
479738fd1498Szrj   gimple_stmt_iterator exit_gsi;
479838fd1498Szrj   tree vec_dest;
479938fd1498Szrj   tree new_temp = NULL_TREE, new_dest, new_name, new_scalar_dest;
480038fd1498Szrj   gimple *epilog_stmt = NULL;
480138fd1498Szrj   enum tree_code code = gimple_assign_rhs_code (stmt);
480238fd1498Szrj   gimple *exit_phi;
480338fd1498Szrj   tree bitsize;
480438fd1498Szrj   tree adjustment_def = NULL;
480538fd1498Szrj   tree vec_initial_def = NULL;
480638fd1498Szrj   tree expr, def, initial_def = NULL;
480738fd1498Szrj   tree orig_name, scalar_result;
480838fd1498Szrj   imm_use_iterator imm_iter, phi_imm_iter;
480938fd1498Szrj   use_operand_p use_p, phi_use_p;
481038fd1498Szrj   gimple *use_stmt, *orig_stmt, *reduction_phi = NULL;
481138fd1498Szrj   bool nested_in_vect_loop = false;
481238fd1498Szrj   auto_vec<gimple *> new_phis;
481338fd1498Szrj   auto_vec<gimple *> inner_phis;
481438fd1498Szrj   enum vect_def_type dt = vect_unknown_def_type;
481538fd1498Szrj   int j, i;
481638fd1498Szrj   auto_vec<tree> scalar_results;
481738fd1498Szrj   unsigned int group_size = 1, k, ratio;
481838fd1498Szrj   auto_vec<tree> vec_initial_defs;
481938fd1498Szrj   auto_vec<gimple *> phis;
482038fd1498Szrj   bool slp_reduc = false;
482138fd1498Szrj   bool direct_slp_reduc;
482238fd1498Szrj   tree new_phi_result;
482338fd1498Szrj   gimple *inner_phi = NULL;
482438fd1498Szrj   tree induction_index = NULL_TREE;
482538fd1498Szrj 
482638fd1498Szrj   if (slp_node)
482738fd1498Szrj     group_size = SLP_TREE_SCALAR_STMTS (slp_node).length ();
482838fd1498Szrj 
482938fd1498Szrj   if (nested_in_vect_loop_p (loop, stmt))
483038fd1498Szrj     {
483138fd1498Szrj       outer_loop = loop;
483238fd1498Szrj       loop = loop->inner;
483338fd1498Szrj       nested_in_vect_loop = true;
483438fd1498Szrj       gcc_assert (!slp_node);
483538fd1498Szrj     }
483638fd1498Szrj 
483738fd1498Szrj   vectype = STMT_VINFO_VECTYPE (stmt_info);
483838fd1498Szrj   gcc_assert (vectype);
483938fd1498Szrj   mode = TYPE_MODE (vectype);
484038fd1498Szrj 
484138fd1498Szrj   /* 1. Create the reduction def-use cycle:
484238fd1498Szrj      Set the arguments of REDUCTION_PHIS, i.e., transform
484338fd1498Szrj 
484438fd1498Szrj         loop:
484538fd1498Szrj           vec_def = phi <null, null>            # REDUCTION_PHI
484638fd1498Szrj           VECT_DEF = vector_stmt                # vectorized form of STMT
484738fd1498Szrj           ...
484838fd1498Szrj 
484938fd1498Szrj      into:
485038fd1498Szrj 
485138fd1498Szrj         loop:
485238fd1498Szrj           vec_def = phi <vec_init, VECT_DEF>    # REDUCTION_PHI
485338fd1498Szrj           VECT_DEF = vector_stmt                # vectorized form of STMT
485438fd1498Szrj           ...
485538fd1498Szrj 
485638fd1498Szrj      (in case of SLP, do it for all the phis). */
485738fd1498Szrj 
485838fd1498Szrj   /* Get the loop-entry arguments.  */
485938fd1498Szrj   enum vect_def_type initial_def_dt = vect_unknown_def_type;
486038fd1498Szrj   if (slp_node)
486138fd1498Szrj     {
486238fd1498Szrj       unsigned vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
486338fd1498Szrj       vec_initial_defs.reserve (vec_num);
486438fd1498Szrj       get_initial_defs_for_reduction (slp_node_instance->reduc_phis,
486538fd1498Szrj 				      &vec_initial_defs, vec_num,
486638fd1498Szrj 				      GROUP_FIRST_ELEMENT (stmt_info),
486738fd1498Szrj 				      neutral_op);
486838fd1498Szrj     }
486938fd1498Szrj   else
487038fd1498Szrj     {
487138fd1498Szrj       /* Get at the scalar def before the loop, that defines the initial value
487238fd1498Szrj 	 of the reduction variable.  */
487338fd1498Szrj       gimple *def_stmt;
487438fd1498Szrj       initial_def = PHI_ARG_DEF_FROM_EDGE (reduc_def_stmt,
487538fd1498Szrj 					   loop_preheader_edge (loop));
487638fd1498Szrj       /* Optimize: if initial_def is for REDUC_MAX smaller than the base
487738fd1498Szrj 	 and we can't use zero for induc_val, use initial_def.  Similarly
487838fd1498Szrj 	 for REDUC_MIN and initial_def larger than the base.  */
487938fd1498Szrj       if (TREE_CODE (initial_def) == INTEGER_CST
488038fd1498Szrj 	  && (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
488138fd1498Szrj 	      == INTEGER_INDUC_COND_REDUCTION)
488238fd1498Szrj 	  && !integer_zerop (induc_val)
488338fd1498Szrj 	  && ((induc_code == MAX_EXPR
488438fd1498Szrj 	       && tree_int_cst_lt (initial_def, induc_val))
488538fd1498Szrj 	      || (induc_code == MIN_EXPR
488638fd1498Szrj 		  && tree_int_cst_lt (induc_val, initial_def))))
488738fd1498Szrj 	induc_val = initial_def;
488838fd1498Szrj       vect_is_simple_use (initial_def, loop_vinfo, &def_stmt, &initial_def_dt);
488938fd1498Szrj       vec_initial_def = get_initial_def_for_reduction (stmt, initial_def,
489038fd1498Szrj 						       &adjustment_def);
489138fd1498Szrj       vec_initial_defs.create (1);
489238fd1498Szrj       vec_initial_defs.quick_push (vec_initial_def);
489338fd1498Szrj     }
489438fd1498Szrj 
489538fd1498Szrj   /* Set phi nodes arguments.  */
489638fd1498Szrj   FOR_EACH_VEC_ELT (reduction_phis, i, phi)
489738fd1498Szrj     {
489838fd1498Szrj       tree vec_init_def = vec_initial_defs[i];
489938fd1498Szrj       tree def = vect_defs[i];
490038fd1498Szrj       for (j = 0; j < ncopies; j++)
490138fd1498Szrj         {
490238fd1498Szrj 	  if (j != 0)
490338fd1498Szrj 	    {
490438fd1498Szrj 	      phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi));
490538fd1498Szrj 	      if (nested_in_vect_loop)
490638fd1498Szrj 		vec_init_def
490738fd1498Szrj 		  = vect_get_vec_def_for_stmt_copy (initial_def_dt,
490838fd1498Szrj 						    vec_init_def);
490938fd1498Szrj 	    }
491038fd1498Szrj 
491138fd1498Szrj 	  /* Set the loop-entry arg of the reduction-phi.  */
491238fd1498Szrj 
491338fd1498Szrj 	  if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
491438fd1498Szrj 	      == INTEGER_INDUC_COND_REDUCTION)
491538fd1498Szrj 	    {
491638fd1498Szrj 	      /* Initialise the reduction phi to zero.  This prevents initial
		 values of non-zero interfering with the reduction op.  */
491838fd1498Szrj 	      gcc_assert (ncopies == 1);
491938fd1498Szrj 	      gcc_assert (i == 0);
492038fd1498Szrj 
492138fd1498Szrj 	      tree vec_init_def_type = TREE_TYPE (vec_init_def);
492238fd1498Szrj 	      tree induc_val_vec
492338fd1498Szrj 		= build_vector_from_val (vec_init_def_type, induc_val);
492438fd1498Szrj 
492538fd1498Szrj 	      add_phi_arg (as_a <gphi *> (phi), induc_val_vec,
492638fd1498Szrj 			   loop_preheader_edge (loop), UNKNOWN_LOCATION);
492738fd1498Szrj 	    }
492838fd1498Szrj 	  else
492938fd1498Szrj 	    add_phi_arg (as_a <gphi *> (phi), vec_init_def,
493038fd1498Szrj 			 loop_preheader_edge (loop), UNKNOWN_LOCATION);
493138fd1498Szrj 
493238fd1498Szrj           /* Set the loop-latch arg for the reduction-phi.  */
493338fd1498Szrj           if (j > 0)
493438fd1498Szrj             def = vect_get_vec_def_for_stmt_copy (vect_unknown_def_type, def);
493538fd1498Szrj 
493638fd1498Szrj           add_phi_arg (as_a <gphi *> (phi), def, loop_latch_edge (loop),
493738fd1498Szrj 		       UNKNOWN_LOCATION);
493838fd1498Szrj 
493938fd1498Szrj           if (dump_enabled_p ())
494038fd1498Szrj             {
494138fd1498Szrj               dump_printf_loc (MSG_NOTE, vect_location,
494238fd1498Szrj 			       "transform reduction: created def-use cycle: ");
494338fd1498Szrj               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
494438fd1498Szrj               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (def), 0);
494538fd1498Szrj             }
494638fd1498Szrj         }
494738fd1498Szrj     }
494838fd1498Szrj 
494938fd1498Szrj   /* For cond reductions we want to create a new vector (INDEX_COND_EXPR)
495038fd1498Szrj      which is updated with the current index of the loop for every match of
495138fd1498Szrj      the original loop's cond_expr (VEC_STMT).  This results in a vector
495238fd1498Szrj      containing the last time the condition passed for that vector lane.
495338fd1498Szrj      The first match will be a 1 to allow 0 to be used for non-matching
495438fd1498Szrj      indexes.  If there are no matches at all then the vector will be all
495538fd1498Szrj      zeroes.  */
495638fd1498Szrj   if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
495738fd1498Szrj     {
495838fd1498Szrj       tree indx_before_incr, indx_after_incr;
495938fd1498Szrj       poly_uint64 nunits_out = TYPE_VECTOR_SUBPARTS (vectype);
496038fd1498Szrj 
496138fd1498Szrj       gimple *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
496238fd1498Szrj       gcc_assert (gimple_assign_rhs_code (vec_stmt) == VEC_COND_EXPR);
496338fd1498Szrj 
496438fd1498Szrj       int scalar_precision
496538fd1498Szrj 	= GET_MODE_PRECISION (SCALAR_TYPE_MODE (TREE_TYPE (vectype)));
496638fd1498Szrj       tree cr_index_scalar_type = make_unsigned_type (scalar_precision);
496738fd1498Szrj       tree cr_index_vector_type = build_vector_type
496838fd1498Szrj 	(cr_index_scalar_type, TYPE_VECTOR_SUBPARTS (vectype));
496938fd1498Szrj 
497038fd1498Szrj       /* First we create a simple vector induction variable which starts
497138fd1498Szrj 	 with the values {1,2,3,...} (SERIES_VECT) and increments by the
497238fd1498Szrj 	 vector size (STEP).  */
497338fd1498Szrj 
497438fd1498Szrj       /* Create a {1,2,3,...} vector.  */
497538fd1498Szrj       tree series_vect = build_index_vector (cr_index_vector_type, 1, 1);
497638fd1498Szrj 
497738fd1498Szrj       /* Create a vector of the step value.  */
497838fd1498Szrj       tree step = build_int_cst (cr_index_scalar_type, nunits_out);
497938fd1498Szrj       tree vec_step = build_vector_from_val (cr_index_vector_type, step);
498038fd1498Szrj 
498138fd1498Szrj       /* Create an induction variable.  */
498238fd1498Szrj       gimple_stmt_iterator incr_gsi;
498338fd1498Szrj       bool insert_after;
498438fd1498Szrj       standard_iv_increment_position (loop, &incr_gsi, &insert_after);
498538fd1498Szrj       create_iv (series_vect, vec_step, NULL_TREE, loop, &incr_gsi,
498638fd1498Szrj 		 insert_after, &indx_before_incr, &indx_after_incr);
498738fd1498Szrj 
498838fd1498Szrj       /* Next create a new phi node vector (NEW_PHI_TREE) which starts
498938fd1498Szrj 	 filled with zeros (VEC_ZERO).  */
499038fd1498Szrj 
499138fd1498Szrj       /* Create a vector of 0s.  */
499238fd1498Szrj       tree zero = build_zero_cst (cr_index_scalar_type);
499338fd1498Szrj       tree vec_zero = build_vector_from_val (cr_index_vector_type, zero);
499438fd1498Szrj 
499538fd1498Szrj       /* Create a vector phi node.  */
499638fd1498Szrj       tree new_phi_tree = make_ssa_name (cr_index_vector_type);
499738fd1498Szrj       new_phi = create_phi_node (new_phi_tree, loop->header);
499838fd1498Szrj       set_vinfo_for_stmt (new_phi,
499938fd1498Szrj 			  new_stmt_vec_info (new_phi, loop_vinfo));
500038fd1498Szrj       add_phi_arg (as_a <gphi *> (new_phi), vec_zero,
500138fd1498Szrj 		   loop_preheader_edge (loop), UNKNOWN_LOCATION);
500238fd1498Szrj 
500338fd1498Szrj       /* Now take the condition from the loops original cond_expr
500438fd1498Szrj 	 (VEC_STMT) and produce a new cond_expr (INDEX_COND_EXPR) which for
500538fd1498Szrj 	 every match uses values from the induction variable
500638fd1498Szrj 	 (INDEX_BEFORE_INCR) otherwise uses values from the phi node
500738fd1498Szrj 	 (NEW_PHI_TREE).
500838fd1498Szrj 	 Finally, we update the phi (NEW_PHI_TREE) to take the value of
500938fd1498Szrj 	 the new cond_expr (INDEX_COND_EXPR).  */
501038fd1498Szrj 
501138fd1498Szrj       /* Duplicate the condition from vec_stmt.  */
501238fd1498Szrj       tree ccompare = unshare_expr (gimple_assign_rhs1 (vec_stmt));
501338fd1498Szrj 
501438fd1498Szrj       /* Create a conditional, where the condition is taken from vec_stmt
501538fd1498Szrj 	 (CCOMPARE), then is the induction index (INDEX_BEFORE_INCR) and
501638fd1498Szrj 	 else is the phi (NEW_PHI_TREE).  */
501738fd1498Szrj       tree index_cond_expr = build3 (VEC_COND_EXPR, cr_index_vector_type,
501838fd1498Szrj 				     ccompare, indx_before_incr,
501938fd1498Szrj 				     new_phi_tree);
502038fd1498Szrj       induction_index = make_ssa_name (cr_index_vector_type);
502138fd1498Szrj       gimple *index_condition = gimple_build_assign (induction_index,
502238fd1498Szrj 						     index_cond_expr);
502338fd1498Szrj       gsi_insert_before (&incr_gsi, index_condition, GSI_SAME_STMT);
502438fd1498Szrj       stmt_vec_info index_vec_info = new_stmt_vec_info (index_condition,
502538fd1498Szrj 							loop_vinfo);
502638fd1498Szrj       STMT_VINFO_VECTYPE (index_vec_info) = cr_index_vector_type;
502738fd1498Szrj       set_vinfo_for_stmt (index_condition, index_vec_info);
502838fd1498Szrj 
502938fd1498Szrj       /* Update the phi with the vec cond.  */
503038fd1498Szrj       add_phi_arg (as_a <gphi *> (new_phi), induction_index,
503138fd1498Szrj 		   loop_latch_edge (loop), UNKNOWN_LOCATION);
503238fd1498Szrj     }
503338fd1498Szrj 
503438fd1498Szrj   /* 2. Create epilog code.
503538fd1498Szrj         The reduction epilog code operates across the elements of the vector
503638fd1498Szrj         of partial results computed by the vectorized loop.
503738fd1498Szrj         The reduction epilog code consists of:
503838fd1498Szrj 
503938fd1498Szrj         step 1: compute the scalar result in a vector (v_out2)
504038fd1498Szrj         step 2: extract the scalar result (s_out3) from the vector (v_out2)
504138fd1498Szrj         step 3: adjust the scalar result (s_out3) if needed.
504238fd1498Szrj 
        Step 1 can be accomplished using one of the following three schemes:
504438fd1498Szrj           (scheme 1) using reduc_fn, if available.
504538fd1498Szrj           (scheme 2) using whole-vector shifts, if available.
504638fd1498Szrj           (scheme 3) using a scalar loop. In this case steps 1+2 above are
504738fd1498Szrj                      combined.
504838fd1498Szrj 
504938fd1498Szrj           The overall epilog code looks like this:
505038fd1498Szrj 
505138fd1498Szrj           s_out0 = phi <s_loop>         # original EXIT_PHI
505238fd1498Szrj           v_out1 = phi <VECT_DEF>       # NEW_EXIT_PHI
505338fd1498Szrj           v_out2 = reduce <v_out1>              # step 1
505438fd1498Szrj           s_out3 = extract_field <v_out2, 0>    # step 2
505538fd1498Szrj           s_out4 = adjust_result <s_out3>       # step 3
505638fd1498Szrj 
505738fd1498Szrj           (step 3 is optional, and steps 1 and 2 may be combined).
505838fd1498Szrj           Lastly, the uses of s_out0 are replaced by s_out4.  */
505938fd1498Szrj 
506038fd1498Szrj 
506138fd1498Szrj   /* 2.1 Create new loop-exit-phis to preserve loop-closed form:
506238fd1498Szrj          v_out1 = phi <VECT_DEF>
506338fd1498Szrj          Store them in NEW_PHIS.  */
506438fd1498Szrj 
506538fd1498Szrj   exit_bb = single_exit (loop)->dest;
506638fd1498Szrj   prev_phi_info = NULL;
506738fd1498Szrj   new_phis.create (vect_defs.length ());
506838fd1498Szrj   FOR_EACH_VEC_ELT (vect_defs, i, def)
506938fd1498Szrj     {
507038fd1498Szrj       for (j = 0; j < ncopies; j++)
507138fd1498Szrj         {
507238fd1498Szrj 	  tree new_def = copy_ssa_name (def);
507338fd1498Szrj           phi = create_phi_node (new_def, exit_bb);
507438fd1498Szrj           set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, loop_vinfo));
507538fd1498Szrj           if (j == 0)
507638fd1498Szrj             new_phis.quick_push (phi);
507738fd1498Szrj           else
507838fd1498Szrj 	    {
507938fd1498Szrj 	      def = vect_get_vec_def_for_stmt_copy (dt, def);
508038fd1498Szrj 	      STMT_VINFO_RELATED_STMT (prev_phi_info) = phi;
508138fd1498Szrj 	    }
508238fd1498Szrj 
508338fd1498Szrj           SET_PHI_ARG_DEF (phi, single_exit (loop)->dest_idx, def);
508438fd1498Szrj           prev_phi_info = vinfo_for_stmt (phi);
508538fd1498Szrj         }
508638fd1498Szrj     }
508738fd1498Szrj 
508838fd1498Szrj   /* The epilogue is created for the outer-loop, i.e., for the loop being
508938fd1498Szrj      vectorized.  Create exit phis for the outer loop.  */
509038fd1498Szrj   if (double_reduc)
509138fd1498Szrj     {
509238fd1498Szrj       loop = outer_loop;
509338fd1498Szrj       exit_bb = single_exit (loop)->dest;
509438fd1498Szrj       inner_phis.create (vect_defs.length ());
509538fd1498Szrj       FOR_EACH_VEC_ELT (new_phis, i, phi)
509638fd1498Szrj 	{
509738fd1498Szrj 	  tree new_result = copy_ssa_name (PHI_RESULT (phi));
509838fd1498Szrj 	  gphi *outer_phi = create_phi_node (new_result, exit_bb);
509938fd1498Szrj 	  SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx,
510038fd1498Szrj 			   PHI_RESULT (phi));
510138fd1498Szrj 	  set_vinfo_for_stmt (outer_phi, new_stmt_vec_info (outer_phi,
510238fd1498Szrj 							    loop_vinfo));
510338fd1498Szrj 	  inner_phis.quick_push (phi);
510438fd1498Szrj 	  new_phis[i] = outer_phi;
510538fd1498Szrj 	  prev_phi_info = vinfo_for_stmt (outer_phi);
510638fd1498Szrj           while (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi)))
510738fd1498Szrj             {
510838fd1498Szrj 	      phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi));
510938fd1498Szrj 	      new_result = copy_ssa_name (PHI_RESULT (phi));
511038fd1498Szrj 	      outer_phi = create_phi_node (new_result, exit_bb);
511138fd1498Szrj 	      SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx,
511238fd1498Szrj 			       PHI_RESULT (phi));
511338fd1498Szrj 	      set_vinfo_for_stmt (outer_phi, new_stmt_vec_info (outer_phi,
511438fd1498Szrj 								loop_vinfo));
511538fd1498Szrj 	      STMT_VINFO_RELATED_STMT (prev_phi_info) = outer_phi;
511638fd1498Szrj 	      prev_phi_info = vinfo_for_stmt (outer_phi);
511738fd1498Szrj 	    }
511838fd1498Szrj 	}
511938fd1498Szrj     }
512038fd1498Szrj 
512138fd1498Szrj   exit_gsi = gsi_after_labels (exit_bb);
512238fd1498Szrj 
512338fd1498Szrj   /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
512438fd1498Szrj          (i.e. when reduc_fn is not available) and in the final adjustment
512538fd1498Szrj 	 code (if needed).  Also get the original scalar reduction variable as
512638fd1498Szrj          defined in the loop.  In case STMT is a "pattern-stmt" (i.e. - it
512738fd1498Szrj          represents a reduction pattern), the tree-code and scalar-def are
512838fd1498Szrj          taken from the original stmt that the pattern-stmt (STMT) replaces.
512938fd1498Szrj          Otherwise (it is a regular reduction) - the tree-code and scalar-def
513038fd1498Szrj          are taken from STMT.  */
513138fd1498Szrj 
513238fd1498Szrj   orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
513338fd1498Szrj   if (!orig_stmt)
513438fd1498Szrj     {
513538fd1498Szrj       /* Regular reduction  */
513638fd1498Szrj       orig_stmt = stmt;
513738fd1498Szrj     }
513838fd1498Szrj   else
513938fd1498Szrj     {
514038fd1498Szrj       /* Reduction pattern  */
514138fd1498Szrj       stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt);
514238fd1498Szrj       gcc_assert (STMT_VINFO_IN_PATTERN_P (stmt_vinfo));
514338fd1498Szrj       gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
514438fd1498Szrj     }
514538fd1498Szrj 
514638fd1498Szrj   code = gimple_assign_rhs_code (orig_stmt);
514738fd1498Szrj   /* For MINUS_EXPR the initial vector is [init_val,0,...,0], therefore,
514838fd1498Szrj      partial results are added and not subtracted.  */
514938fd1498Szrj   if (code == MINUS_EXPR)
515038fd1498Szrj     code = PLUS_EXPR;
515138fd1498Szrj 
515238fd1498Szrj   scalar_dest = gimple_assign_lhs (orig_stmt);
515338fd1498Szrj   scalar_type = TREE_TYPE (scalar_dest);
515438fd1498Szrj   scalar_results.create (group_size);
515538fd1498Szrj   new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
515638fd1498Szrj   bitsize = TYPE_SIZE (scalar_type);
515738fd1498Szrj 
515838fd1498Szrj   /* In case this is a reduction in an inner-loop while vectorizing an outer
515938fd1498Szrj      loop - we don't need to extract a single scalar result at the end of the
516038fd1498Szrj      inner-loop (unless it is double reduction, i.e., the use of reduction is
516138fd1498Szrj      outside the outer-loop).  The final vector of partial results will be used
516238fd1498Szrj      in the vectorized outer-loop, or reduced to a scalar result at the end of
516338fd1498Szrj      the outer-loop.  */
516438fd1498Szrj   if (nested_in_vect_loop && !double_reduc)
516538fd1498Szrj     goto vect_finalize_reduction;
516638fd1498Szrj 
516738fd1498Szrj   /* SLP reduction without reduction chain, e.g.,
516838fd1498Szrj      # a1 = phi <a2, a0>
516938fd1498Szrj      # b1 = phi <b2, b0>
517038fd1498Szrj      a2 = operation (a1)
517138fd1498Szrj      b2 = operation (b1)  */
517238fd1498Szrj   slp_reduc = (slp_node && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)));
517338fd1498Szrj 
517438fd1498Szrj   /* True if we should implement SLP_REDUC using native reduction operations
517538fd1498Szrj      instead of scalar operations.  */
517638fd1498Szrj   direct_slp_reduc = (reduc_fn != IFN_LAST
517738fd1498Szrj 		      && slp_reduc
517838fd1498Szrj 		      && !TYPE_VECTOR_SUBPARTS (vectype).is_constant ());
517938fd1498Szrj 
518038fd1498Szrj   /* In case of reduction chain, e.g.,
518138fd1498Szrj      # a1 = phi <a3, a0>
518238fd1498Szrj      a2 = operation (a1)
518338fd1498Szrj      a3 = operation (a2),
518438fd1498Szrj 
518538fd1498Szrj      we may end up with more than one vector result.  Here we reduce them to
518638fd1498Szrj      one vector.  */
518738fd1498Szrj   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) || direct_slp_reduc)
518838fd1498Szrj     {
518938fd1498Szrj       tree first_vect = PHI_RESULT (new_phis[0]);
519038fd1498Szrj       gassign *new_vec_stmt = NULL;
519138fd1498Szrj       vec_dest = vect_create_destination_var (scalar_dest, vectype);
519238fd1498Szrj       for (k = 1; k < new_phis.length (); k++)
519338fd1498Szrj         {
519438fd1498Szrj 	  gimple *next_phi = new_phis[k];
519538fd1498Szrj           tree second_vect = PHI_RESULT (next_phi);
519638fd1498Szrj           tree tem = make_ssa_name (vec_dest, new_vec_stmt);
519738fd1498Szrj           new_vec_stmt = gimple_build_assign (tem, code,
519838fd1498Szrj 					      first_vect, second_vect);
519938fd1498Szrj           gsi_insert_before (&exit_gsi, new_vec_stmt, GSI_SAME_STMT);
520038fd1498Szrj 	  first_vect = tem;
520138fd1498Szrj         }
520238fd1498Szrj 
520338fd1498Szrj       new_phi_result = first_vect;
520438fd1498Szrj       if (new_vec_stmt)
520538fd1498Szrj         {
520638fd1498Szrj           new_phis.truncate (0);
520738fd1498Szrj           new_phis.safe_push (new_vec_stmt);
520838fd1498Szrj         }
520938fd1498Szrj     }
521038fd1498Szrj   /* Likewise if we couldn't use a single defuse cycle.  */
521138fd1498Szrj   else if (ncopies > 1)
521238fd1498Szrj     {
521338fd1498Szrj       gcc_assert (new_phis.length () == 1);
521438fd1498Szrj       tree first_vect = PHI_RESULT (new_phis[0]);
521538fd1498Szrj       gassign *new_vec_stmt = NULL;
521638fd1498Szrj       vec_dest = vect_create_destination_var (scalar_dest, vectype);
521738fd1498Szrj       gimple *next_phi = new_phis[0];
521838fd1498Szrj       for (int k = 1; k < ncopies; ++k)
521938fd1498Szrj 	{
522038fd1498Szrj 	  next_phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next_phi));
522138fd1498Szrj 	  tree second_vect = PHI_RESULT (next_phi);
522238fd1498Szrj           tree tem = make_ssa_name (vec_dest, new_vec_stmt);
522338fd1498Szrj           new_vec_stmt = gimple_build_assign (tem, code,
522438fd1498Szrj 					      first_vect, second_vect);
522538fd1498Szrj           gsi_insert_before (&exit_gsi, new_vec_stmt, GSI_SAME_STMT);
522638fd1498Szrj 	  first_vect = tem;
522738fd1498Szrj 	}
522838fd1498Szrj       new_phi_result = first_vect;
522938fd1498Szrj       new_phis.truncate (0);
523038fd1498Szrj       new_phis.safe_push (new_vec_stmt);
523138fd1498Szrj     }
523238fd1498Szrj   else
523338fd1498Szrj     new_phi_result = PHI_RESULT (new_phis[0]);
523438fd1498Szrj 
523538fd1498Szrj   if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION
523638fd1498Szrj       && reduc_fn != IFN_LAST)
523738fd1498Szrj     {
523838fd1498Szrj       /* For condition reductions, we have a vector (NEW_PHI_RESULT) containing
523938fd1498Szrj 	 various data values where the condition matched and another vector
524038fd1498Szrj 	 (INDUCTION_INDEX) containing all the indexes of those matches.  We
524138fd1498Szrj 	 need to extract the last matching index (which will be the index with
524238fd1498Szrj 	 highest value) and use this to index into the data vector.
524338fd1498Szrj 	 For the case where there were no matches, the data vector will contain
524438fd1498Szrj 	 all default values and the index vector will be all zeros.  */
524538fd1498Szrj 
524638fd1498Szrj       /* Get various versions of the type of the vector of indexes.  */
524738fd1498Szrj       tree index_vec_type = TREE_TYPE (induction_index);
524838fd1498Szrj       gcc_checking_assert (TYPE_UNSIGNED (index_vec_type));
524938fd1498Szrj       tree index_scalar_type = TREE_TYPE (index_vec_type);
525038fd1498Szrj       tree index_vec_cmp_type = build_same_sized_truth_vector_type
525138fd1498Szrj 	(index_vec_type);
525238fd1498Szrj 
525338fd1498Szrj       /* Get an unsigned integer version of the type of the data vector.  */
525438fd1498Szrj       int scalar_precision
525538fd1498Szrj 	= GET_MODE_PRECISION (SCALAR_TYPE_MODE (scalar_type));
525638fd1498Szrj       tree scalar_type_unsigned = make_unsigned_type (scalar_precision);
525738fd1498Szrj       tree vectype_unsigned = build_vector_type
525838fd1498Szrj 	(scalar_type_unsigned, TYPE_VECTOR_SUBPARTS (vectype));
525938fd1498Szrj 
526038fd1498Szrj       /* First we need to create a vector (ZERO_VEC) of zeros and another
526138fd1498Szrj 	 vector (MAX_INDEX_VEC) filled with the last matching index, which we
526238fd1498Szrj 	 can create using a MAX reduction and then expanding.
526338fd1498Szrj 	 In the case where the loop never made any matches, the max index will
526438fd1498Szrj 	 be zero.  */
526538fd1498Szrj 
526638fd1498Szrj       /* Vector of {0, 0, 0,...}.  */
526738fd1498Szrj       tree zero_vec = make_ssa_name (vectype);
526838fd1498Szrj       tree zero_vec_rhs = build_zero_cst (vectype);
526938fd1498Szrj       gimple *zero_vec_stmt = gimple_build_assign (zero_vec, zero_vec_rhs);
527038fd1498Szrj       gsi_insert_before (&exit_gsi, zero_vec_stmt, GSI_SAME_STMT);
527138fd1498Szrj 
527238fd1498Szrj       /* Find maximum value from the vector of found indexes.  */
527338fd1498Szrj       tree max_index = make_ssa_name (index_scalar_type);
527438fd1498Szrj       gcall *max_index_stmt = gimple_build_call_internal (IFN_REDUC_MAX,
527538fd1498Szrj 							  1, induction_index);
527638fd1498Szrj       gimple_call_set_lhs (max_index_stmt, max_index);
527738fd1498Szrj       gsi_insert_before (&exit_gsi, max_index_stmt, GSI_SAME_STMT);
527838fd1498Szrj 
527938fd1498Szrj       /* Vector of {max_index, max_index, max_index,...}.  */
528038fd1498Szrj       tree max_index_vec = make_ssa_name (index_vec_type);
528138fd1498Szrj       tree max_index_vec_rhs = build_vector_from_val (index_vec_type,
528238fd1498Szrj 						      max_index);
528338fd1498Szrj       gimple *max_index_vec_stmt = gimple_build_assign (max_index_vec,
528438fd1498Szrj 							max_index_vec_rhs);
528538fd1498Szrj       gsi_insert_before (&exit_gsi, max_index_vec_stmt, GSI_SAME_STMT);
528638fd1498Szrj 
528738fd1498Szrj       /* Next we compare the new vector (MAX_INDEX_VEC) full of max indexes
528838fd1498Szrj 	 with the vector (INDUCTION_INDEX) of found indexes, choosing values
528938fd1498Szrj 	 from the data vector (NEW_PHI_RESULT) for matches, 0 (ZERO_VEC)
529038fd1498Szrj 	 otherwise.  Only one value should match, resulting in a vector
529138fd1498Szrj 	 (VEC_COND) with one data value and the rest zeros.
529238fd1498Szrj 	 In the case where the loop never made any matches, every index will
529338fd1498Szrj 	 match, resulting in a vector with all data values (which will all be
529438fd1498Szrj 	 the default value).  */
529538fd1498Szrj 
529638fd1498Szrj       /* Compare the max index vector to the vector of found indexes to find
529738fd1498Szrj 	 the position of the max value.  */
529838fd1498Szrj       tree vec_compare = make_ssa_name (index_vec_cmp_type);
529938fd1498Szrj       gimple *vec_compare_stmt = gimple_build_assign (vec_compare, EQ_EXPR,
530038fd1498Szrj 						      induction_index,
530138fd1498Szrj 						      max_index_vec);
530238fd1498Szrj       gsi_insert_before (&exit_gsi, vec_compare_stmt, GSI_SAME_STMT);
530338fd1498Szrj 
530438fd1498Szrj       /* Use the compare to choose either values from the data vector or
530538fd1498Szrj 	 zero.  */
530638fd1498Szrj       tree vec_cond = make_ssa_name (vectype);
530738fd1498Szrj       gimple *vec_cond_stmt = gimple_build_assign (vec_cond, VEC_COND_EXPR,
530838fd1498Szrj 						   vec_compare, new_phi_result,
530938fd1498Szrj 						   zero_vec);
531038fd1498Szrj       gsi_insert_before (&exit_gsi, vec_cond_stmt, GSI_SAME_STMT);
531138fd1498Szrj 
531238fd1498Szrj       /* Finally we need to extract the data value from the vector (VEC_COND)
531338fd1498Szrj 	 into a scalar (MATCHED_DATA_REDUC).  Logically we want to do a OR
531438fd1498Szrj 	 reduction, but because this doesn't exist, we can use a MAX reduction
531538fd1498Szrj 	 instead.  The data value might be signed or a float so we need to cast
531638fd1498Szrj 	 it first.
531738fd1498Szrj 	 In the case where the loop never made any matches, the data values are
531838fd1498Szrj 	 all identical, and so will reduce down correctly.  */
531938fd1498Szrj 
532038fd1498Szrj       /* Make the matched data values unsigned.  */
532138fd1498Szrj       tree vec_cond_cast = make_ssa_name (vectype_unsigned);
532238fd1498Szrj       tree vec_cond_cast_rhs = build1 (VIEW_CONVERT_EXPR, vectype_unsigned,
532338fd1498Szrj 				       vec_cond);
532438fd1498Szrj       gimple *vec_cond_cast_stmt = gimple_build_assign (vec_cond_cast,
532538fd1498Szrj 							VIEW_CONVERT_EXPR,
532638fd1498Szrj 							vec_cond_cast_rhs);
532738fd1498Szrj       gsi_insert_before (&exit_gsi, vec_cond_cast_stmt, GSI_SAME_STMT);
532838fd1498Szrj 
532938fd1498Szrj       /* Reduce down to a scalar value.  */
533038fd1498Szrj       tree data_reduc = make_ssa_name (scalar_type_unsigned);
533138fd1498Szrj       gcall *data_reduc_stmt = gimple_build_call_internal (IFN_REDUC_MAX,
533238fd1498Szrj 							   1, vec_cond_cast);
533338fd1498Szrj       gimple_call_set_lhs (data_reduc_stmt, data_reduc);
533438fd1498Szrj       gsi_insert_before (&exit_gsi, data_reduc_stmt, GSI_SAME_STMT);
533538fd1498Szrj 
533638fd1498Szrj       /* Convert the reduced value back to the result type and set as the
533738fd1498Szrj 	 result.  */
533838fd1498Szrj       gimple_seq stmts = NULL;
533938fd1498Szrj       new_temp = gimple_build (&stmts, VIEW_CONVERT_EXPR, scalar_type,
534038fd1498Szrj 			       data_reduc);
534138fd1498Szrj       gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
534238fd1498Szrj       scalar_results.safe_push (new_temp);
534338fd1498Szrj     }
534438fd1498Szrj   else if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION
534538fd1498Szrj 	   && reduc_fn == IFN_LAST)
534638fd1498Szrj     {
534738fd1498Szrj       /* Condition reduction without supported IFN_REDUC_MAX.  Generate
534838fd1498Szrj 	 idx = 0;
534938fd1498Szrj          idx_val = induction_index[0];
535038fd1498Szrj 	 val = data_reduc[0];
535138fd1498Szrj          for (idx = 0, val = init, i = 0; i < nelts; ++i)
535238fd1498Szrj 	   if (induction_index[i] > idx_val)
535338fd1498Szrj 	     val = data_reduc[i], idx_val = induction_index[i];
535438fd1498Szrj 	 return val;  */
535538fd1498Szrj 
535638fd1498Szrj       tree data_eltype = TREE_TYPE (TREE_TYPE (new_phi_result));
535738fd1498Szrj       tree idx_eltype = TREE_TYPE (TREE_TYPE (induction_index));
535838fd1498Szrj       unsigned HOST_WIDE_INT el_size = tree_to_uhwi (TYPE_SIZE (idx_eltype));
535938fd1498Szrj       poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (TREE_TYPE (induction_index));
536038fd1498Szrj       /* Enforced by vectorizable_reduction, which ensures we have target
536138fd1498Szrj 	 support before allowing a conditional reduction on variable-length
536238fd1498Szrj 	 vectors.  */
536338fd1498Szrj       unsigned HOST_WIDE_INT v_size = el_size * nunits.to_constant ();
536438fd1498Szrj       tree idx_val = NULL_TREE, val = NULL_TREE;
536538fd1498Szrj       for (unsigned HOST_WIDE_INT off = 0; off < v_size; off += el_size)
536638fd1498Szrj 	{
536738fd1498Szrj 	  tree old_idx_val = idx_val;
536838fd1498Szrj 	  tree old_val = val;
536938fd1498Szrj 	  idx_val = make_ssa_name (idx_eltype);
537038fd1498Szrj 	  epilog_stmt = gimple_build_assign (idx_val, BIT_FIELD_REF,
537138fd1498Szrj 					     build3 (BIT_FIELD_REF, idx_eltype,
537238fd1498Szrj 						     induction_index,
537338fd1498Szrj 						     bitsize_int (el_size),
537438fd1498Szrj 						     bitsize_int (off)));
537538fd1498Szrj 	  gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
537638fd1498Szrj 	  val = make_ssa_name (data_eltype);
537738fd1498Szrj 	  epilog_stmt = gimple_build_assign (val, BIT_FIELD_REF,
537838fd1498Szrj 					     build3 (BIT_FIELD_REF,
537938fd1498Szrj 						     data_eltype,
538038fd1498Szrj 						     new_phi_result,
538138fd1498Szrj 						     bitsize_int (el_size),
538238fd1498Szrj 						     bitsize_int (off)));
538338fd1498Szrj 	  gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
538438fd1498Szrj 	  if (off != 0)
538538fd1498Szrj 	    {
538638fd1498Szrj 	      tree new_idx_val = idx_val;
538738fd1498Szrj 	      tree new_val = val;
538838fd1498Szrj 	      if (off != v_size - el_size)
538938fd1498Szrj 		{
539038fd1498Szrj 		  new_idx_val = make_ssa_name (idx_eltype);
539138fd1498Szrj 		  epilog_stmt = gimple_build_assign (new_idx_val,
539238fd1498Szrj 						     MAX_EXPR, idx_val,
539338fd1498Szrj 						     old_idx_val);
539438fd1498Szrj 		  gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
539538fd1498Szrj 		}
539638fd1498Szrj 	      new_val = make_ssa_name (data_eltype);
539738fd1498Szrj 	      epilog_stmt = gimple_build_assign (new_val,
539838fd1498Szrj 						 COND_EXPR,
539938fd1498Szrj 						 build2 (GT_EXPR,
540038fd1498Szrj 							 boolean_type_node,
540138fd1498Szrj 							 idx_val,
540238fd1498Szrj 							 old_idx_val),
540338fd1498Szrj 						 val, old_val);
540438fd1498Szrj 	      gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
540538fd1498Szrj 	      idx_val = new_idx_val;
540638fd1498Szrj 	      val = new_val;
540738fd1498Szrj 	    }
540838fd1498Szrj 	}
540938fd1498Szrj       /* Convert the reduced value back to the result type and set as the
541038fd1498Szrj 	 result.  */
541138fd1498Szrj       gimple_seq stmts = NULL;
541238fd1498Szrj       val = gimple_convert (&stmts, scalar_type, val);
541338fd1498Szrj       gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
541438fd1498Szrj       scalar_results.safe_push (val);
541538fd1498Szrj     }
541638fd1498Szrj 
541738fd1498Szrj   /* 2.3 Create the reduction code, using one of the three schemes described
541838fd1498Szrj          above. In SLP we simply need to extract all the elements from the
541938fd1498Szrj          vector (without reducing them), so we use scalar shifts.  */
542038fd1498Szrj   else if (reduc_fn != IFN_LAST && !slp_reduc)
542138fd1498Szrj     {
542238fd1498Szrj       tree tmp;
542338fd1498Szrj       tree vec_elem_type;
542438fd1498Szrj 
542538fd1498Szrj       /* Case 1:  Create:
542638fd1498Szrj          v_out2 = reduc_expr <v_out1>  */
542738fd1498Szrj 
542838fd1498Szrj       if (dump_enabled_p ())
542938fd1498Szrj         dump_printf_loc (MSG_NOTE, vect_location,
543038fd1498Szrj 			 "Reduce using direct vector reduction.\n");
543138fd1498Szrj 
543238fd1498Szrj       vec_elem_type = TREE_TYPE (TREE_TYPE (new_phi_result));
543338fd1498Szrj       if (!useless_type_conversion_p (scalar_type, vec_elem_type))
543438fd1498Szrj 	{
543538fd1498Szrj 	  tree tmp_dest
543638fd1498Szrj 	    = vect_create_destination_var (scalar_dest, vec_elem_type);
543738fd1498Szrj 	  epilog_stmt = gimple_build_call_internal (reduc_fn, 1,
543838fd1498Szrj 						    new_phi_result);
543938fd1498Szrj 	  gimple_set_lhs (epilog_stmt, tmp_dest);
544038fd1498Szrj 	  new_temp = make_ssa_name (tmp_dest, epilog_stmt);
544138fd1498Szrj 	  gimple_set_lhs (epilog_stmt, new_temp);
544238fd1498Szrj 	  gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
544338fd1498Szrj 
544438fd1498Szrj 	  epilog_stmt = gimple_build_assign (new_scalar_dest, NOP_EXPR,
544538fd1498Szrj 					     new_temp);
544638fd1498Szrj 	}
544738fd1498Szrj       else
544838fd1498Szrj 	{
544938fd1498Szrj 	  epilog_stmt = gimple_build_call_internal (reduc_fn, 1,
545038fd1498Szrj 						    new_phi_result);
545138fd1498Szrj 	  gimple_set_lhs (epilog_stmt, new_scalar_dest);
545238fd1498Szrj 	}
545338fd1498Szrj 
545438fd1498Szrj       new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
545538fd1498Szrj       gimple_set_lhs (epilog_stmt, new_temp);
545638fd1498Szrj       gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
545738fd1498Szrj 
545838fd1498Szrj       if ((STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
545938fd1498Szrj 	   == INTEGER_INDUC_COND_REDUCTION)
546038fd1498Szrj 	  && !operand_equal_p (initial_def, induc_val, 0))
546138fd1498Szrj 	{
546238fd1498Szrj 	  /* Earlier we set the initial value to be a vector of induc_val
546338fd1498Szrj 	     values.  Check the result and if it is induc_val then replace
546438fd1498Szrj 	     with the original initial value, unless induc_val is
546538fd1498Szrj 	     the same as initial_def already.  */
546638fd1498Szrj 	  tree zcompare = build2 (EQ_EXPR, boolean_type_node, new_temp,
546738fd1498Szrj 				  induc_val);
546838fd1498Szrj 
546938fd1498Szrj 	  tmp = make_ssa_name (new_scalar_dest);
547038fd1498Szrj 	  epilog_stmt = gimple_build_assign (tmp, COND_EXPR, zcompare,
547138fd1498Szrj 					     initial_def, new_temp);
547238fd1498Szrj 	  gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
547338fd1498Szrj 	  new_temp = tmp;
547438fd1498Szrj 	}
547538fd1498Szrj 
547638fd1498Szrj       scalar_results.safe_push (new_temp);
547738fd1498Szrj     }
547838fd1498Szrj   else if (direct_slp_reduc)
547938fd1498Szrj     {
548038fd1498Szrj       /* Here we create one vector for each of the GROUP_SIZE results,
548138fd1498Szrj 	 with the elements for other SLP statements replaced with the
548238fd1498Szrj 	 neutral value.  We can then do a normal reduction on each vector.  */
548338fd1498Szrj 
548438fd1498Szrj       /* Enforced by vectorizable_reduction.  */
548538fd1498Szrj       gcc_assert (new_phis.length () == 1);
548638fd1498Szrj       gcc_assert (pow2p_hwi (group_size));
548738fd1498Szrj 
548838fd1498Szrj       slp_tree orig_phis_slp_node = slp_node_instance->reduc_phis;
548938fd1498Szrj       vec<gimple *> orig_phis = SLP_TREE_SCALAR_STMTS (orig_phis_slp_node);
549038fd1498Szrj       gimple_seq seq = NULL;
549138fd1498Szrj 
549238fd1498Szrj       /* Build a vector {0, 1, 2, ...}, with the same number of elements
549338fd1498Szrj 	 and the same element size as VECTYPE.  */
549438fd1498Szrj       tree index = build_index_vector (vectype, 0, 1);
549538fd1498Szrj       tree index_type = TREE_TYPE (index);
549638fd1498Szrj       tree index_elt_type = TREE_TYPE (index_type);
549738fd1498Szrj       tree mask_type = build_same_sized_truth_vector_type (index_type);
549838fd1498Szrj 
549938fd1498Szrj       /* Create a vector that, for each element, identifies which of
550038fd1498Szrj 	 the GROUP_SIZE results should use it.  */
550138fd1498Szrj       tree index_mask = build_int_cst (index_elt_type, group_size - 1);
550238fd1498Szrj       index = gimple_build (&seq, BIT_AND_EXPR, index_type, index,
550338fd1498Szrj 			    build_vector_from_val (index_type, index_mask));
550438fd1498Szrj 
550538fd1498Szrj       /* Get a neutral vector value.  This is simply a splat of the neutral
550638fd1498Szrj 	 scalar value if we have one, otherwise the initial scalar value
550738fd1498Szrj 	 is itself a neutral value.  */
550838fd1498Szrj       tree vector_identity = NULL_TREE;
550938fd1498Szrj       if (neutral_op)
551038fd1498Szrj 	vector_identity = gimple_build_vector_from_val (&seq, vectype,
551138fd1498Szrj 							neutral_op);
551238fd1498Szrj       for (unsigned int i = 0; i < group_size; ++i)
551338fd1498Szrj 	{
551438fd1498Szrj 	  /* If there's no universal neutral value, we can use the
551538fd1498Szrj 	     initial scalar value from the original PHI.  This is used
551638fd1498Szrj 	     for MIN and MAX reduction, for example.  */
551738fd1498Szrj 	  if (!neutral_op)
551838fd1498Szrj 	    {
551938fd1498Szrj 	      tree scalar_value
552038fd1498Szrj 		= PHI_ARG_DEF_FROM_EDGE (orig_phis[i],
552138fd1498Szrj 					 loop_preheader_edge (loop));
552238fd1498Szrj 	      vector_identity = gimple_build_vector_from_val (&seq, vectype,
552338fd1498Szrj 							      scalar_value);
552438fd1498Szrj 	    }
552538fd1498Szrj 
552638fd1498Szrj 	  /* Calculate the equivalent of:
552738fd1498Szrj 
552838fd1498Szrj 	     sel[j] = (index[j] == i);
552938fd1498Szrj 
553038fd1498Szrj 	     which selects the elements of NEW_PHI_RESULT that should
553138fd1498Szrj 	     be included in the result.  */
553238fd1498Szrj 	  tree compare_val = build_int_cst (index_elt_type, i);
553338fd1498Szrj 	  compare_val = build_vector_from_val (index_type, compare_val);
553438fd1498Szrj 	  tree sel = gimple_build (&seq, EQ_EXPR, mask_type,
553538fd1498Szrj 				   index, compare_val);
553638fd1498Szrj 
553738fd1498Szrj 	  /* Calculate the equivalent of:
553838fd1498Szrj 
553938fd1498Szrj 	     vec = sel ? new_phi_result : vector_identity;
554038fd1498Szrj 
554138fd1498Szrj 	     VEC is now suitable for a full vector reduction.  */
554238fd1498Szrj 	  tree vec = gimple_build (&seq, VEC_COND_EXPR, vectype,
554338fd1498Szrj 				   sel, new_phi_result, vector_identity);
554438fd1498Szrj 
554538fd1498Szrj 	  /* Do the reduction and convert it to the appropriate type.  */
554638fd1498Szrj 	  gcall *call = gimple_build_call_internal (reduc_fn, 1, vec);
554738fd1498Szrj 	  tree scalar = make_ssa_name (TREE_TYPE (vectype));
554838fd1498Szrj 	  gimple_call_set_lhs (call, scalar);
554938fd1498Szrj 	  gimple_seq_add_stmt (&seq, call);
555038fd1498Szrj 	  scalar = gimple_convert (&seq, scalar_type, scalar);
555138fd1498Szrj 	  scalar_results.safe_push (scalar);
555238fd1498Szrj 	}
555338fd1498Szrj       gsi_insert_seq_before (&exit_gsi, seq, GSI_SAME_STMT);
555438fd1498Szrj     }
555538fd1498Szrj   else
555638fd1498Szrj     {
555738fd1498Szrj       bool reduce_with_shift;
555838fd1498Szrj       tree vec_temp;
555938fd1498Szrj 
556038fd1498Szrj       /* COND reductions all do the final reduction with MAX_EXPR
556138fd1498Szrj 	 or MIN_EXPR.  */
556238fd1498Szrj       if (code == COND_EXPR)
556338fd1498Szrj 	{
556438fd1498Szrj 	  if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
556538fd1498Szrj 	      == INTEGER_INDUC_COND_REDUCTION)
556638fd1498Szrj 	    code = induc_code;
5567*58e805e6Szrj 	  else if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
5568*58e805e6Szrj 		   == CONST_COND_REDUCTION)
5569*58e805e6Szrj 	    code = STMT_VINFO_VEC_CONST_COND_REDUC_CODE (stmt_info);
557038fd1498Szrj 	  else
557138fd1498Szrj 	    code = MAX_EXPR;
557238fd1498Szrj 	}
557338fd1498Szrj 
557438fd1498Szrj       /* See if the target wants to do the final (shift) reduction
557538fd1498Szrj 	 in a vector mode of smaller size and first reduce upper/lower
557638fd1498Szrj 	 halves against each other.  */
557738fd1498Szrj       enum machine_mode mode1 = mode;
557838fd1498Szrj       tree vectype1 = vectype;
557938fd1498Szrj       unsigned sz = tree_to_uhwi (TYPE_SIZE_UNIT (vectype));
558038fd1498Szrj       unsigned sz1 = sz;
558138fd1498Szrj       if (!slp_reduc
558238fd1498Szrj 	  && (mode1 = targetm.vectorize.split_reduction (mode)) != mode)
558338fd1498Szrj 	sz1 = GET_MODE_SIZE (mode1).to_constant ();
558438fd1498Szrj 
558538fd1498Szrj       vectype1 = get_vectype_for_scalar_type_and_size (scalar_type, sz1);
558638fd1498Szrj       reduce_with_shift = have_whole_vector_shift (mode1);
558738fd1498Szrj       if (!VECTOR_MODE_P (mode1))
558838fd1498Szrj 	reduce_with_shift = false;
558938fd1498Szrj       else
559038fd1498Szrj 	{
559138fd1498Szrj 	  optab optab = optab_for_tree_code (code, vectype1, optab_default);
559238fd1498Szrj 	  if (optab_handler (optab, mode1) == CODE_FOR_nothing)
559338fd1498Szrj 	    reduce_with_shift = false;
559438fd1498Szrj 	}
559538fd1498Szrj 
559638fd1498Szrj       /* First reduce the vector to the desired vector size we should
559738fd1498Szrj 	 do shift reduction on by combining upper and lower halves.  */
559838fd1498Szrj       new_temp = new_phi_result;
559938fd1498Szrj       while (sz > sz1)
560038fd1498Szrj 	{
560138fd1498Szrj 	  gcc_assert (!slp_reduc);
560238fd1498Szrj 	  sz /= 2;
560338fd1498Szrj 	  vectype1 = get_vectype_for_scalar_type_and_size (scalar_type, sz);
560438fd1498Szrj 
560538fd1498Szrj 	  /* The target has to make sure we support lowpart/highpart
560638fd1498Szrj 	     extraction, either via direct vector extract or through
560738fd1498Szrj 	     an integer mode punning.  */
560838fd1498Szrj 	  tree dst1, dst2;
560938fd1498Szrj 	  if (convert_optab_handler (vec_extract_optab,
561038fd1498Szrj 				     TYPE_MODE (TREE_TYPE (new_temp)),
561138fd1498Szrj 				     TYPE_MODE (vectype1))
561238fd1498Szrj 	      != CODE_FOR_nothing)
561338fd1498Szrj 	    {
561438fd1498Szrj 	      /* Extract sub-vectors directly once vec_extract becomes
561538fd1498Szrj 		 a conversion optab.  */
561638fd1498Szrj 	      dst1 = make_ssa_name (vectype1);
561738fd1498Szrj 	      epilog_stmt
561838fd1498Szrj 		  = gimple_build_assign (dst1, BIT_FIELD_REF,
561938fd1498Szrj 					 build3 (BIT_FIELD_REF, vectype1,
562038fd1498Szrj 						 new_temp, TYPE_SIZE (vectype1),
562138fd1498Szrj 						 bitsize_int (0)));
562238fd1498Szrj 	      gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
562338fd1498Szrj 	      dst2 =  make_ssa_name (vectype1);
562438fd1498Szrj 	      epilog_stmt
562538fd1498Szrj 		  = gimple_build_assign (dst2, BIT_FIELD_REF,
562638fd1498Szrj 					 build3 (BIT_FIELD_REF, vectype1,
562738fd1498Szrj 						 new_temp, TYPE_SIZE (vectype1),
562838fd1498Szrj 						 bitsize_int (sz * BITS_PER_UNIT)));
562938fd1498Szrj 	      gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
563038fd1498Szrj 	    }
563138fd1498Szrj 	  else
563238fd1498Szrj 	    {
563338fd1498Szrj 	      /* Extract via punning to appropriately sized integer mode
563438fd1498Szrj 		 vector.  */
563538fd1498Szrj 	      tree eltype = build_nonstandard_integer_type (sz * BITS_PER_UNIT,
563638fd1498Szrj 							    1);
563738fd1498Szrj 	      tree etype = build_vector_type (eltype, 2);
563838fd1498Szrj 	      gcc_assert (convert_optab_handler (vec_extract_optab,
563938fd1498Szrj 						 TYPE_MODE (etype),
564038fd1498Szrj 						 TYPE_MODE (eltype))
564138fd1498Szrj 			  != CODE_FOR_nothing);
564238fd1498Szrj 	      tree tem = make_ssa_name (etype);
564338fd1498Szrj 	      epilog_stmt = gimple_build_assign (tem, VIEW_CONVERT_EXPR,
564438fd1498Szrj 						 build1 (VIEW_CONVERT_EXPR,
564538fd1498Szrj 							 etype, new_temp));
564638fd1498Szrj 	      gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
564738fd1498Szrj 	      new_temp = tem;
564838fd1498Szrj 	      tem = make_ssa_name (eltype);
564938fd1498Szrj 	      epilog_stmt
565038fd1498Szrj 		  = gimple_build_assign (tem, BIT_FIELD_REF,
565138fd1498Szrj 					 build3 (BIT_FIELD_REF, eltype,
565238fd1498Szrj 						 new_temp, TYPE_SIZE (eltype),
565338fd1498Szrj 						 bitsize_int (0)));
565438fd1498Szrj 	      gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
565538fd1498Szrj 	      dst1 = make_ssa_name (vectype1);
565638fd1498Szrj 	      epilog_stmt = gimple_build_assign (dst1, VIEW_CONVERT_EXPR,
565738fd1498Szrj 						 build1 (VIEW_CONVERT_EXPR,
565838fd1498Szrj 							 vectype1, tem));
565938fd1498Szrj 	      gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
566038fd1498Szrj 	      tem = make_ssa_name (eltype);
566138fd1498Szrj 	      epilog_stmt
566238fd1498Szrj 		  = gimple_build_assign (tem, BIT_FIELD_REF,
566338fd1498Szrj 					 build3 (BIT_FIELD_REF, eltype,
566438fd1498Szrj 						 new_temp, TYPE_SIZE (eltype),
566538fd1498Szrj 						 bitsize_int (sz * BITS_PER_UNIT)));
566638fd1498Szrj 	      gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
566738fd1498Szrj 	      dst2 =  make_ssa_name (vectype1);
566838fd1498Szrj 	      epilog_stmt = gimple_build_assign (dst2, VIEW_CONVERT_EXPR,
566938fd1498Szrj 						 build1 (VIEW_CONVERT_EXPR,
567038fd1498Szrj 							 vectype1, tem));
567138fd1498Szrj 	      gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
567238fd1498Szrj 	    }
567338fd1498Szrj 
567438fd1498Szrj 	  new_temp = make_ssa_name (vectype1);
567538fd1498Szrj 	  epilog_stmt = gimple_build_assign (new_temp, code, dst1, dst2);
567638fd1498Szrj 	  gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
567738fd1498Szrj 	}
567838fd1498Szrj 
567938fd1498Szrj       if (reduce_with_shift && !slp_reduc)
568038fd1498Szrj 	{
568138fd1498Szrj 	  int element_bitsize = tree_to_uhwi (bitsize);
568238fd1498Szrj 	  /* Enforced by vectorizable_reduction, which disallows SLP reductions
568338fd1498Szrj 	     for variable-length vectors and also requires direct target support
568438fd1498Szrj 	     for loop reductions.  */
568538fd1498Szrj 	  int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype1));
568638fd1498Szrj 	  int nelements = vec_size_in_bits / element_bitsize;
568738fd1498Szrj 	  vec_perm_builder sel;
568838fd1498Szrj 	  vec_perm_indices indices;
568938fd1498Szrj 
569038fd1498Szrj           int elt_offset;
569138fd1498Szrj 
569238fd1498Szrj           tree zero_vec = build_zero_cst (vectype1);
569338fd1498Szrj           /* Case 2: Create:
569438fd1498Szrj              for (offset = nelements/2; offset >= 1; offset/=2)
569538fd1498Szrj                 {
569638fd1498Szrj                   Create:  va' = vec_shift <va, offset>
569738fd1498Szrj                   Create:  va = vop <va, va'>
569838fd1498Szrj                 }  */
569938fd1498Szrj 
570038fd1498Szrj           tree rhs;
570138fd1498Szrj 
570238fd1498Szrj           if (dump_enabled_p ())
570338fd1498Szrj             dump_printf_loc (MSG_NOTE, vect_location,
570438fd1498Szrj 			     "Reduce using vector shifts\n");
570538fd1498Szrj 
570638fd1498Szrj 	  mode1 = TYPE_MODE (vectype1);
570738fd1498Szrj           vec_dest = vect_create_destination_var (scalar_dest, vectype1);
570838fd1498Szrj           for (elt_offset = nelements / 2;
570938fd1498Szrj                elt_offset >= 1;
571038fd1498Szrj                elt_offset /= 2)
571138fd1498Szrj             {
571238fd1498Szrj 	      calc_vec_perm_mask_for_shift (elt_offset, nelements, &sel);
571338fd1498Szrj 	      indices.new_vector (sel, 2, nelements);
571438fd1498Szrj 	      tree mask = vect_gen_perm_mask_any (vectype1, indices);
571538fd1498Szrj 	      epilog_stmt = gimple_build_assign (vec_dest, VEC_PERM_EXPR,
571638fd1498Szrj 						 new_temp, zero_vec, mask);
571738fd1498Szrj               new_name = make_ssa_name (vec_dest, epilog_stmt);
571838fd1498Szrj               gimple_assign_set_lhs (epilog_stmt, new_name);
571938fd1498Szrj               gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
572038fd1498Szrj 
572138fd1498Szrj 	      epilog_stmt = gimple_build_assign (vec_dest, code, new_name,
572238fd1498Szrj 						 new_temp);
572338fd1498Szrj               new_temp = make_ssa_name (vec_dest, epilog_stmt);
572438fd1498Szrj               gimple_assign_set_lhs (epilog_stmt, new_temp);
572538fd1498Szrj               gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
572638fd1498Szrj             }
572738fd1498Szrj 
572838fd1498Szrj 	  /* 2.4  Extract the final scalar result.  Create:
572938fd1498Szrj 	     s_out3 = extract_field <v_out2, bitpos>  */
573038fd1498Szrj 
573138fd1498Szrj 	  if (dump_enabled_p ())
573238fd1498Szrj 	    dump_printf_loc (MSG_NOTE, vect_location,
573338fd1498Szrj 			     "extract scalar result\n");
573438fd1498Szrj 
573538fd1498Szrj 	  rhs = build3 (BIT_FIELD_REF, scalar_type, new_temp,
573638fd1498Szrj 			bitsize, bitsize_zero_node);
573738fd1498Szrj 	  epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
573838fd1498Szrj 	  new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
573938fd1498Szrj 	  gimple_assign_set_lhs (epilog_stmt, new_temp);
574038fd1498Szrj 	  gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
574138fd1498Szrj 	  scalar_results.safe_push (new_temp);
574238fd1498Szrj         }
574338fd1498Szrj       else
574438fd1498Szrj         {
574538fd1498Szrj           /* Case 3: Create:
574638fd1498Szrj              s = extract_field <v_out2, 0>
574738fd1498Szrj              for (offset = element_size;
574838fd1498Szrj                   offset < vector_size;
574938fd1498Szrj                   offset += element_size;)
575038fd1498Szrj                {
575138fd1498Szrj                  Create:  s' = extract_field <v_out2, offset>
575238fd1498Szrj                  Create:  s = op <s, s'>  // For non SLP cases
575338fd1498Szrj                }  */
575438fd1498Szrj 
575538fd1498Szrj           if (dump_enabled_p ())
575638fd1498Szrj             dump_printf_loc (MSG_NOTE, vect_location,
575738fd1498Szrj 			     "Reduce using scalar code.\n");
575838fd1498Szrj 
575938fd1498Szrj 	  int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype1));
576038fd1498Szrj 	  int element_bitsize = tree_to_uhwi (bitsize);
576138fd1498Szrj           FOR_EACH_VEC_ELT (new_phis, i, new_phi)
576238fd1498Szrj             {
576338fd1498Szrj               int bit_offset;
576438fd1498Szrj               if (gimple_code (new_phi) == GIMPLE_PHI)
576538fd1498Szrj                 vec_temp = PHI_RESULT (new_phi);
576638fd1498Szrj               else
576738fd1498Szrj                 vec_temp = gimple_assign_lhs (new_phi);
576838fd1498Szrj               tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
576938fd1498Szrj 				 bitsize_zero_node);
577038fd1498Szrj               epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
577138fd1498Szrj               new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
577238fd1498Szrj               gimple_assign_set_lhs (epilog_stmt, new_temp);
577338fd1498Szrj               gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
577438fd1498Szrj 
577538fd1498Szrj               /* In SLP we don't need to apply reduction operation, so we just
577638fd1498Szrj                  collect s' values in SCALAR_RESULTS.  */
577738fd1498Szrj               if (slp_reduc)
577838fd1498Szrj                 scalar_results.safe_push (new_temp);
577938fd1498Szrj 
578038fd1498Szrj               for (bit_offset = element_bitsize;
578138fd1498Szrj                    bit_offset < vec_size_in_bits;
578238fd1498Szrj                    bit_offset += element_bitsize)
578338fd1498Szrj                 {
578438fd1498Szrj                   tree bitpos = bitsize_int (bit_offset);
578538fd1498Szrj                   tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp,
578638fd1498Szrj                                      bitsize, bitpos);
578738fd1498Szrj 
578838fd1498Szrj                   epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
578938fd1498Szrj                   new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
579038fd1498Szrj                   gimple_assign_set_lhs (epilog_stmt, new_name);
579138fd1498Szrj                   gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
579238fd1498Szrj 
579338fd1498Szrj                   if (slp_reduc)
579438fd1498Szrj                     {
579538fd1498Szrj                       /* In SLP we don't need to apply reduction operation, so
579638fd1498Szrj                          we just collect s' values in SCALAR_RESULTS.  */
579738fd1498Szrj                       new_temp = new_name;
579838fd1498Szrj                       scalar_results.safe_push (new_name);
579938fd1498Szrj                     }
580038fd1498Szrj                   else
580138fd1498Szrj                     {
580238fd1498Szrj 		      epilog_stmt = gimple_build_assign (new_scalar_dest, code,
580338fd1498Szrj 							 new_name, new_temp);
580438fd1498Szrj                       new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
580538fd1498Szrj                       gimple_assign_set_lhs (epilog_stmt, new_temp);
580638fd1498Szrj                       gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
580738fd1498Szrj                     }
580838fd1498Szrj                 }
580938fd1498Szrj             }
581038fd1498Szrj 
581138fd1498Szrj           /* The only case where we need to reduce scalar results in SLP, is
581238fd1498Szrj              unrolling.  If the size of SCALAR_RESULTS is greater than
581338fd1498Szrj              GROUP_SIZE, we reduce them combining elements modulo
581438fd1498Szrj              GROUP_SIZE.  */
581538fd1498Szrj           if (slp_reduc)
581638fd1498Szrj             {
581738fd1498Szrj               tree res, first_res, new_res;
581838fd1498Szrj 	      gimple *new_stmt;
581938fd1498Szrj 
582038fd1498Szrj               /* Reduce multiple scalar results in case of SLP unrolling.  */
582138fd1498Szrj               for (j = group_size; scalar_results.iterate (j, &res);
582238fd1498Szrj                    j++)
582338fd1498Szrj                 {
582438fd1498Szrj                   first_res = scalar_results[j % group_size];
582538fd1498Szrj 		  new_stmt = gimple_build_assign (new_scalar_dest, code,
582638fd1498Szrj 						  first_res, res);
582738fd1498Szrj                   new_res = make_ssa_name (new_scalar_dest, new_stmt);
582838fd1498Szrj                   gimple_assign_set_lhs (new_stmt, new_res);
582938fd1498Szrj                   gsi_insert_before (&exit_gsi, new_stmt, GSI_SAME_STMT);
583038fd1498Szrj                   scalar_results[j % group_size] = new_res;
583138fd1498Szrj                 }
583238fd1498Szrj             }
583338fd1498Szrj           else
583438fd1498Szrj             /* Not SLP - we have one scalar to keep in SCALAR_RESULTS.  */
583538fd1498Szrj             scalar_results.safe_push (new_temp);
583638fd1498Szrj         }
583738fd1498Szrj 
583838fd1498Szrj       if ((STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
583938fd1498Szrj 	   == INTEGER_INDUC_COND_REDUCTION)
584038fd1498Szrj 	  && !operand_equal_p (initial_def, induc_val, 0))
584138fd1498Szrj 	{
584238fd1498Szrj 	  /* Earlier we set the initial value to be a vector if induc_val
584338fd1498Szrj 	     values.  Check the result and if it is induc_val then replace
584438fd1498Szrj 	     with the original initial value, unless induc_val is
584538fd1498Szrj 	     the same as initial_def already.  */
584638fd1498Szrj 	  tree zcompare = build2 (EQ_EXPR, boolean_type_node, new_temp,
584738fd1498Szrj 				  induc_val);
584838fd1498Szrj 
584938fd1498Szrj 	  tree tmp = make_ssa_name (new_scalar_dest);
585038fd1498Szrj 	  epilog_stmt = gimple_build_assign (tmp, COND_EXPR, zcompare,
585138fd1498Szrj 					     initial_def, new_temp);
585238fd1498Szrj 	  gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
585338fd1498Szrj 	  scalar_results[0] = tmp;
585438fd1498Szrj 	}
585538fd1498Szrj     }
585638fd1498Szrj 
585738fd1498Szrj vect_finalize_reduction:
585838fd1498Szrj 
585938fd1498Szrj   if (double_reduc)
586038fd1498Szrj     loop = loop->inner;
586138fd1498Szrj 
586238fd1498Szrj   /* 2.5 Adjust the final result by the initial value of the reduction
586338fd1498Szrj 	 variable. (When such adjustment is not needed, then
586438fd1498Szrj 	 'adjustment_def' is zero).  For example, if code is PLUS we create:
586538fd1498Szrj 	 new_temp = loop_exit_def + adjustment_def  */
586638fd1498Szrj 
586738fd1498Szrj   if (adjustment_def)
586838fd1498Szrj     {
586938fd1498Szrj       gcc_assert (!slp_reduc);
587038fd1498Szrj       if (nested_in_vect_loop)
587138fd1498Szrj 	{
587238fd1498Szrj           new_phi = new_phis[0];
587338fd1498Szrj 	  gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) == VECTOR_TYPE);
587438fd1498Szrj 	  expr = build2 (code, vectype, PHI_RESULT (new_phi), adjustment_def);
587538fd1498Szrj 	  new_dest = vect_create_destination_var (scalar_dest, vectype);
587638fd1498Szrj 	}
587738fd1498Szrj       else
587838fd1498Szrj 	{
587938fd1498Szrj           new_temp = scalar_results[0];
588038fd1498Szrj 	  gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) != VECTOR_TYPE);
588138fd1498Szrj 	  expr = build2 (code, scalar_type, new_temp, adjustment_def);
588238fd1498Szrj 	  new_dest = vect_create_destination_var (scalar_dest, scalar_type);
588338fd1498Szrj 	}
588438fd1498Szrj 
588538fd1498Szrj       epilog_stmt = gimple_build_assign (new_dest, expr);
588638fd1498Szrj       new_temp = make_ssa_name (new_dest, epilog_stmt);
588738fd1498Szrj       gimple_assign_set_lhs (epilog_stmt, new_temp);
588838fd1498Szrj       gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
588938fd1498Szrj       if (nested_in_vect_loop)
589038fd1498Szrj         {
589138fd1498Szrj           set_vinfo_for_stmt (epilog_stmt,
589238fd1498Szrj                               new_stmt_vec_info (epilog_stmt, loop_vinfo));
589338fd1498Szrj           STMT_VINFO_RELATED_STMT (vinfo_for_stmt (epilog_stmt)) =
589438fd1498Szrj                 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_phi));
589538fd1498Szrj 
589638fd1498Szrj           if (!double_reduc)
589738fd1498Szrj             scalar_results.quick_push (new_temp);
589838fd1498Szrj           else
589938fd1498Szrj             scalar_results[0] = new_temp;
590038fd1498Szrj         }
590138fd1498Szrj       else
590238fd1498Szrj         scalar_results[0] = new_temp;
590338fd1498Szrj 
590438fd1498Szrj       new_phis[0] = epilog_stmt;
590538fd1498Szrj     }
590638fd1498Szrj 
590738fd1498Szrj   /* 2.6  Handle the loop-exit phis.  Replace the uses of scalar loop-exit
590838fd1498Szrj           phis with new adjusted scalar results, i.e., replace use <s_out0>
590938fd1498Szrj           with use <s_out4>.
591038fd1498Szrj 
591138fd1498Szrj      Transform:
591238fd1498Szrj         loop_exit:
591338fd1498Szrj           s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
591438fd1498Szrj           v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
591538fd1498Szrj           v_out2 = reduce <v_out1>
591638fd1498Szrj           s_out3 = extract_field <v_out2, 0>
591738fd1498Szrj           s_out4 = adjust_result <s_out3>
591838fd1498Szrj           use <s_out0>
591938fd1498Szrj           use <s_out0>
592038fd1498Szrj 
592138fd1498Szrj      into:
592238fd1498Szrj 
592338fd1498Szrj         loop_exit:
592438fd1498Szrj           s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
592538fd1498Szrj           v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
592638fd1498Szrj           v_out2 = reduce <v_out1>
592738fd1498Szrj           s_out3 = extract_field <v_out2, 0>
592838fd1498Szrj           s_out4 = adjust_result <s_out3>
592938fd1498Szrj           use <s_out4>
593038fd1498Szrj           use <s_out4> */
593138fd1498Szrj 
593238fd1498Szrj 
593338fd1498Szrj   /* In SLP reduction chain we reduce vector results into one vector if
593438fd1498Szrj      necessary, hence we set here GROUP_SIZE to 1.  SCALAR_DEST is the LHS of
593538fd1498Szrj      the last stmt in the reduction chain, since we are looking for the loop
593638fd1498Szrj      exit phi node.  */
593738fd1498Szrj   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
593838fd1498Szrj     {
593938fd1498Szrj       gimple *dest_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[group_size - 1];
594038fd1498Szrj       /* Handle reduction patterns.  */
594138fd1498Szrj       if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (dest_stmt)))
594238fd1498Szrj 	dest_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (dest_stmt));
594338fd1498Szrj 
594438fd1498Szrj       scalar_dest = gimple_assign_lhs (dest_stmt);
594538fd1498Szrj       group_size = 1;
594638fd1498Szrj     }
594738fd1498Szrj 
594838fd1498Szrj   /* In SLP we may have several statements in NEW_PHIS and REDUCTION_PHIS (in
594938fd1498Szrj      case that GROUP_SIZE is greater than vectorization factor).  Therefore, we
595038fd1498Szrj      need to match SCALAR_RESULTS with corresponding statements.  The first
595138fd1498Szrj      (GROUP_SIZE / number of new vector stmts) scalar results correspond to
595238fd1498Szrj      the first vector stmt, etc.
595338fd1498Szrj      (RATIO is equal to (GROUP_SIZE / number of new vector stmts)).  */
595438fd1498Szrj   if (group_size > new_phis.length ())
595538fd1498Szrj     {
595638fd1498Szrj       ratio = group_size / new_phis.length ();
595738fd1498Szrj       gcc_assert (!(group_size % new_phis.length ()));
595838fd1498Szrj     }
595938fd1498Szrj   else
596038fd1498Szrj     ratio = 1;
596138fd1498Szrj 
596238fd1498Szrj   for (k = 0; k < group_size; k++)
596338fd1498Szrj     {
596438fd1498Szrj       if (k % ratio == 0)
596538fd1498Szrj         {
596638fd1498Szrj           epilog_stmt = new_phis[k / ratio];
596738fd1498Szrj           reduction_phi = reduction_phis[k / ratio];
596838fd1498Szrj 	  if (double_reduc)
596938fd1498Szrj 	    inner_phi = inner_phis[k / ratio];
597038fd1498Szrj         }
597138fd1498Szrj 
597238fd1498Szrj       if (slp_reduc)
597338fd1498Szrj         {
597438fd1498Szrj 	  gimple *current_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[k];
597538fd1498Szrj 
597638fd1498Szrj           orig_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (current_stmt));
597738fd1498Szrj           /* SLP statements can't participate in patterns.  */
597838fd1498Szrj           gcc_assert (!orig_stmt);
597938fd1498Szrj           scalar_dest = gimple_assign_lhs (current_stmt);
598038fd1498Szrj         }
598138fd1498Szrj 
598238fd1498Szrj       phis.create (3);
598338fd1498Szrj       /* Find the loop-closed-use at the loop exit of the original scalar
598438fd1498Szrj          result.  (The reduction result is expected to have two immediate uses -
598538fd1498Szrj          one at the latch block, and one at the loop exit).  */
598638fd1498Szrj       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
598738fd1498Szrj         if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p)))
598838fd1498Szrj 	    && !is_gimple_debug (USE_STMT (use_p)))
598938fd1498Szrj           phis.safe_push (USE_STMT (use_p));
599038fd1498Szrj 
599138fd1498Szrj       /* While we expect to have found an exit_phi because of loop-closed-ssa
599238fd1498Szrj          form we can end up without one if the scalar cycle is dead.  */
599338fd1498Szrj 
599438fd1498Szrj       FOR_EACH_VEC_ELT (phis, i, exit_phi)
599538fd1498Szrj         {
599638fd1498Szrj           if (outer_loop)
599738fd1498Szrj             {
599838fd1498Szrj               stmt_vec_info exit_phi_vinfo = vinfo_for_stmt (exit_phi);
599938fd1498Szrj               gphi *vect_phi;
600038fd1498Szrj 
600138fd1498Szrj               /* FORNOW. Currently not supporting the case that an inner-loop
600238fd1498Szrj                  reduction is not used in the outer-loop (but only outside the
600338fd1498Szrj                  outer-loop), unless it is double reduction.  */
600438fd1498Szrj               gcc_assert ((STMT_VINFO_RELEVANT_P (exit_phi_vinfo)
600538fd1498Szrj                            && !STMT_VINFO_LIVE_P (exit_phi_vinfo))
600638fd1498Szrj                           || double_reduc);
600738fd1498Szrj 
600838fd1498Szrj 	      if (double_reduc)
600938fd1498Szrj 		STMT_VINFO_VEC_STMT (exit_phi_vinfo) = inner_phi;
601038fd1498Szrj 	      else
601138fd1498Szrj 		STMT_VINFO_VEC_STMT (exit_phi_vinfo) = epilog_stmt;
601238fd1498Szrj               if (!double_reduc
601338fd1498Szrj                   || STMT_VINFO_DEF_TYPE (exit_phi_vinfo)
601438fd1498Szrj                       != vect_double_reduction_def)
601538fd1498Szrj                 continue;
601638fd1498Szrj 
601738fd1498Szrj               /* Handle double reduction:
601838fd1498Szrj 
601938fd1498Szrj                  stmt1: s1 = phi <s0, s2>  - double reduction phi (outer loop)
602038fd1498Szrj                  stmt2:   s3 = phi <s1, s4> - (regular) reduc phi (inner loop)
602138fd1498Szrj                  stmt3:   s4 = use (s3)     - (regular) reduc stmt (inner loop)
602238fd1498Szrj                  stmt4: s2 = phi <s4>      - double reduction stmt (outer loop)
602338fd1498Szrj 
602438fd1498Szrj                  At that point the regular reduction (stmt2 and stmt3) is
602538fd1498Szrj                  already vectorized, as well as the exit phi node, stmt4.
602638fd1498Szrj                  Here we vectorize the phi node of double reduction, stmt1, and
602738fd1498Szrj                  update all relevant statements.  */
602838fd1498Szrj 
602938fd1498Szrj               /* Go through all the uses of s2 to find double reduction phi
603038fd1498Szrj                  node, i.e., stmt1 above.  */
603138fd1498Szrj               orig_name = PHI_RESULT (exit_phi);
603238fd1498Szrj               FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
603338fd1498Szrj                 {
603438fd1498Szrj                   stmt_vec_info use_stmt_vinfo;
603538fd1498Szrj                   stmt_vec_info new_phi_vinfo;
603638fd1498Szrj                   tree vect_phi_init, preheader_arg, vect_phi_res;
603738fd1498Szrj                   basic_block bb = gimple_bb (use_stmt);
603838fd1498Szrj 		  gimple *use;
603938fd1498Szrj 
604038fd1498Szrj                   /* Check that USE_STMT is really double reduction phi
604138fd1498Szrj                      node.  */
604238fd1498Szrj                   if (gimple_code (use_stmt) != GIMPLE_PHI
604338fd1498Szrj                       || gimple_phi_num_args (use_stmt) != 2
604438fd1498Szrj                       || bb->loop_father != outer_loop)
604538fd1498Szrj                     continue;
604638fd1498Szrj                   use_stmt_vinfo = vinfo_for_stmt (use_stmt);
604738fd1498Szrj                   if (!use_stmt_vinfo
604838fd1498Szrj                       || STMT_VINFO_DEF_TYPE (use_stmt_vinfo)
604938fd1498Szrj                           != vect_double_reduction_def)
605038fd1498Szrj 		    continue;
605138fd1498Szrj 
605238fd1498Szrj                   /* Create vector phi node for double reduction:
605338fd1498Szrj                      vs1 = phi <vs0, vs2>
605438fd1498Szrj                      vs1 was created previously in this function by a call to
605538fd1498Szrj                        vect_get_vec_def_for_operand and is stored in
605638fd1498Szrj                        vec_initial_def;
605738fd1498Szrj                      vs2 is defined by INNER_PHI, the vectorized EXIT_PHI;
605838fd1498Szrj                      vs0 is created here.  */
605938fd1498Szrj 
606038fd1498Szrj                   /* Create vector phi node.  */
606138fd1498Szrj                   vect_phi = create_phi_node (vec_initial_def, bb);
606238fd1498Szrj                   new_phi_vinfo = new_stmt_vec_info (vect_phi,
606338fd1498Szrj                                     loop_vec_info_for_loop (outer_loop));
606438fd1498Szrj                   set_vinfo_for_stmt (vect_phi, new_phi_vinfo);
606538fd1498Szrj 
606638fd1498Szrj                   /* Create vs0 - initial def of the double reduction phi.  */
606738fd1498Szrj                   preheader_arg = PHI_ARG_DEF_FROM_EDGE (use_stmt,
606838fd1498Szrj                                              loop_preheader_edge (outer_loop));
606938fd1498Szrj                   vect_phi_init = get_initial_def_for_reduction
607038fd1498Szrj 		    (stmt, preheader_arg, NULL);
607138fd1498Szrj 
607238fd1498Szrj                   /* Update phi node arguments with vs0 and vs2.  */
607338fd1498Szrj                   add_phi_arg (vect_phi, vect_phi_init,
607438fd1498Szrj                                loop_preheader_edge (outer_loop),
607538fd1498Szrj                                UNKNOWN_LOCATION);
607638fd1498Szrj                   add_phi_arg (vect_phi, PHI_RESULT (inner_phi),
607738fd1498Szrj                                loop_latch_edge (outer_loop), UNKNOWN_LOCATION);
607838fd1498Szrj                   if (dump_enabled_p ())
607938fd1498Szrj                     {
608038fd1498Szrj                       dump_printf_loc (MSG_NOTE, vect_location,
608138fd1498Szrj 				       "created double reduction phi node: ");
608238fd1498Szrj                       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vect_phi, 0);
608338fd1498Szrj                     }
608438fd1498Szrj 
608538fd1498Szrj                   vect_phi_res = PHI_RESULT (vect_phi);
608638fd1498Szrj 
608738fd1498Szrj                   /* Replace the use, i.e., set the correct vs1 in the regular
608838fd1498Szrj                      reduction phi node.  FORNOW, NCOPIES is always 1, so the
608938fd1498Szrj                      loop is redundant.  */
609038fd1498Szrj                   use = reduction_phi;
609138fd1498Szrj                   for (j = 0; j < ncopies; j++)
609238fd1498Szrj                     {
609338fd1498Szrj                       edge pr_edge = loop_preheader_edge (loop);
609438fd1498Szrj                       SET_PHI_ARG_DEF (use, pr_edge->dest_idx, vect_phi_res);
609538fd1498Szrj                       use = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use));
609638fd1498Szrj                     }
609738fd1498Szrj                 }
609838fd1498Szrj             }
609938fd1498Szrj         }
610038fd1498Szrj 
610138fd1498Szrj       phis.release ();
610238fd1498Szrj       if (nested_in_vect_loop)
610338fd1498Szrj         {
610438fd1498Szrj           if (double_reduc)
610538fd1498Szrj             loop = outer_loop;
610638fd1498Szrj           else
610738fd1498Szrj             continue;
610838fd1498Szrj         }
610938fd1498Szrj 
611038fd1498Szrj       phis.create (3);
611138fd1498Szrj       /* Find the loop-closed-use at the loop exit of the original scalar
611238fd1498Szrj          result.  (The reduction result is expected to have two immediate uses,
611338fd1498Szrj          one at the latch block, and one at the loop exit).  For double
611438fd1498Szrj          reductions we are looking for exit phis of the outer loop.  */
611538fd1498Szrj       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
611638fd1498Szrj         {
611738fd1498Szrj           if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
611838fd1498Szrj 	    {
611938fd1498Szrj 	      if (!is_gimple_debug (USE_STMT (use_p)))
612038fd1498Szrj 		phis.safe_push (USE_STMT (use_p));
612138fd1498Szrj 	    }
612238fd1498Szrj           else
612338fd1498Szrj             {
612438fd1498Szrj               if (double_reduc && gimple_code (USE_STMT (use_p)) == GIMPLE_PHI)
612538fd1498Szrj                 {
612638fd1498Szrj                   tree phi_res = PHI_RESULT (USE_STMT (use_p));
612738fd1498Szrj 
612838fd1498Szrj                   FOR_EACH_IMM_USE_FAST (phi_use_p, phi_imm_iter, phi_res)
612938fd1498Szrj                     {
613038fd1498Szrj                       if (!flow_bb_inside_loop_p (loop,
613138fd1498Szrj                                              gimple_bb (USE_STMT (phi_use_p)))
613238fd1498Szrj 			  && !is_gimple_debug (USE_STMT (phi_use_p)))
613338fd1498Szrj                         phis.safe_push (USE_STMT (phi_use_p));
613438fd1498Szrj                     }
613538fd1498Szrj                 }
613638fd1498Szrj             }
613738fd1498Szrj         }
613838fd1498Szrj 
613938fd1498Szrj       FOR_EACH_VEC_ELT (phis, i, exit_phi)
614038fd1498Szrj         {
614138fd1498Szrj           /* Replace the uses:  */
614238fd1498Szrj           orig_name = PHI_RESULT (exit_phi);
614338fd1498Szrj           scalar_result = scalar_results[k];
614438fd1498Szrj           FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
614538fd1498Szrj             FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
614638fd1498Szrj               SET_USE (use_p, scalar_result);
614738fd1498Szrj         }
614838fd1498Szrj 
614938fd1498Szrj       phis.release ();
615038fd1498Szrj     }
615138fd1498Szrj }
615238fd1498Szrj 
615338fd1498Szrj /* Return a vector of type VECTYPE that is equal to the vector select
615438fd1498Szrj    operation "MASK ? VEC : IDENTITY".  Insert the select statements
615538fd1498Szrj    before GSI.  */
615638fd1498Szrj 
615738fd1498Szrj static tree
merge_with_identity(gimple_stmt_iterator * gsi,tree mask,tree vectype,tree vec,tree identity)615838fd1498Szrj merge_with_identity (gimple_stmt_iterator *gsi, tree mask, tree vectype,
615938fd1498Szrj 		     tree vec, tree identity)
616038fd1498Szrj {
616138fd1498Szrj   tree cond = make_temp_ssa_name (vectype, NULL, "cond");
616238fd1498Szrj   gimple *new_stmt = gimple_build_assign (cond, VEC_COND_EXPR,
616338fd1498Szrj 					  mask, vec, identity);
616438fd1498Szrj   gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
616538fd1498Szrj   return cond;
616638fd1498Szrj }
616738fd1498Szrj 
616838fd1498Szrj /* Successively apply CODE to each element of VECTOR_RHS, in left-to-right
616938fd1498Szrj    order, starting with LHS.  Insert the extraction statements before GSI and
617038fd1498Szrj    associate the new scalar SSA names with variable SCALAR_DEST.
617138fd1498Szrj    Return the SSA name for the result.  */
617238fd1498Szrj 
617338fd1498Szrj static tree
vect_expand_fold_left(gimple_stmt_iterator * gsi,tree scalar_dest,tree_code code,tree lhs,tree vector_rhs)617438fd1498Szrj vect_expand_fold_left (gimple_stmt_iterator *gsi, tree scalar_dest,
617538fd1498Szrj 		       tree_code code, tree lhs, tree vector_rhs)
617638fd1498Szrj {
617738fd1498Szrj   tree vectype = TREE_TYPE (vector_rhs);
617838fd1498Szrj   tree scalar_type = TREE_TYPE (vectype);
617938fd1498Szrj   tree bitsize = TYPE_SIZE (scalar_type);
618038fd1498Szrj   unsigned HOST_WIDE_INT vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
618138fd1498Szrj   unsigned HOST_WIDE_INT element_bitsize = tree_to_uhwi (bitsize);
618238fd1498Szrj 
618338fd1498Szrj   for (unsigned HOST_WIDE_INT bit_offset = 0;
618438fd1498Szrj        bit_offset < vec_size_in_bits;
618538fd1498Szrj        bit_offset += element_bitsize)
618638fd1498Szrj     {
618738fd1498Szrj       tree bitpos = bitsize_int (bit_offset);
618838fd1498Szrj       tree rhs = build3 (BIT_FIELD_REF, scalar_type, vector_rhs,
618938fd1498Szrj 			 bitsize, bitpos);
619038fd1498Szrj 
619138fd1498Szrj       gassign *stmt = gimple_build_assign (scalar_dest, rhs);
619238fd1498Szrj       rhs = make_ssa_name (scalar_dest, stmt);
619338fd1498Szrj       gimple_assign_set_lhs (stmt, rhs);
619438fd1498Szrj       gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
619538fd1498Szrj 
619638fd1498Szrj       stmt = gimple_build_assign (scalar_dest, code, lhs, rhs);
619738fd1498Szrj       tree new_name = make_ssa_name (scalar_dest, stmt);
619838fd1498Szrj       gimple_assign_set_lhs (stmt, new_name);
619938fd1498Szrj       gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
620038fd1498Szrj       lhs = new_name;
620138fd1498Szrj     }
620238fd1498Szrj   return lhs;
620338fd1498Szrj }
620438fd1498Szrj 
/* Perform an in-order reduction (FOLD_LEFT_REDUCTION).  STMT is the
   statement that sets the live-out value.  REDUC_DEF_STMT is the phi
   statement.  CODE is the operation performed by STMT and OPS are
   its scalar operands.  REDUC_INDEX is the index of the operand in
   OPS that is set by REDUC_DEF_STMT.  REDUC_FN is the function that
   implements in-order reduction, or IFN_LAST if we should open-code it.
   VECTYPE_IN is the type of the vector input.  MASKS specifies the masks
   that should be used to control the operation in a fully-masked loop.  */

static bool
vectorize_fold_left_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
			       gimple **vec_stmt, slp_tree slp_node,
			       gimple *reduc_def_stmt,
			       tree_code code, internal_fn reduc_fn,
			       tree ops[3], tree vectype_in,
			       int reduc_index, vec_loop_masks *masks)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree vectype_out = STMT_VINFO_VECTYPE (stmt_info);
  gimple *new_stmt = NULL;

  /* For SLP the group as a whole is handled in one pass; otherwise the
     number of copies is determined by the input vector type.  */
  int ncopies;
  if (slp_node)
    ncopies = 1;
  else
    ncopies = vect_get_num_copies (loop_vinfo, vectype_in);

  /* The cases supported here are the ones the analysis phase allows for
     fold-left reductions: not nested, a single copy, a binary operation,
     and the phi operand in the position implied by CODE (operand 0 for
     MINUS_EXPR, operand 1 otherwise).  */
  gcc_assert (!nested_in_vect_loop_p (loop, stmt));
  gcc_assert (ncopies == 1);
  gcc_assert (TREE_CODE_LENGTH (code) == binary_op);
  gcc_assert (reduc_index == (code == MINUS_EXPR ? 0 : 1));
  gcc_assert (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
	      == FOLD_LEFT_REDUCTION);

  if (slp_node)
    gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype_out),
			  TYPE_VECTOR_SUBPARTS (vectype_in)));

  /* The vector operand: the input of the reduction that is not the
     loop-carried phi result.  */
  tree op0 = ops[1 - reduc_index];

  int group_size = 1;
  gimple *scalar_dest_def;
  auto_vec<tree> vec_oprnds0;
  if (slp_node)
    {
      vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL, slp_node);
      group_size = SLP_TREE_SCALAR_STMTS (slp_node).length ();
      /* In a chained SLP reduction the last statement of the group sets
	 the live-out value.  */
      scalar_dest_def = SLP_TREE_SCALAR_STMTS (slp_node)[group_size - 1];
    }
  else
    {
      tree loop_vec_def0 = vect_get_vec_def_for_operand (op0, stmt);
      vec_oprnds0.create (1);
      vec_oprnds0.quick_push (loop_vec_def0);
      scalar_dest_def = stmt;
    }

  tree scalar_dest = gimple_assign_lhs (scalar_dest_def);
  tree scalar_type = TREE_TYPE (scalar_dest);
  tree reduc_var = gimple_phi_result (reduc_def_stmt);

  int vec_num = vec_oprnds0.length ();
  gcc_assert (vec_num == 1 || slp_node);
  tree vec_elem_type = TREE_TYPE (vectype_out);
  gcc_checking_assert (useless_type_conversion_p (scalar_type, vec_elem_type));

  /* In a fully-masked loop, inactive lanes are merged with zero so they
     do not contribute to the reduction result.  */
  tree vector_identity = NULL_TREE;
  if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
    vector_identity = build_zero_cst (vectype_out);

  tree scalar_dest_var = vect_create_destination_var (scalar_dest, NULL);
  int i;
  tree def0;
  FOR_EACH_VEC_ELT (vec_oprnds0, i, def0)
    {
      tree mask = NULL_TREE;
      if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
	mask = vect_get_loop_mask (gsi, masks, vec_num, vectype_in, i);

      /* Handle MINUS by adding the negative.  */
      if (reduc_fn != IFN_LAST && code == MINUS_EXPR)
	{
	  tree negated = make_ssa_name (vectype_out);
	  new_stmt = gimple_build_assign (negated, NEGATE_EXPR, def0);
	  gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
	  def0 = negated;
	}

      if (mask)
	def0 = merge_with_identity (gsi, mask, vectype_out, def0,
				    vector_identity);

      /* On the first iteration the input is simply the scalar phi
	 result, and for subsequent iterations it is the output of
	 the preceding operation.  */
      if (reduc_fn != IFN_LAST)
	{
	  /* The target provides an in-order reduction function: emit
	     REDUC_FN (reduc_var, def0).  */
	  new_stmt = gimple_build_call_internal (reduc_fn, 2, reduc_var, def0);
	  /* For chained SLP reductions the output of the previous reduction
	     operation serves as the input of the next. For the final statement
	     the output cannot be a temporary - we reuse the original
	     scalar destination of the last statement.  */
	  if (i != vec_num - 1)
	    {
	      gimple_set_lhs (new_stmt, scalar_dest_var);
	      reduc_var = make_ssa_name (scalar_dest_var, new_stmt);
	      gimple_set_lhs (new_stmt, reduc_var);
	    }
	}
      else
	{
	  /* No target support: open-code the reduction as a sequence of
	     scalar operations, one per vector element.  */
	  reduc_var = vect_expand_fold_left (gsi, scalar_dest_var, code,
					     reduc_var, def0);
	  new_stmt = SSA_NAME_DEF_STMT (reduc_var);
	  /* Remove the statement, so that we can use the same code paths
	     as for statements that we've just created.  */
	  gimple_stmt_iterator tmp_gsi = gsi_for_stmt (new_stmt);
	  gsi_remove (&tmp_gsi, true);
	}

      /* The last statement replaces the original scalar statement;
	 earlier ones are inserted as new statements.  */
      if (i == vec_num - 1)
	{
	  gimple_set_lhs (new_stmt, scalar_dest);
	  vect_finish_replace_stmt (scalar_dest_def, new_stmt);
	}
      else
	vect_finish_stmt_generation (scalar_dest_def, new_stmt, gsi);

      if (slp_node)
	SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
    }

  if (!slp_node)
    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;

  return true;
}
634438fd1498Szrj 
634538fd1498Szrj /* Function is_nonwrapping_integer_induction.
634638fd1498Szrj 
634738fd1498Szrj    Check if STMT (which is part of loop LOOP) both increments and
634838fd1498Szrj    does not cause overflow.  */
634938fd1498Szrj 
635038fd1498Szrj static bool
is_nonwrapping_integer_induction(gimple * stmt,struct loop * loop)635138fd1498Szrj is_nonwrapping_integer_induction (gimple *stmt, struct loop *loop)
635238fd1498Szrj {
635338fd1498Szrj   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
635438fd1498Szrj   tree base = STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (stmt_vinfo);
635538fd1498Szrj   tree step = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo);
635638fd1498Szrj   tree lhs_type = TREE_TYPE (gimple_phi_result (stmt));
635738fd1498Szrj   widest_int ni, max_loop_value, lhs_max;
635838fd1498Szrj   bool overflow = false;
635938fd1498Szrj 
636038fd1498Szrj   /* Make sure the loop is integer based.  */
636138fd1498Szrj   if (TREE_CODE (base) != INTEGER_CST
636238fd1498Szrj       || TREE_CODE (step) != INTEGER_CST)
636338fd1498Szrj     return false;
636438fd1498Szrj 
636538fd1498Szrj   /* Check that the max size of the loop will not wrap.  */
636638fd1498Szrj 
636738fd1498Szrj   if (TYPE_OVERFLOW_UNDEFINED (lhs_type))
636838fd1498Szrj     return true;
636938fd1498Szrj 
637038fd1498Szrj   if (! max_stmt_executions (loop, &ni))
637138fd1498Szrj     return false;
637238fd1498Szrj 
637338fd1498Szrj   max_loop_value = wi::mul (wi::to_widest (step), ni, TYPE_SIGN (lhs_type),
637438fd1498Szrj 			    &overflow);
637538fd1498Szrj   if (overflow)
637638fd1498Szrj     return false;
637738fd1498Szrj 
637838fd1498Szrj   max_loop_value = wi::add (wi::to_widest (base), max_loop_value,
637938fd1498Szrj 			    TYPE_SIGN (lhs_type), &overflow);
638038fd1498Szrj   if (overflow)
638138fd1498Szrj     return false;
638238fd1498Szrj 
638338fd1498Szrj   return (wi::min_precision (max_loop_value, TYPE_SIGN (lhs_type))
638438fd1498Szrj 	  <= TYPE_PRECISION (lhs_type));
638538fd1498Szrj }
638638fd1498Szrj 
638738fd1498Szrj /* Function vectorizable_reduction.
638838fd1498Szrj 
638938fd1498Szrj    Check if STMT performs a reduction operation that can be vectorized.
639038fd1498Szrj    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
639138fd1498Szrj    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
639238fd1498Szrj    Return FALSE if not a vectorizable STMT, TRUE otherwise.
639338fd1498Szrj 
639438fd1498Szrj    This function also handles reduction idioms (patterns) that have been
639538fd1498Szrj    recognized in advance during vect_pattern_recog.  In this case, STMT may be
639638fd1498Szrj    of this form:
639738fd1498Szrj      X = pattern_expr (arg0, arg1, ..., X)
639838fd1498Szrj    and it's STMT_VINFO_RELATED_STMT points to the last stmt in the original
639938fd1498Szrj    sequence that had been detected and replaced by the pattern-stmt (STMT).
640038fd1498Szrj 
640138fd1498Szrj    This function also handles reduction of condition expressions, for example:
640238fd1498Szrj      for (int i = 0; i < N; i++)
640338fd1498Szrj        if (a[i] < value)
640438fd1498Szrj 	 last = a[i];
640538fd1498Szrj    This is handled by vectorising the loop and creating an additional vector
640638fd1498Szrj    containing the loop indexes for which "a[i] < value" was true.  In the
640738fd1498Szrj    function epilogue this is reduced to a single max value and then used to
640838fd1498Szrj    index into the vector of results.
640938fd1498Szrj 
641038fd1498Szrj    In some cases of reduction patterns, the type of the reduction variable X is
641138fd1498Szrj    different than the type of the other arguments of STMT.
641238fd1498Szrj    In such cases, the vectype that is used when transforming STMT into a vector
641338fd1498Szrj    stmt is different than the vectype that is used to determine the
641438fd1498Szrj    vectorization factor, because it consists of a different number of elements
641538fd1498Szrj    than the actual number of elements that are being operated upon in parallel.
641638fd1498Szrj 
641738fd1498Szrj    For example, consider an accumulation of shorts into an int accumulator.
641838fd1498Szrj    On some targets it's possible to vectorize this pattern operating on 8
641938fd1498Szrj    shorts at a time (hence, the vectype for purposes of determining the
642038fd1498Szrj    vectorization factor should be V8HI); on the other hand, the vectype that
642138fd1498Szrj    is used to create the vector form is actually V4SI (the type of the result).
642238fd1498Szrj 
642338fd1498Szrj    Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that
642438fd1498Szrj    indicates what is the actual level of parallelism (V8HI in the example), so
642538fd1498Szrj    that the right vectorization factor would be derived.  This vectype
642638fd1498Szrj    corresponds to the type of arguments to the reduction stmt, and should *NOT*
642738fd1498Szrj    be used to create the vectorized stmt.  The right vectype for the vectorized
642838fd1498Szrj    stmt is obtained from the type of the result X:
642938fd1498Szrj         get_vectype_for_scalar_type (TREE_TYPE (X))
643038fd1498Szrj 
643138fd1498Szrj    This means that, contrary to "regular" reductions (or "regular" stmts in
643238fd1498Szrj    general), the following equation:
643338fd1498Szrj       STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X))
643438fd1498Szrj    does *NOT* necessarily hold for reduction patterns.  */
643538fd1498Szrj 
643638fd1498Szrj bool
vectorizable_reduction(gimple * stmt,gimple_stmt_iterator * gsi,gimple ** vec_stmt,slp_tree slp_node,slp_instance slp_node_instance)643738fd1498Szrj vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
643838fd1498Szrj 			gimple **vec_stmt, slp_tree slp_node,
643938fd1498Szrj 			slp_instance slp_node_instance)
644038fd1498Szrj {
644138fd1498Szrj   tree vec_dest;
644238fd1498Szrj   tree scalar_dest;
644338fd1498Szrj   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
644438fd1498Szrj   tree vectype_out = STMT_VINFO_VECTYPE (stmt_info);
644538fd1498Szrj   tree vectype_in = NULL_TREE;
644638fd1498Szrj   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
644738fd1498Szrj   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
644838fd1498Szrj   enum tree_code code, orig_code;
644938fd1498Szrj   internal_fn reduc_fn;
645038fd1498Szrj   machine_mode vec_mode;
645138fd1498Szrj   int op_type;
645238fd1498Szrj   optab optab;
645338fd1498Szrj   tree new_temp = NULL_TREE;
645438fd1498Szrj   gimple *def_stmt;
645538fd1498Szrj   enum vect_def_type dt, cond_reduc_dt = vect_unknown_def_type;
645638fd1498Szrj   gimple *cond_reduc_def_stmt = NULL;
645738fd1498Szrj   enum tree_code cond_reduc_op_code = ERROR_MARK;
645838fd1498Szrj   tree scalar_type;
645938fd1498Szrj   bool is_simple_use;
646038fd1498Szrj   gimple *orig_stmt;
646138fd1498Szrj   stmt_vec_info orig_stmt_info = NULL;
646238fd1498Szrj   int i;
646338fd1498Szrj   int ncopies;
646438fd1498Szrj   int epilog_copies;
646538fd1498Szrj   stmt_vec_info prev_stmt_info, prev_phi_info;
646638fd1498Szrj   bool single_defuse_cycle = false;
646738fd1498Szrj   gimple *new_stmt = NULL;
646838fd1498Szrj   int j;
646938fd1498Szrj   tree ops[3];
647038fd1498Szrj   enum vect_def_type dts[3];
647138fd1498Szrj   bool nested_cycle = false, found_nested_cycle_def = false;
647238fd1498Szrj   bool double_reduc = false;
647338fd1498Szrj   basic_block def_bb;
647438fd1498Szrj   struct loop * def_stmt_loop, *outer_loop = NULL;
647538fd1498Szrj   tree def_arg;
647638fd1498Szrj   gimple *def_arg_stmt;
647738fd1498Szrj   auto_vec<tree> vec_oprnds0;
647838fd1498Szrj   auto_vec<tree> vec_oprnds1;
647938fd1498Szrj   auto_vec<tree> vec_oprnds2;
648038fd1498Szrj   auto_vec<tree> vect_defs;
648138fd1498Szrj   auto_vec<gimple *> phis;
648238fd1498Szrj   int vec_num;
648338fd1498Szrj   tree def0, tem;
648438fd1498Szrj   bool first_p = true;
648538fd1498Szrj   tree cr_index_scalar_type = NULL_TREE, cr_index_vector_type = NULL_TREE;
648638fd1498Szrj   tree cond_reduc_val = NULL_TREE;
648738fd1498Szrj 
648838fd1498Szrj   /* Make sure it was already recognized as a reduction computation.  */
648938fd1498Szrj   if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != vect_reduction_def
649038fd1498Szrj       && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != vect_nested_cycle)
649138fd1498Szrj     return false;
649238fd1498Szrj 
649338fd1498Szrj   if (nested_in_vect_loop_p (loop, stmt))
649438fd1498Szrj     {
649538fd1498Szrj       outer_loop = loop;
649638fd1498Szrj       loop = loop->inner;
649738fd1498Szrj       nested_cycle = true;
649838fd1498Szrj     }
649938fd1498Szrj 
650038fd1498Szrj   /* In case of reduction chain we switch to the first stmt in the chain, but
650138fd1498Szrj      we don't update STMT_INFO, since only the last stmt is marked as reduction
650238fd1498Szrj      and has reduction properties.  */
650338fd1498Szrj   if (GROUP_FIRST_ELEMENT (stmt_info)
650438fd1498Szrj       && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
650538fd1498Szrj     {
650638fd1498Szrj       stmt = GROUP_FIRST_ELEMENT (stmt_info);
650738fd1498Szrj       first_p = false;
650838fd1498Szrj     }
650938fd1498Szrj 
651038fd1498Szrj   if (gimple_code (stmt) == GIMPLE_PHI)
651138fd1498Szrj     {
651238fd1498Szrj       /* Analysis is fully done on the reduction stmt invocation.  */
651338fd1498Szrj       if (! vec_stmt)
651438fd1498Szrj 	{
651538fd1498Szrj 	  if (slp_node)
651638fd1498Szrj 	    slp_node_instance->reduc_phis = slp_node;
651738fd1498Szrj 
651838fd1498Szrj 	  STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
651938fd1498Szrj 	  return true;
652038fd1498Szrj 	}
652138fd1498Szrj 
652238fd1498Szrj       if (STMT_VINFO_REDUC_TYPE (stmt_info) == FOLD_LEFT_REDUCTION)
652338fd1498Szrj 	/* Leave the scalar phi in place.  Note that checking
652438fd1498Szrj 	   STMT_VINFO_VEC_REDUCTION_TYPE (as below) only works
652538fd1498Szrj 	   for reductions involving a single statement.  */
652638fd1498Szrj 	return true;
652738fd1498Szrj 
652838fd1498Szrj       gimple *reduc_stmt = STMT_VINFO_REDUC_DEF (stmt_info);
652938fd1498Szrj       if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (reduc_stmt)))
653038fd1498Szrj 	reduc_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (reduc_stmt));
653138fd1498Szrj 
653238fd1498Szrj       if (STMT_VINFO_VEC_REDUCTION_TYPE (vinfo_for_stmt (reduc_stmt))
653338fd1498Szrj 	  == EXTRACT_LAST_REDUCTION)
653438fd1498Szrj 	/* Leave the scalar phi in place.  */
653538fd1498Szrj 	return true;
653638fd1498Szrj 
653738fd1498Szrj       gcc_assert (is_gimple_assign (reduc_stmt));
653838fd1498Szrj       for (unsigned k = 1; k < gimple_num_ops (reduc_stmt); ++k)
653938fd1498Szrj 	{
654038fd1498Szrj 	  tree op = gimple_op (reduc_stmt, k);
654138fd1498Szrj 	  if (op == gimple_phi_result (stmt))
654238fd1498Szrj 	    continue;
654338fd1498Szrj 	  if (k == 1
654438fd1498Szrj 	      && gimple_assign_rhs_code (reduc_stmt) == COND_EXPR)
654538fd1498Szrj 	    continue;
654638fd1498Szrj 	  if (!vectype_in
654738fd1498Szrj 	      || (GET_MODE_SIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype_in)))
654838fd1498Szrj 		  < GET_MODE_SIZE (SCALAR_TYPE_MODE (TREE_TYPE (op)))))
654938fd1498Szrj 	    vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op));
655038fd1498Szrj 	  break;
655138fd1498Szrj 	}
655238fd1498Szrj       gcc_assert (vectype_in);
655338fd1498Szrj 
655438fd1498Szrj       if (slp_node)
655538fd1498Szrj 	ncopies = 1;
655638fd1498Szrj       else
655738fd1498Szrj 	ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
655838fd1498Szrj 
655938fd1498Szrj       use_operand_p use_p;
656038fd1498Szrj       gimple *use_stmt;
656138fd1498Szrj       if (ncopies > 1
656238fd1498Szrj 	  && (STMT_VINFO_RELEVANT (vinfo_for_stmt (reduc_stmt))
656338fd1498Szrj 	      <= vect_used_only_live)
656438fd1498Szrj 	  && single_imm_use (gimple_phi_result (stmt), &use_p, &use_stmt)
656538fd1498Szrj 	  && (use_stmt == reduc_stmt
656638fd1498Szrj 	      || (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt))
656738fd1498Szrj 		  == reduc_stmt)))
656838fd1498Szrj 	single_defuse_cycle = true;
656938fd1498Szrj 
657038fd1498Szrj       /* Create the destination vector  */
657138fd1498Szrj       scalar_dest = gimple_assign_lhs (reduc_stmt);
657238fd1498Szrj       vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
657338fd1498Szrj 
657438fd1498Szrj       if (slp_node)
657538fd1498Szrj 	/* The size vect_schedule_slp_instance computes is off for us.  */
657638fd1498Szrj 	vec_num = vect_get_num_vectors
657738fd1498Szrj 	  (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
657838fd1498Szrj 	   * SLP_TREE_SCALAR_STMTS (slp_node).length (),
657938fd1498Szrj 	   vectype_in);
658038fd1498Szrj       else
658138fd1498Szrj 	vec_num = 1;
658238fd1498Szrj 
658338fd1498Szrj       /* Generate the reduction PHIs upfront.  */
658438fd1498Szrj       prev_phi_info = NULL;
658538fd1498Szrj       for (j = 0; j < ncopies; j++)
658638fd1498Szrj 	{
658738fd1498Szrj 	  if (j == 0 || !single_defuse_cycle)
658838fd1498Szrj 	    {
658938fd1498Szrj 	      for (i = 0; i < vec_num; i++)
659038fd1498Szrj 		{
659138fd1498Szrj 		  /* Create the reduction-phi that defines the reduction
659238fd1498Szrj 		     operand.  */
659338fd1498Szrj 		  gimple *new_phi = create_phi_node (vec_dest, loop->header);
659438fd1498Szrj 		  set_vinfo_for_stmt (new_phi,
659538fd1498Szrj 				      new_stmt_vec_info (new_phi, loop_vinfo));
659638fd1498Szrj 
659738fd1498Szrj 		  if (slp_node)
659838fd1498Szrj 		    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_phi);
659938fd1498Szrj 		  else
660038fd1498Szrj 		    {
660138fd1498Szrj 		      if (j == 0)
660238fd1498Szrj 			STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_phi;
660338fd1498Szrj 		      else
660438fd1498Szrj 			STMT_VINFO_RELATED_STMT (prev_phi_info) = new_phi;
660538fd1498Szrj 		      prev_phi_info = vinfo_for_stmt (new_phi);
660638fd1498Szrj 		    }
660738fd1498Szrj 		}
660838fd1498Szrj 	    }
660938fd1498Szrj 	}
661038fd1498Szrj 
661138fd1498Szrj       return true;
661238fd1498Szrj     }
661338fd1498Szrj 
661438fd1498Szrj   /* 1. Is vectorizable reduction?  */
661538fd1498Szrj   /* Not supportable if the reduction variable is used in the loop, unless
661638fd1498Szrj      it's a reduction chain.  */
661738fd1498Szrj   if (STMT_VINFO_RELEVANT (stmt_info) > vect_used_in_outer
661838fd1498Szrj       && !GROUP_FIRST_ELEMENT (stmt_info))
661938fd1498Szrj     return false;
662038fd1498Szrj 
662138fd1498Szrj   /* Reductions that are not used even in an enclosing outer-loop,
662238fd1498Szrj      are expected to be "live" (used out of the loop).  */
662338fd1498Szrj   if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_scope
662438fd1498Szrj       && !STMT_VINFO_LIVE_P (stmt_info))
662538fd1498Szrj     return false;
662638fd1498Szrj 
662738fd1498Szrj   /* 2. Has this been recognized as a reduction pattern?
662838fd1498Szrj 
662938fd1498Szrj      Check if STMT represents a pattern that has been recognized
663038fd1498Szrj      in earlier analysis stages.  For stmts that represent a pattern,
663138fd1498Szrj      the STMT_VINFO_RELATED_STMT field records the last stmt in
663238fd1498Szrj      the original sequence that constitutes the pattern.  */
663338fd1498Szrj 
663438fd1498Szrj   orig_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt));
663538fd1498Szrj   if (orig_stmt)
663638fd1498Szrj     {
663738fd1498Szrj       orig_stmt_info = vinfo_for_stmt (orig_stmt);
663838fd1498Szrj       gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info));
663938fd1498Szrj       gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info));
664038fd1498Szrj     }
664138fd1498Szrj 
664238fd1498Szrj   /* 3. Check the operands of the operation.  The first operands are defined
664338fd1498Szrj         inside the loop body. The last operand is the reduction variable,
664438fd1498Szrj         which is defined by the loop-header-phi.  */
664538fd1498Szrj 
664638fd1498Szrj   gcc_assert (is_gimple_assign (stmt));
664738fd1498Szrj 
664838fd1498Szrj   /* Flatten RHS.  */
664938fd1498Szrj   switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
665038fd1498Szrj     {
665138fd1498Szrj     case GIMPLE_BINARY_RHS:
665238fd1498Szrj       code = gimple_assign_rhs_code (stmt);
665338fd1498Szrj       op_type = TREE_CODE_LENGTH (code);
665438fd1498Szrj       gcc_assert (op_type == binary_op);
665538fd1498Szrj       ops[0] = gimple_assign_rhs1 (stmt);
665638fd1498Szrj       ops[1] = gimple_assign_rhs2 (stmt);
665738fd1498Szrj       break;
665838fd1498Szrj 
665938fd1498Szrj     case GIMPLE_TERNARY_RHS:
666038fd1498Szrj       code = gimple_assign_rhs_code (stmt);
666138fd1498Szrj       op_type = TREE_CODE_LENGTH (code);
666238fd1498Szrj       gcc_assert (op_type == ternary_op);
666338fd1498Szrj       ops[0] = gimple_assign_rhs1 (stmt);
666438fd1498Szrj       ops[1] = gimple_assign_rhs2 (stmt);
666538fd1498Szrj       ops[2] = gimple_assign_rhs3 (stmt);
666638fd1498Szrj       break;
666738fd1498Szrj 
666838fd1498Szrj     case GIMPLE_UNARY_RHS:
666938fd1498Szrj       return false;
667038fd1498Szrj 
667138fd1498Szrj     default:
667238fd1498Szrj       gcc_unreachable ();
667338fd1498Szrj     }
667438fd1498Szrj 
667538fd1498Szrj   if (code == COND_EXPR && slp_node)
667638fd1498Szrj     return false;
667738fd1498Szrj 
667838fd1498Szrj   scalar_dest = gimple_assign_lhs (stmt);
667938fd1498Szrj   scalar_type = TREE_TYPE (scalar_dest);
668038fd1498Szrj   if (!POINTER_TYPE_P (scalar_type) && !INTEGRAL_TYPE_P (scalar_type)
668138fd1498Szrj       && !SCALAR_FLOAT_TYPE_P (scalar_type))
668238fd1498Szrj     return false;
668338fd1498Szrj 
668438fd1498Szrj   /* Do not try to vectorize bit-precision reductions.  */
668538fd1498Szrj   if (!type_has_mode_precision_p (scalar_type))
668638fd1498Szrj     return false;
668738fd1498Szrj 
668838fd1498Szrj   /* All uses but the last are expected to be defined in the loop.
668938fd1498Szrj      The last use is the reduction variable.  In case of nested cycle this
669038fd1498Szrj      assumption is not true: we use reduc_index to record the index of the
669138fd1498Szrj      reduction variable.  */
669238fd1498Szrj   gimple *reduc_def_stmt = NULL;
669338fd1498Szrj   int reduc_index = -1;
669438fd1498Szrj   for (i = 0; i < op_type; i++)
669538fd1498Szrj     {
669638fd1498Szrj       /* The condition of COND_EXPR is checked in vectorizable_condition().  */
669738fd1498Szrj       if (i == 0 && code == COND_EXPR)
669838fd1498Szrj         continue;
669938fd1498Szrj 
670038fd1498Szrj       is_simple_use = vect_is_simple_use (ops[i], loop_vinfo,
670138fd1498Szrj 					  &def_stmt, &dts[i], &tem);
670238fd1498Szrj       dt = dts[i];
670338fd1498Szrj       gcc_assert (is_simple_use);
670438fd1498Szrj       if (dt == vect_reduction_def)
670538fd1498Szrj 	{
670638fd1498Szrj           reduc_def_stmt = def_stmt;
670738fd1498Szrj 	  reduc_index = i;
670838fd1498Szrj 	  continue;
670938fd1498Szrj 	}
671038fd1498Szrj       else if (tem)
671138fd1498Szrj 	{
671238fd1498Szrj 	  /* To properly compute ncopies we are interested in the widest
671338fd1498Szrj 	     input type in case we're looking at a widening accumulation.  */
671438fd1498Szrj 	  if (!vectype_in
671538fd1498Szrj 	      || (GET_MODE_SIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype_in)))
671638fd1498Szrj 		  < GET_MODE_SIZE (SCALAR_TYPE_MODE (TREE_TYPE (tem)))))
671738fd1498Szrj 	    vectype_in = tem;
671838fd1498Szrj 	}
671938fd1498Szrj 
672038fd1498Szrj       if (dt != vect_internal_def
672138fd1498Szrj 	  && dt != vect_external_def
672238fd1498Szrj 	  && dt != vect_constant_def
672338fd1498Szrj 	  && dt != vect_induction_def
672438fd1498Szrj           && !(dt == vect_nested_cycle && nested_cycle))
672538fd1498Szrj 	return false;
672638fd1498Szrj 
672738fd1498Szrj       if (dt == vect_nested_cycle)
672838fd1498Szrj         {
672938fd1498Szrj           found_nested_cycle_def = true;
673038fd1498Szrj           reduc_def_stmt = def_stmt;
673138fd1498Szrj           reduc_index = i;
673238fd1498Szrj         }
673338fd1498Szrj 
673438fd1498Szrj       if (i == 1 && code == COND_EXPR)
673538fd1498Szrj 	{
673638fd1498Szrj 	  /* Record how value of COND_EXPR is defined.  */
673738fd1498Szrj 	  if (dt == vect_constant_def)
673838fd1498Szrj 	    {
673938fd1498Szrj 	      cond_reduc_dt = dt;
674038fd1498Szrj 	      cond_reduc_val = ops[i];
674138fd1498Szrj 	    }
674238fd1498Szrj 	  if (dt == vect_induction_def
674338fd1498Szrj 	      && def_stmt != NULL
674438fd1498Szrj 	      && is_nonwrapping_integer_induction (def_stmt, loop))
674538fd1498Szrj 	    {
674638fd1498Szrj 	      cond_reduc_dt = dt;
674738fd1498Szrj 	      cond_reduc_def_stmt = def_stmt;
674838fd1498Szrj 	    }
674938fd1498Szrj 	}
675038fd1498Szrj     }
675138fd1498Szrj 
675238fd1498Szrj   if (!vectype_in)
675338fd1498Szrj     vectype_in = vectype_out;
675438fd1498Szrj 
675538fd1498Szrj   /* When vectorizing a reduction chain w/o SLP the reduction PHI is not
675638fd1498Szrj      directy used in stmt.  */
675738fd1498Szrj   if (reduc_index == -1)
675838fd1498Szrj     {
675938fd1498Szrj       if (STMT_VINFO_REDUC_TYPE (stmt_info) == FOLD_LEFT_REDUCTION)
676038fd1498Szrj 	{
676138fd1498Szrj 	  if (dump_enabled_p ())
676238fd1498Szrj 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
676338fd1498Szrj 			     "in-order reduction chain without SLP.\n");
676438fd1498Szrj 	  return false;
676538fd1498Szrj 	}
676638fd1498Szrj 
676738fd1498Szrj       if (orig_stmt)
676838fd1498Szrj 	reduc_def_stmt = STMT_VINFO_REDUC_DEF (orig_stmt_info);
676938fd1498Szrj       else
677038fd1498Szrj 	reduc_def_stmt = STMT_VINFO_REDUC_DEF (stmt_info);
677138fd1498Szrj     }
677238fd1498Szrj 
677338fd1498Szrj   if (! reduc_def_stmt || gimple_code (reduc_def_stmt) != GIMPLE_PHI)
677438fd1498Szrj     return false;
677538fd1498Szrj 
677638fd1498Szrj   if (!(reduc_index == -1
677738fd1498Szrj 	|| dts[reduc_index] == vect_reduction_def
677838fd1498Szrj 	|| dts[reduc_index] == vect_nested_cycle
677938fd1498Szrj 	|| ((dts[reduc_index] == vect_internal_def
678038fd1498Szrj 	     || dts[reduc_index] == vect_external_def
678138fd1498Szrj 	     || dts[reduc_index] == vect_constant_def
678238fd1498Szrj 	     || dts[reduc_index] == vect_induction_def)
678338fd1498Szrj 	    && nested_cycle && found_nested_cycle_def)))
678438fd1498Szrj     {
678538fd1498Szrj       /* For pattern recognized stmts, orig_stmt might be a reduction,
678638fd1498Szrj 	 but some helper statements for the pattern might not, or
678738fd1498Szrj 	 might be COND_EXPRs with reduction uses in the condition.  */
678838fd1498Szrj       gcc_assert (orig_stmt);
678938fd1498Szrj       return false;
679038fd1498Szrj     }
679138fd1498Szrj 
679238fd1498Szrj   stmt_vec_info reduc_def_info = vinfo_for_stmt (reduc_def_stmt);
679338fd1498Szrj   enum vect_reduction_type v_reduc_type
679438fd1498Szrj     = STMT_VINFO_REDUC_TYPE (reduc_def_info);
679538fd1498Szrj   gimple *tmp = STMT_VINFO_REDUC_DEF (reduc_def_info);
679638fd1498Szrj 
679738fd1498Szrj   STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) = v_reduc_type;
679838fd1498Szrj   /* If we have a condition reduction, see if we can simplify it further.  */
679938fd1498Szrj   if (v_reduc_type == COND_REDUCTION)
680038fd1498Szrj     {
680138fd1498Szrj       /* TODO: We can't yet handle reduction chains, since we need to treat
680238fd1498Szrj 	 each COND_EXPR in the chain specially, not just the last one.
680338fd1498Szrj 	 E.g. for:
680438fd1498Szrj 
680538fd1498Szrj 	    x_1 = PHI <x_3, ...>
680638fd1498Szrj 	    x_2 = a_2 ? ... : x_1;
680738fd1498Szrj 	    x_3 = a_3 ? ... : x_2;
680838fd1498Szrj 
680938fd1498Szrj 	 we're interested in the last element in x_3 for which a_2 || a_3
681038fd1498Szrj 	 is true, whereas the current reduction chain handling would
681138fd1498Szrj 	 vectorize x_2 as a normal VEC_COND_EXPR and only treat x_3
681238fd1498Szrj 	 as a reduction operation.  */
681338fd1498Szrj       if (reduc_index == -1)
681438fd1498Szrj 	{
681538fd1498Szrj 	  if (dump_enabled_p ())
681638fd1498Szrj 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
681738fd1498Szrj 			     "conditional reduction chains not supported\n");
681838fd1498Szrj 	  return false;
681938fd1498Szrj 	}
682038fd1498Szrj 
682138fd1498Szrj       /* vect_is_simple_reduction ensured that operand 2 is the
682238fd1498Szrj 	 loop-carried operand.  */
682338fd1498Szrj       gcc_assert (reduc_index == 2);
682438fd1498Szrj 
682538fd1498Szrj       /* Loop peeling modifies initial value of reduction PHI, which
682638fd1498Szrj 	 makes the reduction stmt to be transformed different to the
682738fd1498Szrj 	 original stmt analyzed.  We need to record reduction code for
682838fd1498Szrj 	 CONST_COND_REDUCTION type reduction at analyzing stage, thus
682938fd1498Szrj 	 it can be used directly at transform stage.  */
683038fd1498Szrj       if (STMT_VINFO_VEC_CONST_COND_REDUC_CODE (stmt_info) == MAX_EXPR
683138fd1498Szrj 	  || STMT_VINFO_VEC_CONST_COND_REDUC_CODE (stmt_info) == MIN_EXPR)
683238fd1498Szrj 	{
683338fd1498Szrj 	  /* Also set the reduction type to CONST_COND_REDUCTION.  */
683438fd1498Szrj 	  gcc_assert (cond_reduc_dt == vect_constant_def);
683538fd1498Szrj 	  STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) = CONST_COND_REDUCTION;
683638fd1498Szrj 	}
683738fd1498Szrj       else if (direct_internal_fn_supported_p (IFN_FOLD_EXTRACT_LAST,
683838fd1498Szrj 					       vectype_in, OPTIMIZE_FOR_SPEED))
683938fd1498Szrj 	{
684038fd1498Szrj 	  if (dump_enabled_p ())
684138fd1498Szrj 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
684238fd1498Szrj 			     "optimizing condition reduction with"
684338fd1498Szrj 			     " FOLD_EXTRACT_LAST.\n");
684438fd1498Szrj 	  STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) = EXTRACT_LAST_REDUCTION;
684538fd1498Szrj 	}
684638fd1498Szrj       else if (cond_reduc_dt == vect_induction_def)
684738fd1498Szrj 	{
684838fd1498Szrj 	  stmt_vec_info cond_stmt_vinfo = vinfo_for_stmt (cond_reduc_def_stmt);
684938fd1498Szrj 	  tree base
685038fd1498Szrj 	    = STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (cond_stmt_vinfo);
685138fd1498Szrj 	  tree step = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (cond_stmt_vinfo);
685238fd1498Szrj 
685338fd1498Szrj 	  gcc_assert (TREE_CODE (base) == INTEGER_CST
685438fd1498Szrj 		      && TREE_CODE (step) == INTEGER_CST);
685538fd1498Szrj 	  cond_reduc_val = NULL_TREE;
685638fd1498Szrj 	  /* Find a suitable value, for MAX_EXPR below base, for MIN_EXPR
685738fd1498Szrj 	     above base; punt if base is the minimum value of the type for
685838fd1498Szrj 	     MAX_EXPR or maximum value of the type for MIN_EXPR for now.  */
685938fd1498Szrj 	  if (tree_int_cst_sgn (step) == -1)
686038fd1498Szrj 	    {
686138fd1498Szrj 	      cond_reduc_op_code = MIN_EXPR;
686238fd1498Szrj 	      if (tree_int_cst_sgn (base) == -1)
686338fd1498Szrj 		cond_reduc_val = build_int_cst (TREE_TYPE (base), 0);
686438fd1498Szrj 	      else if (tree_int_cst_lt (base,
686538fd1498Szrj 					TYPE_MAX_VALUE (TREE_TYPE (base))))
686638fd1498Szrj 		cond_reduc_val
686738fd1498Szrj 		  = int_const_binop (PLUS_EXPR, base, integer_one_node);
686838fd1498Szrj 	    }
686938fd1498Szrj 	  else
687038fd1498Szrj 	    {
687138fd1498Szrj 	      cond_reduc_op_code = MAX_EXPR;
687238fd1498Szrj 	      if (tree_int_cst_sgn (base) == 1)
687338fd1498Szrj 		cond_reduc_val = build_int_cst (TREE_TYPE (base), 0);
687438fd1498Szrj 	      else if (tree_int_cst_lt (TYPE_MIN_VALUE (TREE_TYPE (base)),
687538fd1498Szrj 					base))
687638fd1498Szrj 		cond_reduc_val
687738fd1498Szrj 		  = int_const_binop (MINUS_EXPR, base, integer_one_node);
687838fd1498Szrj 	    }
687938fd1498Szrj 	  if (cond_reduc_val)
688038fd1498Szrj 	    {
688138fd1498Szrj 	      if (dump_enabled_p ())
688238fd1498Szrj 		dump_printf_loc (MSG_NOTE, vect_location,
688338fd1498Szrj 				 "condition expression based on "
688438fd1498Szrj 				 "integer induction.\n");
688538fd1498Szrj 	      STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
688638fd1498Szrj 		= INTEGER_INDUC_COND_REDUCTION;
688738fd1498Szrj 	    }
688838fd1498Szrj 	}
688938fd1498Szrj       else if (cond_reduc_dt == vect_constant_def)
689038fd1498Szrj 	{
689138fd1498Szrj 	  enum vect_def_type cond_initial_dt;
689238fd1498Szrj 	  gimple *def_stmt = SSA_NAME_DEF_STMT (ops[reduc_index]);
689338fd1498Szrj 	  tree cond_initial_val
689438fd1498Szrj 	    = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
689538fd1498Szrj 
689638fd1498Szrj 	  gcc_assert (cond_reduc_val != NULL_TREE);
689738fd1498Szrj 	  vect_is_simple_use (cond_initial_val, loop_vinfo,
689838fd1498Szrj 			      &def_stmt, &cond_initial_dt);
689938fd1498Szrj 	  if (cond_initial_dt == vect_constant_def
690038fd1498Szrj 	      && types_compatible_p (TREE_TYPE (cond_initial_val),
690138fd1498Szrj 				     TREE_TYPE (cond_reduc_val)))
690238fd1498Szrj 	    {
690338fd1498Szrj 	      tree e = fold_binary (LE_EXPR, boolean_type_node,
690438fd1498Szrj 				    cond_initial_val, cond_reduc_val);
690538fd1498Szrj 	      if (e && (integer_onep (e) || integer_zerop (e)))
690638fd1498Szrj 		{
690738fd1498Szrj 		  if (dump_enabled_p ())
690838fd1498Szrj 		    dump_printf_loc (MSG_NOTE, vect_location,
690938fd1498Szrj 				     "condition expression based on "
691038fd1498Szrj 				     "compile time constant.\n");
691138fd1498Szrj 		  /* Record reduction code at analysis stage.  */
691238fd1498Szrj 		  STMT_VINFO_VEC_CONST_COND_REDUC_CODE (stmt_info)
691338fd1498Szrj 		    = integer_onep (e) ? MAX_EXPR : MIN_EXPR;
691438fd1498Szrj 		  STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
691538fd1498Szrj 		    = CONST_COND_REDUCTION;
691638fd1498Szrj 		}
691738fd1498Szrj 	    }
691838fd1498Szrj 	}
691938fd1498Szrj     }
692038fd1498Szrj 
692138fd1498Szrj   if (orig_stmt)
692238fd1498Szrj     gcc_assert (tmp == orig_stmt
692338fd1498Szrj 		|| GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) == orig_stmt);
692438fd1498Szrj   else
692538fd1498Szrj     /* We changed STMT to be the first stmt in reduction chain, hence we
692638fd1498Szrj        check that in this case the first element in the chain is STMT.  */
692738fd1498Szrj     gcc_assert (stmt == tmp
692838fd1498Szrj 		|| GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) == stmt);
692938fd1498Szrj 
693038fd1498Szrj   if (STMT_VINFO_LIVE_P (vinfo_for_stmt (reduc_def_stmt)))
693138fd1498Szrj     return false;
693238fd1498Szrj 
693338fd1498Szrj   if (slp_node)
693438fd1498Szrj     ncopies = 1;
693538fd1498Szrj   else
693638fd1498Szrj     ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
693738fd1498Szrj 
693838fd1498Szrj   gcc_assert (ncopies >= 1);
693938fd1498Szrj 
694038fd1498Szrj   vec_mode = TYPE_MODE (vectype_in);
694138fd1498Szrj   poly_uint64 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
694238fd1498Szrj 
694338fd1498Szrj   if (code == COND_EXPR)
694438fd1498Szrj     {
694538fd1498Szrj       /* Only call during the analysis stage, otherwise we'll lose
694638fd1498Szrj 	 STMT_VINFO_TYPE.  */
694738fd1498Szrj       if (!vec_stmt && !vectorizable_condition (stmt, gsi, NULL,
694838fd1498Szrj 						ops[reduc_index], 0, NULL))
694938fd1498Szrj         {
695038fd1498Szrj           if (dump_enabled_p ())
695138fd1498Szrj 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
695238fd1498Szrj 			     "unsupported condition in reduction\n");
695338fd1498Szrj 	  return false;
695438fd1498Szrj         }
695538fd1498Szrj     }
695638fd1498Szrj   else
695738fd1498Szrj     {
695838fd1498Szrj       /* 4. Supportable by target?  */
695938fd1498Szrj 
696038fd1498Szrj       if (code == LSHIFT_EXPR || code == RSHIFT_EXPR
696138fd1498Szrj 	  || code == LROTATE_EXPR || code == RROTATE_EXPR)
696238fd1498Szrj 	{
696338fd1498Szrj 	  /* Shifts and rotates are only supported by vectorizable_shifts,
696438fd1498Szrj 	     not vectorizable_reduction.  */
696538fd1498Szrj           if (dump_enabled_p ())
696638fd1498Szrj 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
696738fd1498Szrj 			     "unsupported shift or rotation.\n");
696838fd1498Szrj 	  return false;
696938fd1498Szrj 	}
697038fd1498Szrj 
697138fd1498Szrj       /* 4.1. check support for the operation in the loop  */
697238fd1498Szrj       optab = optab_for_tree_code (code, vectype_in, optab_default);
697338fd1498Szrj       if (!optab)
697438fd1498Szrj         {
697538fd1498Szrj           if (dump_enabled_p ())
697638fd1498Szrj 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
697738fd1498Szrj 			     "no optab.\n");
697838fd1498Szrj 
697938fd1498Szrj           return false;
698038fd1498Szrj         }
698138fd1498Szrj 
698238fd1498Szrj       if (optab_handler (optab, vec_mode) == CODE_FOR_nothing)
698338fd1498Szrj         {
698438fd1498Szrj           if (dump_enabled_p ())
698538fd1498Szrj             dump_printf (MSG_NOTE, "op not supported by target.\n");
698638fd1498Szrj 
698738fd1498Szrj 	  if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
698838fd1498Szrj 	      || !vect_worthwhile_without_simd_p (loop_vinfo, code))
698938fd1498Szrj             return false;
699038fd1498Szrj 
699138fd1498Szrj           if (dump_enabled_p ())
699238fd1498Szrj   	    dump_printf (MSG_NOTE, "proceeding using word mode.\n");
699338fd1498Szrj         }
699438fd1498Szrj 
699538fd1498Szrj       /* Worthwhile without SIMD support?  */
699638fd1498Szrj       if (!VECTOR_MODE_P (TYPE_MODE (vectype_in))
699738fd1498Szrj 	  && !vect_worthwhile_without_simd_p (loop_vinfo, code))
699838fd1498Szrj         {
699938fd1498Szrj           if (dump_enabled_p ())
700038fd1498Szrj 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
700138fd1498Szrj 			     "not worthwhile without SIMD support.\n");
700238fd1498Szrj 
700338fd1498Szrj           return false;
700438fd1498Szrj         }
700538fd1498Szrj     }
700638fd1498Szrj 
700738fd1498Szrj   /* 4.2. Check support for the epilog operation.
700838fd1498Szrj 
700938fd1498Szrj           If STMT represents a reduction pattern, then the type of the
701038fd1498Szrj           reduction variable may be different than the type of the rest
701138fd1498Szrj           of the arguments.  For example, consider the case of accumulation
701238fd1498Szrj           of shorts into an int accumulator; The original code:
701338fd1498Szrj                         S1: int_a = (int) short_a;
701438fd1498Szrj           orig_stmt->   S2: int_acc = plus <int_a ,int_acc>;
701538fd1498Szrj 
701638fd1498Szrj           was replaced with:
701738fd1498Szrj                         STMT: int_acc = widen_sum <short_a, int_acc>
701838fd1498Szrj 
701938fd1498Szrj           This means that:
702038fd1498Szrj           1. The tree-code that is used to create the vector operation in the
702138fd1498Szrj              epilog code (that reduces the partial results) is not the
702238fd1498Szrj              tree-code of STMT, but is rather the tree-code of the original
702338fd1498Szrj              stmt from the pattern that STMT is replacing.  I.e, in the example
702438fd1498Szrj              above we want to use 'widen_sum' in the loop, but 'plus' in the
702538fd1498Szrj              epilog.
702638fd1498Szrj           2. The type (mode) we use to check available target support
702738fd1498Szrj              for the vector operation to be created in the *epilog*, is
702838fd1498Szrj              determined by the type of the reduction variable (in the example
702938fd1498Szrj              above we'd check this: optab_handler (plus_optab, vect_int_mode])).
703038fd1498Szrj              However the type (mode) we use to check available target support
703138fd1498Szrj              for the vector operation to be created *inside the loop*, is
703238fd1498Szrj              determined by the type of the other arguments to STMT (in the
703338fd1498Szrj              example we'd check this: optab_handler (widen_sum_optab,
703438fd1498Szrj 	     vect_short_mode)).
703538fd1498Szrj 
703638fd1498Szrj           This is contrary to "regular" reductions, in which the types of all
703738fd1498Szrj           the arguments are the same as the type of the reduction variable.
703838fd1498Szrj           For "regular" reductions we can therefore use the same vector type
703938fd1498Szrj           (and also the same tree-code) when generating the epilog code and
704038fd1498Szrj           when generating the code inside the loop.  */
704138fd1498Szrj 
704238fd1498Szrj   vect_reduction_type reduction_type
704338fd1498Szrj     = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info);
704438fd1498Szrj   if (orig_stmt
704538fd1498Szrj       && (reduction_type == TREE_CODE_REDUCTION
704638fd1498Szrj 	  || reduction_type == FOLD_LEFT_REDUCTION))
704738fd1498Szrj     {
704838fd1498Szrj       /* This is a reduction pattern: get the vectype from the type of the
704938fd1498Szrj          reduction variable, and get the tree-code from orig_stmt.  */
705038fd1498Szrj       orig_code = gimple_assign_rhs_code (orig_stmt);
705138fd1498Szrj       gcc_assert (vectype_out);
705238fd1498Szrj       vec_mode = TYPE_MODE (vectype_out);
705338fd1498Szrj     }
705438fd1498Szrj   else
705538fd1498Szrj     {
705638fd1498Szrj       /* Regular reduction: the same vectype and tree-code as used for
705738fd1498Szrj          the vector code inside the loop can be used for the epilog code. */
705838fd1498Szrj       orig_code = code;
705938fd1498Szrj 
706038fd1498Szrj       if (code == MINUS_EXPR)
706138fd1498Szrj 	orig_code = PLUS_EXPR;
706238fd1498Szrj 
706338fd1498Szrj       /* For simple condition reductions, replace with the actual expression
706438fd1498Szrj 	 we want to base our reduction around.  */
706538fd1498Szrj       if (reduction_type == CONST_COND_REDUCTION)
706638fd1498Szrj 	{
706738fd1498Szrj 	  orig_code = STMT_VINFO_VEC_CONST_COND_REDUC_CODE (stmt_info);
706838fd1498Szrj 	  gcc_assert (orig_code == MAX_EXPR || orig_code == MIN_EXPR);
706938fd1498Szrj 	}
707038fd1498Szrj       else if (reduction_type == INTEGER_INDUC_COND_REDUCTION)
707138fd1498Szrj 	orig_code = cond_reduc_op_code;
707238fd1498Szrj     }
707338fd1498Szrj 
707438fd1498Szrj   if (nested_cycle)
707538fd1498Szrj     {
707638fd1498Szrj       def_bb = gimple_bb (reduc_def_stmt);
707738fd1498Szrj       def_stmt_loop = def_bb->loop_father;
707838fd1498Szrj       def_arg = PHI_ARG_DEF_FROM_EDGE (reduc_def_stmt,
707938fd1498Szrj                                        loop_preheader_edge (def_stmt_loop));
708038fd1498Szrj       if (TREE_CODE (def_arg) == SSA_NAME
708138fd1498Szrj           && (def_arg_stmt = SSA_NAME_DEF_STMT (def_arg))
708238fd1498Szrj           && gimple_code (def_arg_stmt) == GIMPLE_PHI
708338fd1498Szrj           && flow_bb_inside_loop_p (outer_loop, gimple_bb (def_arg_stmt))
708438fd1498Szrj           && vinfo_for_stmt (def_arg_stmt)
708538fd1498Szrj           && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_arg_stmt))
708638fd1498Szrj               == vect_double_reduction_def)
708738fd1498Szrj         double_reduc = true;
708838fd1498Szrj     }
708938fd1498Szrj 
709038fd1498Szrj   reduc_fn = IFN_LAST;
709138fd1498Szrj 
709238fd1498Szrj   if (reduction_type == TREE_CODE_REDUCTION
709338fd1498Szrj       || reduction_type == FOLD_LEFT_REDUCTION
709438fd1498Szrj       || reduction_type == INTEGER_INDUC_COND_REDUCTION
709538fd1498Szrj       || reduction_type == CONST_COND_REDUCTION)
709638fd1498Szrj     {
709738fd1498Szrj       if (reduction_type == FOLD_LEFT_REDUCTION
709838fd1498Szrj 	  ? fold_left_reduction_fn (orig_code, &reduc_fn)
709938fd1498Szrj 	  : reduction_fn_for_scalar_code (orig_code, &reduc_fn))
710038fd1498Szrj 	{
710138fd1498Szrj 	  if (reduc_fn != IFN_LAST
710238fd1498Szrj 	      && !direct_internal_fn_supported_p (reduc_fn, vectype_out,
710338fd1498Szrj 						  OPTIMIZE_FOR_SPEED))
710438fd1498Szrj 	    {
710538fd1498Szrj 	      if (dump_enabled_p ())
710638fd1498Szrj 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
710738fd1498Szrj 				 "reduc op not supported by target.\n");
710838fd1498Szrj 
710938fd1498Szrj 	      reduc_fn = IFN_LAST;
711038fd1498Szrj 	    }
711138fd1498Szrj 	}
711238fd1498Szrj       else
711338fd1498Szrj 	{
711438fd1498Szrj 	  if (!nested_cycle || double_reduc)
711538fd1498Szrj 	    {
711638fd1498Szrj 	      if (dump_enabled_p ())
711738fd1498Szrj 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
711838fd1498Szrj 				 "no reduc code for scalar code.\n");
711938fd1498Szrj 
712038fd1498Szrj 	      return false;
712138fd1498Szrj 	    }
712238fd1498Szrj 	}
712338fd1498Szrj     }
712438fd1498Szrj   else if (reduction_type == COND_REDUCTION)
712538fd1498Szrj     {
712638fd1498Szrj       int scalar_precision
712738fd1498Szrj 	= GET_MODE_PRECISION (SCALAR_TYPE_MODE (scalar_type));
712838fd1498Szrj       cr_index_scalar_type = make_unsigned_type (scalar_precision);
712938fd1498Szrj       cr_index_vector_type = build_vector_type (cr_index_scalar_type,
713038fd1498Szrj 						nunits_out);
713138fd1498Szrj 
713238fd1498Szrj       if (direct_internal_fn_supported_p (IFN_REDUC_MAX, cr_index_vector_type,
713338fd1498Szrj 					  OPTIMIZE_FOR_SPEED))
713438fd1498Szrj 	reduc_fn = IFN_REDUC_MAX;
713538fd1498Szrj     }
713638fd1498Szrj 
713738fd1498Szrj   if (reduction_type != EXTRACT_LAST_REDUCTION
713838fd1498Szrj       && reduc_fn == IFN_LAST
713938fd1498Szrj       && !nunits_out.is_constant ())
714038fd1498Szrj     {
714138fd1498Szrj       if (dump_enabled_p ())
714238fd1498Szrj 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
714338fd1498Szrj 			 "missing target support for reduction on"
714438fd1498Szrj 			 " variable-length vectors.\n");
714538fd1498Szrj       return false;
714638fd1498Szrj     }
714738fd1498Szrj 
714838fd1498Szrj   if ((double_reduc || reduction_type != TREE_CODE_REDUCTION)
714938fd1498Szrj       && ncopies > 1)
715038fd1498Szrj     {
715138fd1498Szrj       if (dump_enabled_p ())
715238fd1498Szrj 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
715338fd1498Szrj 			 "multiple types in double reduction or condition "
715438fd1498Szrj 			 "reduction.\n");
715538fd1498Szrj       return false;
715638fd1498Szrj     }
715738fd1498Szrj 
715838fd1498Szrj   /* For SLP reductions, see if there is a neutral value we can use.  */
715938fd1498Szrj   tree neutral_op = NULL_TREE;
716038fd1498Szrj   if (slp_node)
716138fd1498Szrj     neutral_op
716238fd1498Szrj       = neutral_op_for_slp_reduction (slp_node_instance->reduc_phis, code,
716338fd1498Szrj 				      GROUP_FIRST_ELEMENT (stmt_info) != NULL);
716438fd1498Szrj 
716538fd1498Szrj   if (double_reduc && reduction_type == FOLD_LEFT_REDUCTION)
716638fd1498Szrj     {
716738fd1498Szrj       /* We can't support in-order reductions of code such as this:
716838fd1498Szrj 
716938fd1498Szrj 	   for (int i = 0; i < n1; ++i)
717038fd1498Szrj 	     for (int j = 0; j < n2; ++j)
717138fd1498Szrj 	       l += a[j];
717238fd1498Szrj 
717338fd1498Szrj 	 since GCC effectively transforms the loop when vectorizing:
717438fd1498Szrj 
717538fd1498Szrj 	   for (int i = 0; i < n1 / VF; ++i)
717638fd1498Szrj 	     for (int j = 0; j < n2; ++j)
717738fd1498Szrj 	       for (int k = 0; k < VF; ++k)
717838fd1498Szrj 		 l += a[j];
717938fd1498Szrj 
718038fd1498Szrj 	 which is a reassociation of the original operation.  */
718138fd1498Szrj       if (dump_enabled_p ())
718238fd1498Szrj 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
718338fd1498Szrj 			 "in-order double reduction not supported.\n");
718438fd1498Szrj 
718538fd1498Szrj       return false;
718638fd1498Szrj     }
718738fd1498Szrj 
718838fd1498Szrj   if (reduction_type == FOLD_LEFT_REDUCTION
718938fd1498Szrj       && slp_node
719038fd1498Szrj       && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
719138fd1498Szrj     {
719238fd1498Szrj       /* We cannot use in-order reductions in this case because there is
719338fd1498Szrj 	 an implicit reassociation of the operations involved.  */
719438fd1498Szrj       if (dump_enabled_p ())
719538fd1498Szrj 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
719638fd1498Szrj 			 "in-order unchained SLP reductions not supported.\n");
719738fd1498Szrj       return false;
719838fd1498Szrj     }
719938fd1498Szrj 
720038fd1498Szrj   /* For double reductions, and for SLP reductions with a neutral value,
720138fd1498Szrj      we construct a variable-length initial vector by loading a vector
720238fd1498Szrj      full of the neutral value and then shift-and-inserting the start
720338fd1498Szrj      values into the low-numbered elements.  */
720438fd1498Szrj   if ((double_reduc || neutral_op)
720538fd1498Szrj       && !nunits_out.is_constant ()
720638fd1498Szrj       && !direct_internal_fn_supported_p (IFN_VEC_SHL_INSERT,
720738fd1498Szrj 					  vectype_out, OPTIMIZE_FOR_SPEED))
720838fd1498Szrj     {
720938fd1498Szrj       if (dump_enabled_p ())
721038fd1498Szrj 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
721138fd1498Szrj 			 "reduction on variable-length vectors requires"
721238fd1498Szrj 			 " target support for a vector-shift-and-insert"
721338fd1498Szrj 			 " operation.\n");
721438fd1498Szrj       return false;
721538fd1498Szrj     }
721638fd1498Szrj 
721738fd1498Szrj   /* Check extra constraints for variable-length unchained SLP reductions.  */
721838fd1498Szrj   if (STMT_SLP_TYPE (stmt_info)
721938fd1498Szrj       && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
722038fd1498Szrj       && !nunits_out.is_constant ())
722138fd1498Szrj     {
722238fd1498Szrj       /* We checked above that we could build the initial vector when
722338fd1498Szrj 	 there's a neutral element value.  Check here for the case in
722438fd1498Szrj 	 which each SLP statement has its own initial value and in which
722538fd1498Szrj 	 that value needs to be repeated for every instance of the
722638fd1498Szrj 	 statement within the initial vector.  */
722738fd1498Szrj       unsigned int group_size = SLP_TREE_SCALAR_STMTS (slp_node).length ();
722838fd1498Szrj       scalar_mode elt_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype_out));
722938fd1498Szrj       if (!neutral_op
723038fd1498Szrj 	  && !can_duplicate_and_interleave_p (group_size, elt_mode))
723138fd1498Szrj 	{
723238fd1498Szrj 	  if (dump_enabled_p ())
723338fd1498Szrj 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
723438fd1498Szrj 			     "unsupported form of SLP reduction for"
723538fd1498Szrj 			     " variable-length vectors: cannot build"
723638fd1498Szrj 			     " initial vector.\n");
723738fd1498Szrj 	  return false;
723838fd1498Szrj 	}
723938fd1498Szrj       /* The epilogue code relies on the number of elements being a multiple
724038fd1498Szrj 	 of the group size.  The duplicate-and-interleave approach to setting
724138fd1498Szrj 	 up the initial vector does too.  */
724238fd1498Szrj       if (!multiple_p (nunits_out, group_size))
724338fd1498Szrj 	{
724438fd1498Szrj 	  if (dump_enabled_p ())
724538fd1498Szrj 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
724638fd1498Szrj 			     "unsupported form of SLP reduction for"
724738fd1498Szrj 			     " variable-length vectors: the vector size"
724838fd1498Szrj 			     " is not a multiple of the number of results.\n");
724938fd1498Szrj 	  return false;
725038fd1498Szrj 	}
725138fd1498Szrj     }
725238fd1498Szrj 
725338fd1498Szrj   /* In case of widening multiplication by a constant, we update the type
725438fd1498Szrj      of the constant to be the type of the other operand.  We check that the
725538fd1498Szrj      constant fits the type in the pattern recognition pass.  */
725638fd1498Szrj   if (code == DOT_PROD_EXPR
725738fd1498Szrj       && !types_compatible_p (TREE_TYPE (ops[0]), TREE_TYPE (ops[1])))
725838fd1498Szrj     {
725938fd1498Szrj       if (TREE_CODE (ops[0]) == INTEGER_CST)
726038fd1498Szrj         ops[0] = fold_convert (TREE_TYPE (ops[1]), ops[0]);
726138fd1498Szrj       else if (TREE_CODE (ops[1]) == INTEGER_CST)
726238fd1498Szrj         ops[1] = fold_convert (TREE_TYPE (ops[0]), ops[1]);
726338fd1498Szrj       else
726438fd1498Szrj         {
726538fd1498Szrj           if (dump_enabled_p ())
726638fd1498Szrj 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
726738fd1498Szrj 			     "invalid types in dot-prod\n");
726838fd1498Szrj 
726938fd1498Szrj           return false;
727038fd1498Szrj         }
727138fd1498Szrj     }
727238fd1498Szrj 
727338fd1498Szrj   if (reduction_type == COND_REDUCTION)
727438fd1498Szrj     {
727538fd1498Szrj       widest_int ni;
727638fd1498Szrj 
727738fd1498Szrj       if (! max_loop_iterations (loop, &ni))
727838fd1498Szrj 	{
727938fd1498Szrj 	  if (dump_enabled_p ())
728038fd1498Szrj 	    dump_printf_loc (MSG_NOTE, vect_location,
728138fd1498Szrj 			     "loop count not known, cannot create cond "
728238fd1498Szrj 			     "reduction.\n");
728338fd1498Szrj 	  return false;
728438fd1498Szrj 	}
728538fd1498Szrj       /* Convert backedges to iterations.  */
728638fd1498Szrj       ni += 1;
728738fd1498Szrj 
728838fd1498Szrj       /* The additional index will be the same type as the condition.  Check
728938fd1498Szrj 	 that the loop can fit into this less one (because we'll use up the
729038fd1498Szrj 	 zero slot for when there are no matches).  */
729138fd1498Szrj       tree max_index = TYPE_MAX_VALUE (cr_index_scalar_type);
729238fd1498Szrj       if (wi::geu_p (ni, wi::to_widest (max_index)))
729338fd1498Szrj 	{
729438fd1498Szrj 	  if (dump_enabled_p ())
729538fd1498Szrj 	    dump_printf_loc (MSG_NOTE, vect_location,
729638fd1498Szrj 			     "loop size is greater than data size.\n");
729738fd1498Szrj 	  return false;
729838fd1498Szrj 	}
729938fd1498Szrj     }
730038fd1498Szrj 
730138fd1498Szrj   /* In case the vectorization factor (VF) is bigger than the number
730238fd1498Szrj      of elements that we can fit in a vectype (nunits), we have to generate
730338fd1498Szrj      more than one vector stmt - i.e - we need to "unroll" the
730438fd1498Szrj      vector stmt by a factor VF/nunits.  For more details see documentation
730538fd1498Szrj      in vectorizable_operation.  */
730638fd1498Szrj 
730738fd1498Szrj   /* If the reduction is used in an outer loop we need to generate
730838fd1498Szrj      VF intermediate results, like so (e.g. for ncopies=2):
730938fd1498Szrj 	r0 = phi (init, r0)
731038fd1498Szrj 	r1 = phi (init, r1)
731138fd1498Szrj 	r0 = x0 + r0;
731238fd1498Szrj         r1 = x1 + r1;
731338fd1498Szrj     (i.e. we generate VF results in 2 registers).
731438fd1498Szrj     In this case we have a separate def-use cycle for each copy, and therefore
731538fd1498Szrj     for each copy we get the vector def for the reduction variable from the
731638fd1498Szrj     respective phi node created for this copy.
731738fd1498Szrj 
731838fd1498Szrj     Otherwise (the reduction is unused in the loop nest), we can combine
731938fd1498Szrj     together intermediate results, like so (e.g. for ncopies=2):
732038fd1498Szrj 	r = phi (init, r)
732138fd1498Szrj 	r = x0 + r;
732238fd1498Szrj 	r = x1 + r;
732338fd1498Szrj    (i.e. we generate VF/2 results in a single register).
732438fd1498Szrj    In this case for each copy we get the vector def for the reduction variable
732538fd1498Szrj    from the vectorized reduction operation generated in the previous iteration.
732638fd1498Szrj 
732738fd1498Szrj    This only works when we see both the reduction PHI and its only consumer
732838fd1498Szrj    in vectorizable_reduction and there are no intermediate stmts
732938fd1498Szrj    participating.  */
733038fd1498Szrj   use_operand_p use_p;
733138fd1498Szrj   gimple *use_stmt;
733238fd1498Szrj   if (ncopies > 1
733338fd1498Szrj       && (STMT_VINFO_RELEVANT (stmt_info) <= vect_used_only_live)
733438fd1498Szrj       && single_imm_use (gimple_phi_result (reduc_def_stmt), &use_p, &use_stmt)
733538fd1498Szrj       && (use_stmt == stmt
733638fd1498Szrj 	  || STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) == stmt))
733738fd1498Szrj     {
733838fd1498Szrj       single_defuse_cycle = true;
733938fd1498Szrj       epilog_copies = 1;
734038fd1498Szrj     }
734138fd1498Szrj   else
734238fd1498Szrj     epilog_copies = ncopies;
734338fd1498Szrj 
734438fd1498Szrj   /* If the reduction stmt is one of the patterns that have lane
734538fd1498Szrj      reduction embedded we cannot handle the case of ! single_defuse_cycle.  */
734638fd1498Szrj   if ((ncopies > 1
734738fd1498Szrj        && ! single_defuse_cycle)
734838fd1498Szrj       && (code == DOT_PROD_EXPR
734938fd1498Szrj 	  || code == WIDEN_SUM_EXPR
735038fd1498Szrj 	  || code == SAD_EXPR))
735138fd1498Szrj     {
735238fd1498Szrj       if (dump_enabled_p ())
735338fd1498Szrj 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
735438fd1498Szrj 			 "multi def-use cycle not possible for lane-reducing "
735538fd1498Szrj 			 "reduction operation\n");
735638fd1498Szrj       return false;
735738fd1498Szrj     }
735838fd1498Szrj 
735938fd1498Szrj   if (slp_node)
736038fd1498Szrj     vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
736138fd1498Szrj   else
736238fd1498Szrj     vec_num = 1;
736338fd1498Szrj 
736438fd1498Szrj   internal_fn cond_fn = get_conditional_internal_fn (code);
736538fd1498Szrj   vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
736638fd1498Szrj 
736738fd1498Szrj   if (!vec_stmt) /* transformation not required.  */
736838fd1498Szrj     {
736938fd1498Szrj       if (first_p)
737038fd1498Szrj 	vect_model_reduction_cost (stmt_info, reduc_fn, ncopies);
737138fd1498Szrj       if (loop_vinfo && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
737238fd1498Szrj 	{
737338fd1498Szrj 	  if (reduction_type != FOLD_LEFT_REDUCTION
737438fd1498Szrj 	      && (cond_fn == IFN_LAST
737538fd1498Szrj 		  || !direct_internal_fn_supported_p (cond_fn, vectype_in,
737638fd1498Szrj 						      OPTIMIZE_FOR_SPEED)))
737738fd1498Szrj 	    {
737838fd1498Szrj 	      if (dump_enabled_p ())
737938fd1498Szrj 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
738038fd1498Szrj 				 "can't use a fully-masked loop because no"
738138fd1498Szrj 				 " conditional operation is available.\n");
738238fd1498Szrj 	      LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
738338fd1498Szrj 	    }
738438fd1498Szrj 	  else if (reduc_index == -1)
738538fd1498Szrj 	    {
738638fd1498Szrj 	      if (dump_enabled_p ())
738738fd1498Szrj 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
738838fd1498Szrj 				 "can't use a fully-masked loop for chained"
738938fd1498Szrj 				 " reductions.\n");
739038fd1498Szrj 	      LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
739138fd1498Szrj 	    }
739238fd1498Szrj 	  else
739338fd1498Szrj 	    vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
739438fd1498Szrj 				   vectype_in);
739538fd1498Szrj 	}
739638fd1498Szrj       if (dump_enabled_p ()
739738fd1498Szrj 	  && reduction_type == FOLD_LEFT_REDUCTION)
739838fd1498Szrj 	dump_printf_loc (MSG_NOTE, vect_location,
739938fd1498Szrj 			 "using an in-order (fold-left) reduction.\n");
740038fd1498Szrj       STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
740138fd1498Szrj       return true;
740238fd1498Szrj     }
740338fd1498Szrj 
740438fd1498Szrj   /* Transform.  */
740538fd1498Szrj 
740638fd1498Szrj   if (dump_enabled_p ())
740738fd1498Szrj     dump_printf_loc (MSG_NOTE, vect_location, "transform reduction.\n");
740838fd1498Szrj 
740938fd1498Szrj   /* FORNOW: Multiple types are not supported for condition.  */
741038fd1498Szrj   if (code == COND_EXPR)
741138fd1498Szrj     gcc_assert (ncopies == 1);
741238fd1498Szrj 
741338fd1498Szrj   bool masked_loop_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
741438fd1498Szrj 
741538fd1498Szrj   if (reduction_type == FOLD_LEFT_REDUCTION)
741638fd1498Szrj     return vectorize_fold_left_reduction
741738fd1498Szrj       (stmt, gsi, vec_stmt, slp_node, reduc_def_stmt, code,
741838fd1498Szrj        reduc_fn, ops, vectype_in, reduc_index, masks);
741938fd1498Szrj 
742038fd1498Szrj   if (reduction_type == EXTRACT_LAST_REDUCTION)
742138fd1498Szrj     {
742238fd1498Szrj       gcc_assert (!slp_node);
742338fd1498Szrj       return vectorizable_condition (stmt, gsi, vec_stmt,
742438fd1498Szrj 				     NULL, reduc_index, NULL);
742538fd1498Szrj     }
742638fd1498Szrj 
742738fd1498Szrj   /* Create the destination vector  */
742838fd1498Szrj   vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
742938fd1498Szrj 
743038fd1498Szrj   prev_stmt_info = NULL;
743138fd1498Szrj   prev_phi_info = NULL;
743238fd1498Szrj   if (!slp_node)
743338fd1498Szrj     {
743438fd1498Szrj       vec_oprnds0.create (1);
743538fd1498Szrj       vec_oprnds1.create (1);
743638fd1498Szrj       if (op_type == ternary_op)
743738fd1498Szrj         vec_oprnds2.create (1);
743838fd1498Szrj     }
743938fd1498Szrj 
744038fd1498Szrj   phis.create (vec_num);
744138fd1498Szrj   vect_defs.create (vec_num);
744238fd1498Szrj   if (!slp_node)
744338fd1498Szrj     vect_defs.quick_push (NULL_TREE);
744438fd1498Szrj 
744538fd1498Szrj   if (slp_node)
744638fd1498Szrj     phis.splice (SLP_TREE_VEC_STMTS (slp_node_instance->reduc_phis));
744738fd1498Szrj   else
744838fd1498Szrj     phis.quick_push (STMT_VINFO_VEC_STMT (vinfo_for_stmt (reduc_def_stmt)));
744938fd1498Szrj 
745038fd1498Szrj   for (j = 0; j < ncopies; j++)
745138fd1498Szrj     {
745238fd1498Szrj       if (code == COND_EXPR)
745338fd1498Szrj         {
745438fd1498Szrj           gcc_assert (!slp_node);
745538fd1498Szrj           vectorizable_condition (stmt, gsi, vec_stmt,
745638fd1498Szrj                                   PHI_RESULT (phis[0]),
745738fd1498Szrj                                   reduc_index, NULL);
745838fd1498Szrj           /* Multiple types are not supported for condition.  */
745938fd1498Szrj           break;
746038fd1498Szrj         }
746138fd1498Szrj 
746238fd1498Szrj       /* Handle uses.  */
746338fd1498Szrj       if (j == 0)
746438fd1498Szrj         {
746538fd1498Szrj 	  if (slp_node)
746638fd1498Szrj 	    {
746738fd1498Szrj 	      /* Get vec defs for all the operands except the reduction index,
746838fd1498Szrj 		 ensuring the ordering of the ops in the vector is kept.  */
746938fd1498Szrj 	      auto_vec<tree, 3> slp_ops;
747038fd1498Szrj 	      auto_vec<vec<tree>, 3> vec_defs;
747138fd1498Szrj 
747238fd1498Szrj 	      slp_ops.quick_push (ops[0]);
747338fd1498Szrj 	      slp_ops.quick_push (ops[1]);
747438fd1498Szrj 	      if (op_type == ternary_op)
747538fd1498Szrj 		slp_ops.quick_push (ops[2]);
747638fd1498Szrj 
747738fd1498Szrj 	      vect_get_slp_defs (slp_ops, slp_node, &vec_defs);
747838fd1498Szrj 
747938fd1498Szrj 	      vec_oprnds0.safe_splice (vec_defs[0]);
748038fd1498Szrj 	      vec_defs[0].release ();
748138fd1498Szrj 	      vec_oprnds1.safe_splice (vec_defs[1]);
748238fd1498Szrj 	      vec_defs[1].release ();
748338fd1498Szrj 	      if (op_type == ternary_op)
748438fd1498Szrj 		{
748538fd1498Szrj 		  vec_oprnds2.safe_splice (vec_defs[2]);
748638fd1498Szrj 		  vec_defs[2].release ();
748738fd1498Szrj 		}
748838fd1498Szrj 	    }
748938fd1498Szrj           else
749038fd1498Szrj 	    {
749138fd1498Szrj               vec_oprnds0.quick_push
749238fd1498Szrj 		(vect_get_vec_def_for_operand (ops[0], stmt));
749338fd1498Szrj               vec_oprnds1.quick_push
749438fd1498Szrj 		(vect_get_vec_def_for_operand (ops[1], stmt));
749538fd1498Szrj               if (op_type == ternary_op)
749638fd1498Szrj 		vec_oprnds2.quick_push
749738fd1498Szrj 		  (vect_get_vec_def_for_operand (ops[2], stmt));
749838fd1498Szrj 	    }
749938fd1498Szrj         }
750038fd1498Szrj       else
750138fd1498Szrj         {
750238fd1498Szrj           if (!slp_node)
750338fd1498Szrj             {
750438fd1498Szrj 	      gcc_assert (reduc_index != -1 || ! single_defuse_cycle);
750538fd1498Szrj 
750638fd1498Szrj 	      if (single_defuse_cycle && reduc_index == 0)
750738fd1498Szrj 		vec_oprnds0[0] = gimple_get_lhs (new_stmt);
750838fd1498Szrj 	      else
750938fd1498Szrj 		vec_oprnds0[0]
751038fd1498Szrj 		  = vect_get_vec_def_for_stmt_copy (dts[0], vec_oprnds0[0]);
751138fd1498Szrj 	      if (single_defuse_cycle && reduc_index == 1)
751238fd1498Szrj 		vec_oprnds1[0] = gimple_get_lhs (new_stmt);
751338fd1498Szrj 	      else
751438fd1498Szrj 		vec_oprnds1[0]
751538fd1498Szrj 		  = vect_get_vec_def_for_stmt_copy (dts[1], vec_oprnds1[0]);
751638fd1498Szrj 	      if (op_type == ternary_op)
751738fd1498Szrj 		{
751838fd1498Szrj 		  if (single_defuse_cycle && reduc_index == 2)
751938fd1498Szrj 		    vec_oprnds2[0] = gimple_get_lhs (new_stmt);
752038fd1498Szrj 		  else
752138fd1498Szrj 		    vec_oprnds2[0]
752238fd1498Szrj 		      = vect_get_vec_def_for_stmt_copy (dts[2], vec_oprnds2[0]);
752338fd1498Szrj 		}
752438fd1498Szrj             }
752538fd1498Szrj         }
752638fd1498Szrj 
752738fd1498Szrj       FOR_EACH_VEC_ELT (vec_oprnds0, i, def0)
752838fd1498Szrj         {
752938fd1498Szrj 	  tree vop[3] = { def0, vec_oprnds1[i], NULL_TREE };
753038fd1498Szrj 	  if (masked_loop_p)
753138fd1498Szrj 	    {
753238fd1498Szrj 	      /* Make sure that the reduction accumulator is vop[0].  */
753338fd1498Szrj 	      if (reduc_index == 1)
753438fd1498Szrj 		{
753538fd1498Szrj 		  gcc_assert (commutative_tree_code (code));
753638fd1498Szrj 		  std::swap (vop[0], vop[1]);
753738fd1498Szrj 		}
753838fd1498Szrj 	      tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
753938fd1498Szrj 					      vectype_in, i * ncopies + j);
754038fd1498Szrj 	      gcall *call = gimple_build_call_internal (cond_fn, 3, mask,
754138fd1498Szrj 							vop[0], vop[1]);
754238fd1498Szrj 	      new_temp = make_ssa_name (vec_dest, call);
754338fd1498Szrj 	      gimple_call_set_lhs (call, new_temp);
754438fd1498Szrj 	      gimple_call_set_nothrow (call, true);
754538fd1498Szrj 	      new_stmt = call;
754638fd1498Szrj 	    }
754738fd1498Szrj 	  else
754838fd1498Szrj 	    {
754938fd1498Szrj 	      if (op_type == ternary_op)
755038fd1498Szrj 		vop[2] = vec_oprnds2[i];
755138fd1498Szrj 
755238fd1498Szrj 	      new_temp = make_ssa_name (vec_dest, new_stmt);
755338fd1498Szrj 	      new_stmt = gimple_build_assign (new_temp, code,
755438fd1498Szrj 					      vop[0], vop[1], vop[2]);
755538fd1498Szrj 	    }
755638fd1498Szrj 	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
755738fd1498Szrj 
755838fd1498Szrj           if (slp_node)
755938fd1498Szrj             {
756038fd1498Szrj               SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
756138fd1498Szrj               vect_defs.quick_push (new_temp);
756238fd1498Szrj             }
756338fd1498Szrj           else
756438fd1498Szrj             vect_defs[0] = new_temp;
756538fd1498Szrj         }
756638fd1498Szrj 
756738fd1498Szrj       if (slp_node)
756838fd1498Szrj         continue;
756938fd1498Szrj 
757038fd1498Szrj       if (j == 0)
757138fd1498Szrj 	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
757238fd1498Szrj       else
757338fd1498Szrj 	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
757438fd1498Szrj 
757538fd1498Szrj       prev_stmt_info = vinfo_for_stmt (new_stmt);
757638fd1498Szrj     }
757738fd1498Szrj 
757838fd1498Szrj   /* Finalize the reduction-phi (set its arguments) and create the
757938fd1498Szrj      epilog reduction code.  */
758038fd1498Szrj   if ((!single_defuse_cycle || code == COND_EXPR) && !slp_node)
758138fd1498Szrj     vect_defs[0] = gimple_get_lhs (*vec_stmt);
758238fd1498Szrj 
758338fd1498Szrj   vect_create_epilog_for_reduction (vect_defs, stmt, reduc_def_stmt,
758438fd1498Szrj 				    epilog_copies, reduc_fn, phis,
758538fd1498Szrj 				    double_reduc, slp_node, slp_node_instance,
758638fd1498Szrj 				    cond_reduc_val, cond_reduc_op_code,
758738fd1498Szrj 				    neutral_op);
758838fd1498Szrj 
758938fd1498Szrj   return true;
759038fd1498Szrj }
759138fd1498Szrj 
759238fd1498Szrj /* Function vect_min_worthwhile_factor.
759338fd1498Szrj 
759438fd1498Szrj    For a loop where we could vectorize the operation indicated by CODE,
759538fd1498Szrj    return the minimum vectorization factor that makes it worthwhile
759638fd1498Szrj    to use generic vectors.  */
759738fd1498Szrj static unsigned int
vect_min_worthwhile_factor(enum tree_code code)759838fd1498Szrj vect_min_worthwhile_factor (enum tree_code code)
759938fd1498Szrj {
760038fd1498Szrj   switch (code)
760138fd1498Szrj     {
760238fd1498Szrj     case PLUS_EXPR:
760338fd1498Szrj     case MINUS_EXPR:
760438fd1498Szrj     case NEGATE_EXPR:
760538fd1498Szrj       return 4;
760638fd1498Szrj 
760738fd1498Szrj     case BIT_AND_EXPR:
760838fd1498Szrj     case BIT_IOR_EXPR:
760938fd1498Szrj     case BIT_XOR_EXPR:
761038fd1498Szrj     case BIT_NOT_EXPR:
761138fd1498Szrj       return 2;
761238fd1498Szrj 
761338fd1498Szrj     default:
761438fd1498Szrj       return INT_MAX;
761538fd1498Szrj     }
761638fd1498Szrj }
761738fd1498Szrj 
761838fd1498Szrj /* Return true if VINFO indicates we are doing loop vectorization and if
761938fd1498Szrj    it is worth decomposing CODE operations into scalar operations for
762038fd1498Szrj    that loop's vectorization factor.  */
762138fd1498Szrj 
762238fd1498Szrj bool
vect_worthwhile_without_simd_p(vec_info * vinfo,tree_code code)762338fd1498Szrj vect_worthwhile_without_simd_p (vec_info *vinfo, tree_code code)
762438fd1498Szrj {
762538fd1498Szrj   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
762638fd1498Szrj   unsigned HOST_WIDE_INT value;
762738fd1498Szrj   return (loop_vinfo
762838fd1498Szrj 	  && LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&value)
762938fd1498Szrj 	  && value >= vect_min_worthwhile_factor (code));
763038fd1498Szrj }
763138fd1498Szrj 
763238fd1498Szrj /* Function vectorizable_induction
763338fd1498Szrj 
763438fd1498Szrj    Check if PHI performs an induction computation that can be vectorized.
763538fd1498Szrj    If VEC_STMT is also passed, vectorize the induction PHI: create a vectorized
763638fd1498Szrj    phi to replace it, put it in VEC_STMT, and add it to the same basic block.
763738fd1498Szrj    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
763838fd1498Szrj 
763938fd1498Szrj bool
vectorizable_induction(gimple * phi,gimple_stmt_iterator * gsi ATTRIBUTE_UNUSED,gimple ** vec_stmt,slp_tree slp_node)764038fd1498Szrj vectorizable_induction (gimple *phi,
764138fd1498Szrj 			gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
764238fd1498Szrj 			gimple **vec_stmt, slp_tree slp_node)
764338fd1498Szrj {
764438fd1498Szrj   stmt_vec_info stmt_info = vinfo_for_stmt (phi);
764538fd1498Szrj   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
764638fd1498Szrj   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
764738fd1498Szrj   unsigned ncopies;
764838fd1498Szrj   bool nested_in_vect_loop = false;
764938fd1498Szrj   struct loop *iv_loop;
765038fd1498Szrj   tree vec_def;
765138fd1498Szrj   edge pe = loop_preheader_edge (loop);
765238fd1498Szrj   basic_block new_bb;
765338fd1498Szrj   tree new_vec, vec_init, vec_step, t;
765438fd1498Szrj   tree new_name;
765538fd1498Szrj   gimple *new_stmt;
765638fd1498Szrj   gphi *induction_phi;
765738fd1498Szrj   tree induc_def, vec_dest;
765838fd1498Szrj   tree init_expr, step_expr;
765938fd1498Szrj   poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
766038fd1498Szrj   unsigned i;
766138fd1498Szrj   tree expr;
766238fd1498Szrj   gimple_seq stmts;
766338fd1498Szrj   imm_use_iterator imm_iter;
766438fd1498Szrj   use_operand_p use_p;
766538fd1498Szrj   gimple *exit_phi;
766638fd1498Szrj   edge latch_e;
766738fd1498Szrj   tree loop_arg;
766838fd1498Szrj   gimple_stmt_iterator si;
766938fd1498Szrj   basic_block bb = gimple_bb (phi);
767038fd1498Szrj 
767138fd1498Szrj   if (gimple_code (phi) != GIMPLE_PHI)
767238fd1498Szrj     return false;
767338fd1498Szrj 
767438fd1498Szrj   if (!STMT_VINFO_RELEVANT_P (stmt_info))
767538fd1498Szrj     return false;
767638fd1498Szrj 
767738fd1498Szrj   /* Make sure it was recognized as induction computation.  */
767838fd1498Szrj   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def)
767938fd1498Szrj     return false;
768038fd1498Szrj 
768138fd1498Szrj   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
768238fd1498Szrj   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
768338fd1498Szrj 
768438fd1498Szrj   if (slp_node)
768538fd1498Szrj     ncopies = 1;
768638fd1498Szrj   else
768738fd1498Szrj     ncopies = vect_get_num_copies (loop_vinfo, vectype);
768838fd1498Szrj   gcc_assert (ncopies >= 1);
768938fd1498Szrj 
769038fd1498Szrj   /* FORNOW. These restrictions should be relaxed.  */
769138fd1498Szrj   if (nested_in_vect_loop_p (loop, phi))
769238fd1498Szrj     {
769338fd1498Szrj       imm_use_iterator imm_iter;
769438fd1498Szrj       use_operand_p use_p;
769538fd1498Szrj       gimple *exit_phi;
769638fd1498Szrj       edge latch_e;
769738fd1498Szrj       tree loop_arg;
769838fd1498Szrj 
769938fd1498Szrj       if (ncopies > 1)
770038fd1498Szrj 	{
770138fd1498Szrj 	  if (dump_enabled_p ())
770238fd1498Szrj 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
770338fd1498Szrj 			     "multiple types in nested loop.\n");
770438fd1498Szrj 	  return false;
770538fd1498Szrj 	}
770638fd1498Szrj 
770738fd1498Szrj       /* FORNOW: outer loop induction with SLP not supported.  */
770838fd1498Szrj       if (STMT_SLP_TYPE (stmt_info))
770938fd1498Szrj 	return false;
771038fd1498Szrj 
771138fd1498Szrj       exit_phi = NULL;
771238fd1498Szrj       latch_e = loop_latch_edge (loop->inner);
771338fd1498Szrj       loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
771438fd1498Szrj       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, loop_arg)
771538fd1498Szrj 	{
771638fd1498Szrj 	  gimple *use_stmt = USE_STMT (use_p);
771738fd1498Szrj 	  if (is_gimple_debug (use_stmt))
771838fd1498Szrj 	    continue;
771938fd1498Szrj 
772038fd1498Szrj 	  if (!flow_bb_inside_loop_p (loop->inner, gimple_bb (use_stmt)))
772138fd1498Szrj 	    {
772238fd1498Szrj 	      exit_phi = use_stmt;
772338fd1498Szrj 	      break;
772438fd1498Szrj 	    }
772538fd1498Szrj 	}
772638fd1498Szrj       if (exit_phi)
772738fd1498Szrj 	{
772838fd1498Szrj 	  stmt_vec_info exit_phi_vinfo  = vinfo_for_stmt (exit_phi);
772938fd1498Szrj 	  if (!(STMT_VINFO_RELEVANT_P (exit_phi_vinfo)
773038fd1498Szrj 		&& !STMT_VINFO_LIVE_P (exit_phi_vinfo)))
773138fd1498Szrj 	    {
773238fd1498Szrj 	      if (dump_enabled_p ())
773338fd1498Szrj 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
773438fd1498Szrj 				 "inner-loop induction only used outside "
773538fd1498Szrj 				 "of the outer vectorized loop.\n");
773638fd1498Szrj 	      return false;
773738fd1498Szrj 	    }
773838fd1498Szrj 	}
773938fd1498Szrj 
774038fd1498Szrj       nested_in_vect_loop = true;
774138fd1498Szrj       iv_loop = loop->inner;
774238fd1498Szrj     }
774338fd1498Szrj   else
774438fd1498Szrj     iv_loop = loop;
774538fd1498Szrj   gcc_assert (iv_loop == (gimple_bb (phi))->loop_father);
774638fd1498Szrj 
774738fd1498Szrj   if (slp_node && !nunits.is_constant ())
774838fd1498Szrj     {
774938fd1498Szrj       /* The current SLP code creates the initial value element-by-element.  */
775038fd1498Szrj       if (dump_enabled_p ())
775138fd1498Szrj 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
775238fd1498Szrj 			 "SLP induction not supported for variable-length"
775338fd1498Szrj 			 " vectors.\n");
775438fd1498Szrj       return false;
775538fd1498Szrj     }
775638fd1498Szrj 
775738fd1498Szrj   if (!vec_stmt) /* transformation not required.  */
775838fd1498Szrj     {
775938fd1498Szrj       STMT_VINFO_TYPE (stmt_info) = induc_vec_info_type;
776038fd1498Szrj       if (dump_enabled_p ())
776138fd1498Szrj         dump_printf_loc (MSG_NOTE, vect_location,
776238fd1498Szrj                          "=== vectorizable_induction ===\n");
776338fd1498Szrj       vect_model_induction_cost (stmt_info, ncopies);
776438fd1498Szrj       return true;
776538fd1498Szrj     }
776638fd1498Szrj 
776738fd1498Szrj   /* Transform.  */
776838fd1498Szrj 
776938fd1498Szrj   /* Compute a vector variable, initialized with the first VF values of
777038fd1498Szrj      the induction variable.  E.g., for an iv with IV_PHI='X' and
777138fd1498Szrj      evolution S, for a vector of 4 units, we want to compute:
777238fd1498Szrj      [X, X + S, X + 2*S, X + 3*S].  */
777338fd1498Szrj 
777438fd1498Szrj   if (dump_enabled_p ())
777538fd1498Szrj     dump_printf_loc (MSG_NOTE, vect_location, "transform induction phi.\n");
777638fd1498Szrj 
777738fd1498Szrj   latch_e = loop_latch_edge (iv_loop);
777838fd1498Szrj   loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
777938fd1498Szrj 
778038fd1498Szrj   step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info);
778138fd1498Szrj   gcc_assert (step_expr != NULL_TREE);
778238fd1498Szrj 
778338fd1498Szrj   pe = loop_preheader_edge (iv_loop);
778438fd1498Szrj   init_expr = PHI_ARG_DEF_FROM_EDGE (phi,
778538fd1498Szrj 				     loop_preheader_edge (iv_loop));
778638fd1498Szrj 
778738fd1498Szrj   stmts = NULL;
778838fd1498Szrj   if (!nested_in_vect_loop)
778938fd1498Szrj     {
779038fd1498Szrj       /* Convert the initial value to the desired type.  */
779138fd1498Szrj       tree new_type = TREE_TYPE (vectype);
779238fd1498Szrj       init_expr = gimple_convert (&stmts, new_type, init_expr);
779338fd1498Szrj 
779438fd1498Szrj       /* If we are using the loop mask to "peel" for alignment then we need
779538fd1498Szrj 	 to adjust the start value here.  */
779638fd1498Szrj       tree skip_niters = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo);
779738fd1498Szrj       if (skip_niters != NULL_TREE)
779838fd1498Szrj 	{
779938fd1498Szrj 	  if (FLOAT_TYPE_P (vectype))
780038fd1498Szrj 	    skip_niters = gimple_build (&stmts, FLOAT_EXPR, new_type,
780138fd1498Szrj 					skip_niters);
780238fd1498Szrj 	  else
780338fd1498Szrj 	    skip_niters = gimple_convert (&stmts, new_type, skip_niters);
780438fd1498Szrj 	  tree skip_step = gimple_build (&stmts, MULT_EXPR, new_type,
780538fd1498Szrj 					 skip_niters, step_expr);
780638fd1498Szrj 	  init_expr = gimple_build (&stmts, MINUS_EXPR, new_type,
780738fd1498Szrj 				    init_expr, skip_step);
780838fd1498Szrj 	}
780938fd1498Szrj     }
781038fd1498Szrj 
781138fd1498Szrj   /* Convert the step to the desired type.  */
781238fd1498Szrj   step_expr = gimple_convert (&stmts, TREE_TYPE (vectype), step_expr);
781338fd1498Szrj 
781438fd1498Szrj   if (stmts)
781538fd1498Szrj     {
781638fd1498Szrj       new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
781738fd1498Szrj       gcc_assert (!new_bb);
781838fd1498Szrj     }
781938fd1498Szrj 
782038fd1498Szrj   /* Find the first insertion point in the BB.  */
782138fd1498Szrj   si = gsi_after_labels (bb);
782238fd1498Szrj 
782338fd1498Szrj   /* For SLP induction we have to generate several IVs as for example
782438fd1498Szrj      with group size 3 we need [i, i, i, i + S] [i + S, i + S, i + 2*S, i + 2*S]
782538fd1498Szrj      [i + 2*S, i + 3*S, i + 3*S, i + 3*S].  The step is the same uniform
782638fd1498Szrj      [VF*S, VF*S, VF*S, VF*S] for all.  */
782738fd1498Szrj   if (slp_node)
782838fd1498Szrj     {
782938fd1498Szrj       /* Enforced above.  */
783038fd1498Szrj       unsigned int const_nunits = nunits.to_constant ();
783138fd1498Szrj 
783238fd1498Szrj       /* Generate [VF*S, VF*S, ... ].  */
783338fd1498Szrj       if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
783438fd1498Szrj 	{
783538fd1498Szrj 	  expr = build_int_cst (integer_type_node, vf);
783638fd1498Szrj 	  expr = fold_convert (TREE_TYPE (step_expr), expr);
783738fd1498Szrj 	}
783838fd1498Szrj       else
783938fd1498Szrj 	expr = build_int_cst (TREE_TYPE (step_expr), vf);
784038fd1498Szrj       new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
784138fd1498Szrj 			      expr, step_expr);
784238fd1498Szrj       if (! CONSTANT_CLASS_P (new_name))
784338fd1498Szrj 	new_name = vect_init_vector (phi, new_name,
784438fd1498Szrj 				     TREE_TYPE (step_expr), NULL);
784538fd1498Szrj       new_vec = build_vector_from_val (vectype, new_name);
784638fd1498Szrj       vec_step = vect_init_vector (phi, new_vec, vectype, NULL);
784738fd1498Szrj 
784838fd1498Szrj       /* Now generate the IVs.  */
784938fd1498Szrj       unsigned group_size = SLP_TREE_SCALAR_STMTS (slp_node).length ();
785038fd1498Szrj       unsigned nvects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
785138fd1498Szrj       unsigned elts = const_nunits * nvects;
785238fd1498Szrj       unsigned nivs = least_common_multiple (group_size,
785338fd1498Szrj 					     const_nunits) / const_nunits;
785438fd1498Szrj       gcc_assert (elts % group_size == 0);
785538fd1498Szrj       tree elt = init_expr;
785638fd1498Szrj       unsigned ivn;
785738fd1498Szrj       for (ivn = 0; ivn < nivs; ++ivn)
785838fd1498Szrj 	{
785938fd1498Szrj 	  tree_vector_builder elts (vectype, const_nunits, 1);
786038fd1498Szrj 	  stmts = NULL;
786138fd1498Szrj 	  for (unsigned eltn = 0; eltn < const_nunits; ++eltn)
786238fd1498Szrj 	    {
786338fd1498Szrj 	      if (ivn*const_nunits + eltn >= group_size
786438fd1498Szrj 		  && (ivn * const_nunits + eltn) % group_size == 0)
786538fd1498Szrj 		elt = gimple_build (&stmts, PLUS_EXPR, TREE_TYPE (elt),
786638fd1498Szrj 				    elt, step_expr);
786738fd1498Szrj 	      elts.quick_push (elt);
786838fd1498Szrj 	    }
786938fd1498Szrj 	  vec_init = gimple_build_vector (&stmts, &elts);
787038fd1498Szrj 	  if (stmts)
787138fd1498Szrj 	    {
787238fd1498Szrj 	      new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
787338fd1498Szrj 	      gcc_assert (!new_bb);
787438fd1498Szrj 	    }
787538fd1498Szrj 
787638fd1498Szrj 	  /* Create the induction-phi that defines the induction-operand.  */
787738fd1498Szrj 	  vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_");
787838fd1498Szrj 	  induction_phi = create_phi_node (vec_dest, iv_loop->header);
787938fd1498Szrj 	  set_vinfo_for_stmt (induction_phi,
788038fd1498Szrj 			      new_stmt_vec_info (induction_phi, loop_vinfo));
788138fd1498Szrj 	  induc_def = PHI_RESULT (induction_phi);
788238fd1498Szrj 
788338fd1498Szrj 	  /* Create the iv update inside the loop  */
788438fd1498Szrj 	  vec_def = make_ssa_name (vec_dest);
788538fd1498Szrj 	  new_stmt = gimple_build_assign (vec_def, PLUS_EXPR, induc_def, vec_step);
788638fd1498Szrj 	  gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
788738fd1498Szrj 	  set_vinfo_for_stmt (new_stmt, new_stmt_vec_info (new_stmt, loop_vinfo));
788838fd1498Szrj 
788938fd1498Szrj 	  /* Set the arguments of the phi node:  */
789038fd1498Szrj 	  add_phi_arg (induction_phi, vec_init, pe, UNKNOWN_LOCATION);
789138fd1498Szrj 	  add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop),
789238fd1498Szrj 		       UNKNOWN_LOCATION);
789338fd1498Szrj 
789438fd1498Szrj 	  SLP_TREE_VEC_STMTS (slp_node).quick_push (induction_phi);
789538fd1498Szrj 	}
789638fd1498Szrj 
789738fd1498Szrj       /* Re-use IVs when we can.  */
789838fd1498Szrj       if (ivn < nvects)
789938fd1498Szrj 	{
790038fd1498Szrj 	  unsigned vfp
790138fd1498Szrj 	    = least_common_multiple (group_size, const_nunits) / group_size;
790238fd1498Szrj 	  /* Generate [VF'*S, VF'*S, ... ].  */
790338fd1498Szrj 	  if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
790438fd1498Szrj 	    {
790538fd1498Szrj 	      expr = build_int_cst (integer_type_node, vfp);
790638fd1498Szrj 	      expr = fold_convert (TREE_TYPE (step_expr), expr);
790738fd1498Szrj 	    }
790838fd1498Szrj 	  else
790938fd1498Szrj 	    expr = build_int_cst (TREE_TYPE (step_expr), vfp);
791038fd1498Szrj 	  new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
791138fd1498Szrj 				  expr, step_expr);
791238fd1498Szrj 	  if (! CONSTANT_CLASS_P (new_name))
791338fd1498Szrj 	    new_name = vect_init_vector (phi, new_name,
791438fd1498Szrj 					 TREE_TYPE (step_expr), NULL);
791538fd1498Szrj 	  new_vec = build_vector_from_val (vectype, new_name);
791638fd1498Szrj 	  vec_step = vect_init_vector (phi, new_vec, vectype, NULL);
791738fd1498Szrj 	  for (; ivn < nvects; ++ivn)
791838fd1498Szrj 	    {
791938fd1498Szrj 	      gimple *iv = SLP_TREE_VEC_STMTS (slp_node)[ivn - nivs];
792038fd1498Szrj 	      tree def;
792138fd1498Szrj 	      if (gimple_code (iv) == GIMPLE_PHI)
792238fd1498Szrj 		def = gimple_phi_result (iv);
792338fd1498Szrj 	      else
792438fd1498Szrj 		def = gimple_assign_lhs (iv);
792538fd1498Szrj 	      new_stmt = gimple_build_assign (make_ssa_name (vectype),
792638fd1498Szrj 					      PLUS_EXPR,
792738fd1498Szrj 					      def, vec_step);
792838fd1498Szrj 	      if (gimple_code (iv) == GIMPLE_PHI)
792938fd1498Szrj 		gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
793038fd1498Szrj 	      else
793138fd1498Szrj 		{
793238fd1498Szrj 		  gimple_stmt_iterator tgsi = gsi_for_stmt (iv);
793338fd1498Szrj 		  gsi_insert_after (&tgsi, new_stmt, GSI_CONTINUE_LINKING);
793438fd1498Szrj 		}
793538fd1498Szrj 	      set_vinfo_for_stmt (new_stmt,
793638fd1498Szrj 				  new_stmt_vec_info (new_stmt, loop_vinfo));
793738fd1498Szrj 	      SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
793838fd1498Szrj 	    }
793938fd1498Szrj 	}
794038fd1498Szrj 
794138fd1498Szrj       return true;
794238fd1498Szrj     }
794338fd1498Szrj 
794438fd1498Szrj   /* Create the vector that holds the initial_value of the induction.  */
794538fd1498Szrj   if (nested_in_vect_loop)
794638fd1498Szrj     {
794738fd1498Szrj       /* iv_loop is nested in the loop to be vectorized.  init_expr had already
794838fd1498Szrj 	 been created during vectorization of previous stmts.  We obtain it
794938fd1498Szrj 	 from the STMT_VINFO_VEC_STMT of the defining stmt.  */
795038fd1498Szrj       vec_init = vect_get_vec_def_for_operand (init_expr, phi);
795138fd1498Szrj       /* If the initial value is not of proper type, convert it.  */
795238fd1498Szrj       if (!useless_type_conversion_p (vectype, TREE_TYPE (vec_init)))
795338fd1498Szrj 	{
795438fd1498Szrj 	  new_stmt
795538fd1498Szrj 	    = gimple_build_assign (vect_get_new_ssa_name (vectype,
795638fd1498Szrj 							  vect_simple_var,
795738fd1498Szrj 							  "vec_iv_"),
795838fd1498Szrj 				   VIEW_CONVERT_EXPR,
795938fd1498Szrj 				   build1 (VIEW_CONVERT_EXPR, vectype,
796038fd1498Szrj 					   vec_init));
796138fd1498Szrj 	  vec_init = gimple_assign_lhs (new_stmt);
796238fd1498Szrj 	  new_bb = gsi_insert_on_edge_immediate (loop_preheader_edge (iv_loop),
796338fd1498Szrj 						 new_stmt);
796438fd1498Szrj 	  gcc_assert (!new_bb);
796538fd1498Szrj 	  set_vinfo_for_stmt (new_stmt,
796638fd1498Szrj 			      new_stmt_vec_info (new_stmt, loop_vinfo));
796738fd1498Szrj 	}
796838fd1498Szrj     }
796938fd1498Szrj   else
797038fd1498Szrj     {
797138fd1498Szrj       /* iv_loop is the loop to be vectorized. Create:
797238fd1498Szrj 	 vec_init = [X, X+S, X+2*S, X+3*S] (S = step_expr, X = init_expr)  */
797338fd1498Szrj       stmts = NULL;
797438fd1498Szrj       new_name = gimple_convert (&stmts, TREE_TYPE (vectype), init_expr);
797538fd1498Szrj 
797638fd1498Szrj       unsigned HOST_WIDE_INT const_nunits;
797738fd1498Szrj       if (nunits.is_constant (&const_nunits))
797838fd1498Szrj 	{
797938fd1498Szrj 	  tree_vector_builder elts (vectype, const_nunits, 1);
798038fd1498Szrj 	  elts.quick_push (new_name);
798138fd1498Szrj 	  for (i = 1; i < const_nunits; i++)
798238fd1498Szrj 	    {
798338fd1498Szrj 	      /* Create: new_name_i = new_name + step_expr  */
798438fd1498Szrj 	      new_name = gimple_build (&stmts, PLUS_EXPR, TREE_TYPE (new_name),
798538fd1498Szrj 				       new_name, step_expr);
798638fd1498Szrj 	      elts.quick_push (new_name);
798738fd1498Szrj 	    }
798838fd1498Szrj 	  /* Create a vector from [new_name_0, new_name_1, ...,
798938fd1498Szrj 	     new_name_nunits-1]  */
799038fd1498Szrj 	  vec_init = gimple_build_vector (&stmts, &elts);
799138fd1498Szrj 	}
799238fd1498Szrj       else if (INTEGRAL_TYPE_P (TREE_TYPE (step_expr)))
799338fd1498Szrj 	/* Build the initial value directly from a VEC_SERIES_EXPR.  */
799438fd1498Szrj 	vec_init = gimple_build (&stmts, VEC_SERIES_EXPR, vectype,
799538fd1498Szrj 				 new_name, step_expr);
799638fd1498Szrj       else
799738fd1498Szrj 	{
799838fd1498Szrj 	  /* Build:
799938fd1498Szrj 	        [base, base, base, ...]
800038fd1498Szrj 		+ (vectype) [0, 1, 2, ...] * [step, step, step, ...].  */
800138fd1498Szrj 	  gcc_assert (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)));
800238fd1498Szrj 	  gcc_assert (flag_associative_math);
800338fd1498Szrj 	  tree index = build_index_vector (vectype, 0, 1);
800438fd1498Szrj 	  tree base_vec = gimple_build_vector_from_val (&stmts, vectype,
800538fd1498Szrj 							new_name);
800638fd1498Szrj 	  tree step_vec = gimple_build_vector_from_val (&stmts, vectype,
800738fd1498Szrj 							step_expr);
800838fd1498Szrj 	  vec_init = gimple_build (&stmts, FLOAT_EXPR, vectype, index);
800938fd1498Szrj 	  vec_init = gimple_build (&stmts, MULT_EXPR, vectype,
801038fd1498Szrj 				   vec_init, step_vec);
801138fd1498Szrj 	  vec_init = gimple_build (&stmts, PLUS_EXPR, vectype,
801238fd1498Szrj 				   vec_init, base_vec);
801338fd1498Szrj 	}
801438fd1498Szrj 
801538fd1498Szrj       if (stmts)
801638fd1498Szrj 	{
801738fd1498Szrj 	  new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
801838fd1498Szrj 	  gcc_assert (!new_bb);
801938fd1498Szrj 	}
802038fd1498Szrj     }
802138fd1498Szrj 
802238fd1498Szrj 
802338fd1498Szrj   /* Create the vector that holds the step of the induction.  */
802438fd1498Szrj   if (nested_in_vect_loop)
802538fd1498Szrj     /* iv_loop is nested in the loop to be vectorized. Generate:
802638fd1498Szrj        vec_step = [S, S, S, S]  */
802738fd1498Szrj     new_name = step_expr;
802838fd1498Szrj   else
802938fd1498Szrj     {
803038fd1498Szrj       /* iv_loop is the loop to be vectorized. Generate:
803138fd1498Szrj 	  vec_step = [VF*S, VF*S, VF*S, VF*S]  */
803238fd1498Szrj       gimple_seq seq = NULL;
803338fd1498Szrj       if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
803438fd1498Szrj 	{
803538fd1498Szrj 	  expr = build_int_cst (integer_type_node, vf);
803638fd1498Szrj 	  expr = gimple_build (&seq, FLOAT_EXPR, TREE_TYPE (step_expr), expr);
803738fd1498Szrj 	}
803838fd1498Szrj       else
803938fd1498Szrj 	expr = build_int_cst (TREE_TYPE (step_expr), vf);
804038fd1498Szrj       new_name = gimple_build (&seq, MULT_EXPR, TREE_TYPE (step_expr),
804138fd1498Szrj 			       expr, step_expr);
804238fd1498Szrj       if (seq)
804338fd1498Szrj 	{
804438fd1498Szrj 	  new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
804538fd1498Szrj 	  gcc_assert (!new_bb);
804638fd1498Szrj 	}
804738fd1498Szrj     }
804838fd1498Szrj 
804938fd1498Szrj   t = unshare_expr (new_name);
805038fd1498Szrj   gcc_assert (CONSTANT_CLASS_P (new_name)
805138fd1498Szrj 	      || TREE_CODE (new_name) == SSA_NAME);
805238fd1498Szrj   new_vec = build_vector_from_val (vectype, t);
805338fd1498Szrj   vec_step = vect_init_vector (phi, new_vec, vectype, NULL);
805438fd1498Szrj 
805538fd1498Szrj 
805638fd1498Szrj   /* Create the following def-use cycle:
805738fd1498Szrj      loop prolog:
805838fd1498Szrj          vec_init = ...
805938fd1498Szrj 	 vec_step = ...
806038fd1498Szrj      loop:
806138fd1498Szrj          vec_iv = PHI <vec_init, vec_loop>
806238fd1498Szrj          ...
806338fd1498Szrj          STMT
806438fd1498Szrj          ...
806538fd1498Szrj          vec_loop = vec_iv + vec_step;  */
806638fd1498Szrj 
806738fd1498Szrj   /* Create the induction-phi that defines the induction-operand.  */
806838fd1498Szrj   vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_");
806938fd1498Szrj   induction_phi = create_phi_node (vec_dest, iv_loop->header);
807038fd1498Szrj   set_vinfo_for_stmt (induction_phi,
807138fd1498Szrj 		      new_stmt_vec_info (induction_phi, loop_vinfo));
807238fd1498Szrj   induc_def = PHI_RESULT (induction_phi);
807338fd1498Szrj 
807438fd1498Szrj   /* Create the iv update inside the loop  */
807538fd1498Szrj   vec_def = make_ssa_name (vec_dest);
807638fd1498Szrj   new_stmt = gimple_build_assign (vec_def, PLUS_EXPR, induc_def, vec_step);
807738fd1498Szrj   gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
807838fd1498Szrj   set_vinfo_for_stmt (new_stmt, new_stmt_vec_info (new_stmt, loop_vinfo));
807938fd1498Szrj 
808038fd1498Szrj   /* Set the arguments of the phi node:  */
808138fd1498Szrj   add_phi_arg (induction_phi, vec_init, pe, UNKNOWN_LOCATION);
808238fd1498Szrj   add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop),
808338fd1498Szrj 	       UNKNOWN_LOCATION);
808438fd1498Szrj 
808538fd1498Szrj   STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = induction_phi;
808638fd1498Szrj 
808738fd1498Szrj   /* In case that vectorization factor (VF) is bigger than the number
808838fd1498Szrj      of elements that we can fit in a vectype (nunits), we have to generate
808938fd1498Szrj      more than one vector stmt - i.e - we need to "unroll" the
809038fd1498Szrj      vector stmt by a factor VF/nunits.  For more details see documentation
809138fd1498Szrj      in vectorizable_operation.  */
809238fd1498Szrj 
809338fd1498Szrj   if (ncopies > 1)
809438fd1498Szrj     {
809538fd1498Szrj       gimple_seq seq = NULL;
809638fd1498Szrj       stmt_vec_info prev_stmt_vinfo;
809738fd1498Szrj       /* FORNOW. This restriction should be relaxed.  */
809838fd1498Szrj       gcc_assert (!nested_in_vect_loop);
809938fd1498Szrj 
810038fd1498Szrj       /* Create the vector that holds the step of the induction.  */
810138fd1498Szrj       if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
810238fd1498Szrj 	{
810338fd1498Szrj 	  expr = build_int_cst (integer_type_node, nunits);
810438fd1498Szrj 	  expr = gimple_build (&seq, FLOAT_EXPR, TREE_TYPE (step_expr), expr);
810538fd1498Szrj 	}
810638fd1498Szrj       else
810738fd1498Szrj 	expr = build_int_cst (TREE_TYPE (step_expr), nunits);
810838fd1498Szrj       new_name = gimple_build (&seq, MULT_EXPR, TREE_TYPE (step_expr),
810938fd1498Szrj 			       expr, step_expr);
811038fd1498Szrj       if (seq)
811138fd1498Szrj 	{
811238fd1498Szrj 	  new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
811338fd1498Szrj 	  gcc_assert (!new_bb);
811438fd1498Szrj 	}
811538fd1498Szrj 
811638fd1498Szrj       t = unshare_expr (new_name);
811738fd1498Szrj       gcc_assert (CONSTANT_CLASS_P (new_name)
811838fd1498Szrj 		  || TREE_CODE (new_name) == SSA_NAME);
811938fd1498Szrj       new_vec = build_vector_from_val (vectype, t);
812038fd1498Szrj       vec_step = vect_init_vector (phi, new_vec, vectype, NULL);
812138fd1498Szrj 
812238fd1498Szrj       vec_def = induc_def;
812338fd1498Szrj       prev_stmt_vinfo = vinfo_for_stmt (induction_phi);
812438fd1498Szrj       for (i = 1; i < ncopies; i++)
812538fd1498Szrj 	{
812638fd1498Szrj 	  /* vec_i = vec_prev + vec_step  */
812738fd1498Szrj 	  new_stmt = gimple_build_assign (vec_dest, PLUS_EXPR,
812838fd1498Szrj 					  vec_def, vec_step);
812938fd1498Szrj 	  vec_def = make_ssa_name (vec_dest, new_stmt);
813038fd1498Szrj 	  gimple_assign_set_lhs (new_stmt, vec_def);
813138fd1498Szrj 
813238fd1498Szrj 	  gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
813338fd1498Szrj 	  set_vinfo_for_stmt (new_stmt,
813438fd1498Szrj 			      new_stmt_vec_info (new_stmt, loop_vinfo));
813538fd1498Szrj 	  STMT_VINFO_RELATED_STMT (prev_stmt_vinfo) = new_stmt;
813638fd1498Szrj 	  prev_stmt_vinfo = vinfo_for_stmt (new_stmt);
813738fd1498Szrj 	}
813838fd1498Szrj     }
813938fd1498Szrj 
814038fd1498Szrj   if (nested_in_vect_loop)
814138fd1498Szrj     {
814238fd1498Szrj       /* Find the loop-closed exit-phi of the induction, and record
814338fd1498Szrj          the final vector of induction results:  */
814438fd1498Szrj       exit_phi = NULL;
814538fd1498Szrj       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, loop_arg)
814638fd1498Szrj         {
814738fd1498Szrj 	  gimple *use_stmt = USE_STMT (use_p);
814838fd1498Szrj 	  if (is_gimple_debug (use_stmt))
814938fd1498Szrj 	    continue;
815038fd1498Szrj 
815138fd1498Szrj 	  if (!flow_bb_inside_loop_p (iv_loop, gimple_bb (use_stmt)))
815238fd1498Szrj 	    {
815338fd1498Szrj 	      exit_phi = use_stmt;
815438fd1498Szrj 	      break;
815538fd1498Szrj 	    }
815638fd1498Szrj         }
815738fd1498Szrj       if (exit_phi)
815838fd1498Szrj 	{
815938fd1498Szrj 	  stmt_vec_info stmt_vinfo = vinfo_for_stmt (exit_phi);
816038fd1498Szrj 	  /* FORNOW. Currently not supporting the case that an inner-loop induction
816138fd1498Szrj 	     is not used in the outer-loop (i.e. only outside the outer-loop).  */
816238fd1498Szrj 	  gcc_assert (STMT_VINFO_RELEVANT_P (stmt_vinfo)
816338fd1498Szrj 		      && !STMT_VINFO_LIVE_P (stmt_vinfo));
816438fd1498Szrj 
816538fd1498Szrj 	  STMT_VINFO_VEC_STMT (stmt_vinfo) = new_stmt;
816638fd1498Szrj 	  if (dump_enabled_p ())
816738fd1498Szrj 	    {
816838fd1498Szrj 	      dump_printf_loc (MSG_NOTE, vect_location,
816938fd1498Szrj 			       "vector of inductions after inner-loop:");
817038fd1498Szrj 	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
817138fd1498Szrj 	    }
817238fd1498Szrj 	}
817338fd1498Szrj     }
817438fd1498Szrj 
817538fd1498Szrj 
817638fd1498Szrj   if (dump_enabled_p ())
817738fd1498Szrj     {
817838fd1498Szrj       dump_printf_loc (MSG_NOTE, vect_location,
817938fd1498Szrj 		       "transform induction: created def-use cycle: ");
818038fd1498Szrj       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, induction_phi, 0);
818138fd1498Szrj       dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
818238fd1498Szrj 			SSA_NAME_DEF_STMT (vec_def), 0);
818338fd1498Szrj     }
818438fd1498Szrj 
818538fd1498Szrj   return true;
818638fd1498Szrj }
818738fd1498Szrj 
/* Function vectorizable_live_operation.

   STMT computes a value that is used outside the loop.  Check if
   it can be supported.

   Called both during analysis (VEC_STMT is null: only check support
   and record any fully-masked-loop requirements) and during transform
   (VEC_STMT is nonnull: extract the final scalar value from the
   vectorized result and rewrite the out-of-loop uses).  Returns true
   on success.  */

bool
vectorizable_live_operation (gimple *stmt,
			     gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
			     slp_tree slp_node, int slp_index,
			     gimple **vec_stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  imm_use_iterator imm_iter;
  tree lhs, lhs_type, bitsize, vec_bitsize;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies;
  gimple *use_stmt;
  auto_vec<tree> vec_oprnds;
  /* For SLP: which of the node's vector statements holds the final
     scalar value, and the lane within that vector.  */
  int vec_entry = 0;
  poly_uint64 vec_index = 0;

  gcc_assert (STMT_VINFO_LIVE_P (stmt_info));

  /* Live-out reductions are handled by the reduction code, not here.  */
  if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
    return false;

  /* FORNOW.  CHECKME.  */
  if (nested_in_vect_loop_p (loop, stmt))
    return false;

  /* If STMT is not relevant and it is a simple assignment and its inputs are
     invariant then it can remain in place, unvectorized.  The original last
     scalar value that it computes will be used.  */
  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    {
      gcc_assert (is_simple_and_all_uses_invariant (stmt, loop_vinfo));
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "statement is simple and uses invariant.  Leaving in "
			 "place.\n");
      return true;
    }

  if (slp_node)
    ncopies = 1;
  else
    ncopies = vect_get_num_copies (loop_vinfo, vectype);

  if (slp_node)
    {
      gcc_assert (slp_index >= 0);

      int num_scalar = SLP_TREE_SCALAR_STMTS (slp_node).length ();
      int num_vec = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);

      /* Get the last occurrence of the scalar index from the concatenation of
	 all the slp vectors. Calculate which slp vector it is and the index
	 within.  */
      poly_uint64 pos = (num_vec * nunits) - num_scalar + slp_index;

      /* Calculate which vector contains the result, and which lane of
	 that vector we need.  */
      if (!can_div_trunc_p (pos, nunits, &vec_entry, &vec_index))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "Cannot determine which vector holds the"
			     " final result.\n");
	  return false;
	}
    }

  if (!vec_stmt)
    {
      /* No transformation required.  Analysis phase: a fully-masked loop
	 can only handle the single-copy, non-SLP case, and only if the
	 target provides EXTRACT_LAST; otherwise disable full masking.  */
      if (LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
	{
	  if (!direct_internal_fn_supported_p (IFN_EXTRACT_LAST, vectype,
					       OPTIMIZE_FOR_SPEED))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "can't use a fully-masked loop because "
				 "the target doesn't support extract last "
				 "reduction.\n");
	      LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
	    }
	  else if (slp_node)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "can't use a fully-masked loop because an "
				 "SLP statement is live after the loop.\n");
	      LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
	    }
	  else if (ncopies > 1)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "can't use a fully-masked loop because"
				 " ncopies is greater than 1.\n");
	      LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
	    }
	  else
	    {
	      gcc_assert (ncopies == 1 && !slp_node);
	      vect_record_loop_mask (loop_vinfo,
				     &LOOP_VINFO_MASKS (loop_vinfo),
				     1, vectype);
	    }
	}
      return true;
    }

  /* If stmt has a related stmt, then use that for getting the lhs.  */
  if (is_pattern_stmt_p (stmt_info))
    stmt = STMT_VINFO_RELATED_STMT (stmt_info);

  lhs = (is_a <gphi *> (stmt)) ? gimple_phi_result (stmt)
	: gimple_get_lhs (stmt);
  lhs_type = TREE_TYPE (lhs);

  /* For boolean vectors the useful bits per element are given by the
     element precision rather than TYPE_SIZE.  */
  bitsize = (VECTOR_BOOLEAN_TYPE_P (vectype)
	     ? bitsize_int (TYPE_PRECISION (TREE_TYPE (vectype)))
	     : TYPE_SIZE (TREE_TYPE (vectype)));
  vec_bitsize = TYPE_SIZE (vectype);

  /* Get the vectorized lhs of STMT and the lane to use (counted in bits).  */
  tree vec_lhs, bitstart;
  if (slp_node)
    {
      gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));

      /* Get the correct slp vectorized stmt.  */
      gimple *vec_stmt = SLP_TREE_VEC_STMTS (slp_node)[vec_entry];
      if (gphi *phi = dyn_cast <gphi *> (vec_stmt))
	vec_lhs = gimple_phi_result (phi);
      else
	vec_lhs = gimple_get_lhs (vec_stmt);

      /* Get entry to use.  */
      bitstart = bitsize_int (vec_index);
      bitstart = int_const_binop (MULT_EXPR, bitsize, bitstart);
    }
  else
    {
      enum vect_def_type dt = STMT_VINFO_DEF_TYPE (stmt_info);
      vec_lhs = vect_get_vec_def_for_operand_1 (stmt, dt);
      gcc_checking_assert (ncopies == 1
			   || !LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));

      /* For multiple copies, get the last copy.  */
      for (int i = 1; i < ncopies; ++i)
	vec_lhs = vect_get_vec_def_for_stmt_copy (vect_unknown_def_type,
						  vec_lhs);

      /* Get the last lane in the vector.  */
      bitstart = int_const_binop (MINUS_EXPR, vec_bitsize, bitsize);
    }

  gimple_seq stmts = NULL;
  tree new_tree;
  if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
    {
      /* Emit:

	   SCALAR_RES = EXTRACT_LAST <VEC_LHS, MASK>

	 where VEC_LHS is the vectorized live-out result and MASK is
	 the loop mask for the final iteration.  */
      gcc_assert (ncopies == 1 && !slp_node);
      tree scalar_type = TREE_TYPE (STMT_VINFO_VECTYPE (stmt_info));
      tree scalar_res = make_ssa_name (scalar_type);
      tree mask = vect_get_loop_mask (gsi, &LOOP_VINFO_MASKS (loop_vinfo),
				      1, vectype, 0);
      gcall *new_stmt = gimple_build_call_internal (IFN_EXTRACT_LAST,
						    2, mask, vec_lhs);
      gimple_call_set_lhs (new_stmt, scalar_res);
      gimple_seq_add_stmt (&stmts, new_stmt);

      /* Convert the extracted vector element to the required scalar type.  */
      new_tree = gimple_convert (&stmts, lhs_type, scalar_res);
    }
  else
    {
      /* Extract the chosen lane with a BIT_FIELD_REF and convert it to
	 the scalar type of the original lhs.  */
      tree bftype = TREE_TYPE (vectype);
      if (VECTOR_BOOLEAN_TYPE_P (vectype))
	bftype = build_nonstandard_integer_type (tree_to_uhwi (bitsize), 1);
      new_tree = build3 (BIT_FIELD_REF, bftype, vec_lhs, bitsize, bitstart);
      new_tree = force_gimple_operand (fold_convert (lhs_type, new_tree),
				       &stmts, true, NULL_TREE);
    }

  /* The extraction runs after the loop, on its single exit edge.  */
  if (stmts)
    gsi_insert_seq_on_edge_immediate (single_exit (loop), stmts);

  /* Replace use of lhs with newly computed result.  If the use stmt is a
     single arg PHI, just replace all uses of PHI result.  It's necessary
     because lcssa PHI defining lhs may be before newly inserted stmt.  */
  use_operand_p use_p;
  FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, lhs)
    if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))
	&& !is_gimple_debug (use_stmt))
    {
      if (gimple_code (use_stmt) == GIMPLE_PHI
	  && gimple_phi_num_args (use_stmt) == 1)
	{
	  replace_uses_by (gimple_phi_result (use_stmt), new_tree);
	}
      else
	{
	  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
	    SET_USE (use_p, new_tree);
	}
      update_stmt (use_stmt);
    }

  return true;
}
841038fd1498Szrj 
841138fd1498Szrj /* Kill any debug uses outside LOOP of SSA names defined in STMT.  */
841238fd1498Szrj 
841338fd1498Szrj static void
vect_loop_kill_debug_uses(struct loop * loop,gimple * stmt)841438fd1498Szrj vect_loop_kill_debug_uses (struct loop *loop, gimple *stmt)
841538fd1498Szrj {
841638fd1498Szrj   ssa_op_iter op_iter;
841738fd1498Szrj   imm_use_iterator imm_iter;
841838fd1498Szrj   def_operand_p def_p;
841938fd1498Szrj   gimple *ustmt;
842038fd1498Szrj 
842138fd1498Szrj   FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
842238fd1498Szrj     {
842338fd1498Szrj       FOR_EACH_IMM_USE_STMT (ustmt, imm_iter, DEF_FROM_PTR (def_p))
842438fd1498Szrj 	{
842538fd1498Szrj 	  basic_block bb;
842638fd1498Szrj 
842738fd1498Szrj 	  if (!is_gimple_debug (ustmt))
842838fd1498Szrj 	    continue;
842938fd1498Szrj 
843038fd1498Szrj 	  bb = gimple_bb (ustmt);
843138fd1498Szrj 
843238fd1498Szrj 	  if (!flow_bb_inside_loop_p (loop, bb))
843338fd1498Szrj 	    {
843438fd1498Szrj 	      if (gimple_debug_bind_p (ustmt))
843538fd1498Szrj 		{
843638fd1498Szrj 		  if (dump_enabled_p ())
843738fd1498Szrj 		    dump_printf_loc (MSG_NOTE, vect_location,
843838fd1498Szrj                                      "killing debug use\n");
843938fd1498Szrj 
844038fd1498Szrj 		  gimple_debug_bind_reset_value (ustmt);
844138fd1498Szrj 		  update_stmt (ustmt);
844238fd1498Szrj 		}
844338fd1498Szrj 	      else
844438fd1498Szrj 		gcc_unreachable ();
844538fd1498Szrj 	    }
844638fd1498Szrj 	}
844738fd1498Szrj     }
844838fd1498Szrj }
844938fd1498Szrj 
845038fd1498Szrj /* Given loop represented by LOOP_VINFO, return true if computation of
845138fd1498Szrj    LOOP_VINFO_NITERS (= LOOP_VINFO_NITERSM1 + 1) doesn't overflow, false
845238fd1498Szrj    otherwise.  */
845338fd1498Szrj 
845438fd1498Szrj static bool
loop_niters_no_overflow(loop_vec_info loop_vinfo)845538fd1498Szrj loop_niters_no_overflow (loop_vec_info loop_vinfo)
845638fd1498Szrj {
845738fd1498Szrj   /* Constant case.  */
845838fd1498Szrj   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
845938fd1498Szrj     {
846038fd1498Szrj       tree cst_niters = LOOP_VINFO_NITERS (loop_vinfo);
846138fd1498Szrj       tree cst_nitersm1 = LOOP_VINFO_NITERSM1 (loop_vinfo);
846238fd1498Szrj 
846338fd1498Szrj       gcc_assert (TREE_CODE (cst_niters) == INTEGER_CST);
846438fd1498Szrj       gcc_assert (TREE_CODE (cst_nitersm1) == INTEGER_CST);
846538fd1498Szrj       if (wi::to_widest (cst_nitersm1) < wi::to_widest (cst_niters))
846638fd1498Szrj 	return true;
846738fd1498Szrj     }
846838fd1498Szrj 
846938fd1498Szrj   widest_int max;
847038fd1498Szrj   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
847138fd1498Szrj   /* Check the upper bound of loop niters.  */
847238fd1498Szrj   if (get_max_loop_iterations (loop, &max))
847338fd1498Szrj     {
847438fd1498Szrj       tree type = TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo));
847538fd1498Szrj       signop sgn = TYPE_SIGN (type);
847638fd1498Szrj       widest_int type_max = widest_int::from (wi::max_value (type), sgn);
847738fd1498Szrj       if (max < type_max)
847838fd1498Szrj 	return true;
847938fd1498Szrj     }
848038fd1498Szrj   return false;
848138fd1498Szrj }
848238fd1498Szrj 
848338fd1498Szrj /* Return a mask type with half the number of elements as TYPE.  */
848438fd1498Szrj 
848538fd1498Szrj tree
vect_halve_mask_nunits(tree type)848638fd1498Szrj vect_halve_mask_nunits (tree type)
848738fd1498Szrj {
848838fd1498Szrj   poly_uint64 nunits = exact_div (TYPE_VECTOR_SUBPARTS (type), 2);
848938fd1498Szrj   return build_truth_vector_type (nunits, current_vector_size);
849038fd1498Szrj }
849138fd1498Szrj 
849238fd1498Szrj /* Return a mask type with twice as many elements as TYPE.  */
849338fd1498Szrj 
849438fd1498Szrj tree
vect_double_mask_nunits(tree type)849538fd1498Szrj vect_double_mask_nunits (tree type)
849638fd1498Szrj {
849738fd1498Szrj   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (type) * 2;
849838fd1498Szrj   return build_truth_vector_type (nunits, current_vector_size);
849938fd1498Szrj }
850038fd1498Szrj 
850138fd1498Szrj /* Record that a fully-masked version of LOOP_VINFO would need MASKS to
850238fd1498Szrj    contain a sequence of NVECTORS masks that each control a vector of type
850338fd1498Szrj    VECTYPE.  */
850438fd1498Szrj 
850538fd1498Szrj void
vect_record_loop_mask(loop_vec_info loop_vinfo,vec_loop_masks * masks,unsigned int nvectors,tree vectype)850638fd1498Szrj vect_record_loop_mask (loop_vec_info loop_vinfo, vec_loop_masks *masks,
850738fd1498Szrj 		       unsigned int nvectors, tree vectype)
850838fd1498Szrj {
850938fd1498Szrj   gcc_assert (nvectors != 0);
851038fd1498Szrj   if (masks->length () < nvectors)
851138fd1498Szrj     masks->safe_grow_cleared (nvectors);
851238fd1498Szrj   rgroup_masks *rgm = &(*masks)[nvectors - 1];
851338fd1498Szrj   /* The number of scalars per iteration and the number of vectors are
851438fd1498Szrj      both compile-time constants.  */
851538fd1498Szrj   unsigned int nscalars_per_iter
851638fd1498Szrj     = exact_div (nvectors * TYPE_VECTOR_SUBPARTS (vectype),
851738fd1498Szrj 		 LOOP_VINFO_VECT_FACTOR (loop_vinfo)).to_constant ();
851838fd1498Szrj   if (rgm->max_nscalars_per_iter < nscalars_per_iter)
851938fd1498Szrj     {
852038fd1498Szrj       rgm->max_nscalars_per_iter = nscalars_per_iter;
852138fd1498Szrj       rgm->mask_type = build_same_sized_truth_vector_type (vectype);
852238fd1498Szrj     }
852338fd1498Szrj }
852438fd1498Szrj 
/* Given a complete set of masks MASKS, extract mask number INDEX
   for an rgroup that operates on NVECTORS vectors of type VECTYPE,
   where 0 <= INDEX < NVECTORS.  Insert any set-up statements before GSI.

   See the comment above vec_loop_masks for more details about the mask
   arrangement.  */

tree
vect_get_loop_mask (gimple_stmt_iterator *gsi, vec_loop_masks *masks,
		    unsigned int nvectors, tree vectype, unsigned int index)
{
  /* The rgroup for NVECTORS masks is stored at index NVECTORS - 1.  */
  rgroup_masks *rgm = &(*masks)[nvectors - 1];
  tree mask_type = rgm->mask_type;

  /* Populate the rgroup's mask array, if this is the first time we've
     used it.  */
  if (rgm->masks.is_empty ())
    {
      rgm->masks.safe_grow_cleared (nvectors);
      for (unsigned int i = 0; i < nvectors; ++i)
	{
	  tree mask = make_temp_ssa_name (mask_type, NULL, "loop_mask");
	  /* Provide a dummy definition until the real one is available.  */
	  SSA_NAME_DEF_STMT (mask) = gimple_build_nop ();
	  rgm->masks[i] = mask;
	}
    }

  tree mask = rgm->masks[index];
  /* If the caller's vector type has a different element count from the
     stored mask type, adjust the mask by view-converting it.  */
  if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_type),
		TYPE_VECTOR_SUBPARTS (vectype)))
    {
      /* A loop mask for data type X can be reused for data type Y
	 if X has N times more elements than Y and if Y's elements
	 are N times bigger than X's.  In this case each sequence
	 of N elements in the loop mask will be all-zero or all-one.
	 We can then view-convert the mask so that each sequence of
	 N elements is replaced by a single element.  */
      gcc_assert (multiple_p (TYPE_VECTOR_SUBPARTS (mask_type),
			      TYPE_VECTOR_SUBPARTS (vectype)));
      gimple_seq seq = NULL;
      mask_type = build_same_sized_truth_vector_type (vectype);
      mask = gimple_build (&seq, VIEW_CONVERT_EXPR, mask_type, mask);
      if (seq)
	gsi_insert_seq_before (gsi, seq, GSI_SAME_STMT);
    }
  return mask;
}
857338fd1498Szrj 
/* Scale profiling counters by estimation for LOOP which is vectorized
   by factor VF.  */

static void
scale_profile_for_vect_loop (struct loop *loop, unsigned vf)
{
  edge preheader = loop_preheader_edge (loop);
  /* Reduce loop iterations by the vectorization factor.  */
  gcov_type new_est_niter = niter_for_unrolled_loop (loop, vf);
  profile_count freq_h = loop->header->count, freq_e = preheader->count ();

  if (freq_h.nonzero_p ())
    {
      profile_probability p;

      /* Avoid dropping loop body profile counter to 0 because of zero count
	 in loop's preheader.  */
      if (!(freq_e == profile_count::zero ()))
        freq_e = freq_e.force_nonzero ();
      /* Scale body frequencies so the header count matches the entry
	 count times the new iteration estimate (plus one).  */
      p = freq_e.apply_scale (new_est_niter + 1, 1).probability_in (freq_h);
      scale_loop_frequencies (loop, p);
    }

  /* The exit edge is now taken once per NEW_EST_NITER + 1 executions
     of the header.  */
  edge exit_e = single_exit (loop);
  exit_e->probability = profile_probability::always ()
				 .apply_scale (1, new_est_niter + 1);

  /* Update the latch edge to the complement of the exit probability,
     rescaling the latch block by the ratio of new to old probability.
     PROB is saved before the overwrite so the ratio can be formed.  */
  edge exit_l = single_pred_edge (loop->latch);
  profile_probability prob = exit_l->probability;
  exit_l->probability = exit_e->probability.invert ();
  if (prob.initialized_p () && exit_l->probability.initialized_p ())
    scale_bbs_frequencies (&loop->latch, 1, exit_l->probability / prob);
}
860738fd1498Szrj 
860838fd1498Szrj /* Function vect_transform_loop.
860938fd1498Szrj 
861038fd1498Szrj    The analysis phase has determined that the loop is vectorizable.
861138fd1498Szrj    Vectorize the loop - created vectorized stmts to replace the scalar
861238fd1498Szrj    stmts in the loop, and update the loop exit condition.
861338fd1498Szrj    Returns scalar epilogue loop if any.  */
861438fd1498Szrj 
861538fd1498Szrj struct loop *
vect_transform_loop(loop_vec_info loop_vinfo)861638fd1498Szrj vect_transform_loop (loop_vec_info loop_vinfo)
861738fd1498Szrj {
861838fd1498Szrj   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
861938fd1498Szrj   struct loop *epilogue = NULL;
862038fd1498Szrj   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
862138fd1498Szrj   int nbbs = loop->num_nodes;
862238fd1498Szrj   int i;
862338fd1498Szrj   tree niters_vector = NULL_TREE;
862438fd1498Szrj   tree step_vector = NULL_TREE;
862538fd1498Szrj   tree niters_vector_mult_vf = NULL_TREE;
862638fd1498Szrj   poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
862738fd1498Szrj   unsigned int lowest_vf = constant_lower_bound (vf);
862838fd1498Szrj   bool grouped_store;
862938fd1498Szrj   bool slp_scheduled = false;
863038fd1498Szrj   gimple *stmt, *pattern_stmt;
863138fd1498Szrj   gimple_seq pattern_def_seq = NULL;
863238fd1498Szrj   gimple_stmt_iterator pattern_def_si = gsi_none ();
863338fd1498Szrj   bool transform_pattern_stmt = false;
863438fd1498Szrj   bool check_profitability = false;
863538fd1498Szrj   unsigned int th;
863638fd1498Szrj 
863738fd1498Szrj   if (dump_enabled_p ())
863838fd1498Szrj     dump_printf_loc (MSG_NOTE, vect_location, "=== vec_transform_loop ===\n");
863938fd1498Szrj 
864038fd1498Szrj   /* Use the more conservative vectorization threshold.  If the number
864138fd1498Szrj      of iterations is constant assume the cost check has been performed
864238fd1498Szrj      by our caller.  If the threshold makes all loops profitable that
864338fd1498Szrj      run at least the (estimated) vectorization factor number of times
864438fd1498Szrj      checking is pointless, too.  */
864538fd1498Szrj   th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo);
864638fd1498Szrj   if (th >= vect_vf_for_cost (loop_vinfo)
864738fd1498Szrj       && !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
864838fd1498Szrj     {
864938fd1498Szrj       if (dump_enabled_p ())
865038fd1498Szrj 	dump_printf_loc (MSG_NOTE, vect_location,
865138fd1498Szrj 			 "Profitability threshold is %d loop iterations.\n",
865238fd1498Szrj                          th);
865338fd1498Szrj       check_profitability = true;
865438fd1498Szrj     }
865538fd1498Szrj 
865638fd1498Szrj   /* Make sure there exists a single-predecessor exit bb.  Do this before
865738fd1498Szrj      versioning.   */
865838fd1498Szrj   edge e = single_exit (loop);
865938fd1498Szrj   if (! single_pred_p (e->dest))
866038fd1498Szrj     {
866138fd1498Szrj       split_loop_exit_edge (e);
866238fd1498Szrj       if (dump_enabled_p ())
866338fd1498Szrj 	dump_printf (MSG_NOTE, "split exit edge\n");
866438fd1498Szrj     }
866538fd1498Szrj 
866638fd1498Szrj   /* Version the loop first, if required, so the profitability check
866738fd1498Szrj      comes first.  */
866838fd1498Szrj 
866938fd1498Szrj   if (LOOP_REQUIRES_VERSIONING (loop_vinfo))
867038fd1498Szrj     {
867138fd1498Szrj       poly_uint64 versioning_threshold
867238fd1498Szrj 	= LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo);
867338fd1498Szrj       if (check_profitability
867438fd1498Szrj 	  && ordered_p (poly_uint64 (th), versioning_threshold))
867538fd1498Szrj 	{
867638fd1498Szrj 	  versioning_threshold = ordered_max (poly_uint64 (th),
867738fd1498Szrj 					      versioning_threshold);
867838fd1498Szrj 	  check_profitability = false;
867938fd1498Szrj 	}
868038fd1498Szrj       vect_loop_versioning (loop_vinfo, th, check_profitability,
868138fd1498Szrj 			    versioning_threshold);
868238fd1498Szrj       check_profitability = false;
868338fd1498Szrj     }
868438fd1498Szrj 
868538fd1498Szrj   /* Make sure there exists a single-predecessor exit bb also on the
868638fd1498Szrj      scalar loop copy.  Do this after versioning but before peeling
868738fd1498Szrj      so CFG structure is fine for both scalar and if-converted loop
868838fd1498Szrj      to make slpeel_duplicate_current_defs_from_edges face matched
868938fd1498Szrj      loop closed PHI nodes on the exit.  */
869038fd1498Szrj   if (LOOP_VINFO_SCALAR_LOOP (loop_vinfo))
869138fd1498Szrj     {
869238fd1498Szrj       e = single_exit (LOOP_VINFO_SCALAR_LOOP (loop_vinfo));
869338fd1498Szrj       if (! single_pred_p (e->dest))
869438fd1498Szrj 	{
869538fd1498Szrj 	  split_loop_exit_edge (e);
869638fd1498Szrj 	  if (dump_enabled_p ())
869738fd1498Szrj 	    dump_printf (MSG_NOTE, "split exit edge of scalar loop\n");
869838fd1498Szrj 	}
869938fd1498Szrj     }
870038fd1498Szrj 
870138fd1498Szrj   tree niters = vect_build_loop_niters (loop_vinfo);
870238fd1498Szrj   LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = niters;
870338fd1498Szrj   tree nitersm1 = unshare_expr (LOOP_VINFO_NITERSM1 (loop_vinfo));
870438fd1498Szrj   bool niters_no_overflow = loop_niters_no_overflow (loop_vinfo);
870538fd1498Szrj   epilogue = vect_do_peeling (loop_vinfo, niters, nitersm1, &niters_vector,
870638fd1498Szrj 			      &step_vector, &niters_vector_mult_vf, th,
870738fd1498Szrj 			      check_profitability, niters_no_overflow);
870838fd1498Szrj 
870938fd1498Szrj   if (niters_vector == NULL_TREE)
871038fd1498Szrj     {
871138fd1498Szrj       if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
871238fd1498Szrj 	  && !LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
871338fd1498Szrj 	  && known_eq (lowest_vf, vf))
871438fd1498Szrj 	{
871538fd1498Szrj 	  niters_vector
871638fd1498Szrj 	    = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
871738fd1498Szrj 			     LOOP_VINFO_INT_NITERS (loop_vinfo) / lowest_vf);
871838fd1498Szrj 	  step_vector = build_one_cst (TREE_TYPE (niters));
871938fd1498Szrj 	}
872038fd1498Szrj       else
872138fd1498Szrj 	vect_gen_vector_loop_niters (loop_vinfo, niters, &niters_vector,
872238fd1498Szrj 				     &step_vector, niters_no_overflow);
872338fd1498Szrj     }
872438fd1498Szrj 
872538fd1498Szrj   /* 1) Make sure the loop header has exactly two entries
872638fd1498Szrj      2) Make sure we have a preheader basic block.  */
872738fd1498Szrj 
872838fd1498Szrj   gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
872938fd1498Szrj 
873038fd1498Szrj   split_edge (loop_preheader_edge (loop));
873138fd1498Szrj 
873238fd1498Szrj   if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
873338fd1498Szrj       && vect_use_loop_mask_for_alignment_p (loop_vinfo))
873438fd1498Szrj     /* This will deal with any possible peeling.  */
873538fd1498Szrj     vect_prepare_for_masked_peels (loop_vinfo);
873638fd1498Szrj 
873738fd1498Szrj   /* FORNOW: the vectorizer supports only loops which body consist
873838fd1498Szrj      of one basic block (header + empty latch). When the vectorizer will
873938fd1498Szrj      support more involved loop forms, the order by which the BBs are
874038fd1498Szrj      traversed need to be reconsidered.  */
874138fd1498Szrj 
874238fd1498Szrj   for (i = 0; i < nbbs; i++)
874338fd1498Szrj     {
874438fd1498Szrj       basic_block bb = bbs[i];
874538fd1498Szrj       stmt_vec_info stmt_info;
874638fd1498Szrj 
874738fd1498Szrj       for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
874838fd1498Szrj 	   gsi_next (&si))
874938fd1498Szrj         {
875038fd1498Szrj 	  gphi *phi = si.phi ();
875138fd1498Szrj 	  if (dump_enabled_p ())
875238fd1498Szrj 	    {
875338fd1498Szrj 	      dump_printf_loc (MSG_NOTE, vect_location,
875438fd1498Szrj                                "------>vectorizing phi: ");
875538fd1498Szrj 	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
875638fd1498Szrj 	    }
875738fd1498Szrj 	  stmt_info = vinfo_for_stmt (phi);
875838fd1498Szrj 	  if (!stmt_info)
875938fd1498Szrj 	    continue;
876038fd1498Szrj 
876138fd1498Szrj 	  if (MAY_HAVE_DEBUG_BIND_STMTS && !STMT_VINFO_LIVE_P (stmt_info))
876238fd1498Szrj 	    vect_loop_kill_debug_uses (loop, phi);
876338fd1498Szrj 
876438fd1498Szrj 	  if (!STMT_VINFO_RELEVANT_P (stmt_info)
876538fd1498Szrj 	      && !STMT_VINFO_LIVE_P (stmt_info))
876638fd1498Szrj 	    continue;
876738fd1498Szrj 
876838fd1498Szrj 	  if (STMT_VINFO_VECTYPE (stmt_info)
876938fd1498Szrj 	      && (maybe_ne
877038fd1498Szrj 		  (TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info)), vf))
877138fd1498Szrj 	      && dump_enabled_p ())
877238fd1498Szrj 	    dump_printf_loc (MSG_NOTE, vect_location, "multiple-types.\n");
877338fd1498Szrj 
877438fd1498Szrj 	  if ((STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def
877538fd1498Szrj 	       || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
877638fd1498Szrj 	       || STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle)
877738fd1498Szrj 	      && ! PURE_SLP_STMT (stmt_info))
877838fd1498Szrj 	    {
877938fd1498Szrj 	      if (dump_enabled_p ())
878038fd1498Szrj 		dump_printf_loc (MSG_NOTE, vect_location, "transform phi.\n");
878138fd1498Szrj 	      vect_transform_stmt (phi, NULL, NULL, NULL, NULL);
878238fd1498Szrj 	    }
878338fd1498Szrj 	}
878438fd1498Szrj 
878538fd1498Szrj       pattern_stmt = NULL;
878638fd1498Szrj       for (gimple_stmt_iterator si = gsi_start_bb (bb);
878738fd1498Szrj 	   !gsi_end_p (si) || transform_pattern_stmt;)
878838fd1498Szrj 	{
878938fd1498Szrj 	  bool is_store;
879038fd1498Szrj 
879138fd1498Szrj           if (transform_pattern_stmt)
879238fd1498Szrj 	    stmt = pattern_stmt;
879338fd1498Szrj           else
879438fd1498Szrj 	    {
879538fd1498Szrj 	      stmt = gsi_stmt (si);
879638fd1498Szrj 	      /* During vectorization remove existing clobber stmts.  */
879738fd1498Szrj 	      if (gimple_clobber_p (stmt))
879838fd1498Szrj 		{
879938fd1498Szrj 		  unlink_stmt_vdef (stmt);
880038fd1498Szrj 		  gsi_remove (&si, true);
880138fd1498Szrj 		  release_defs (stmt);
880238fd1498Szrj 		  continue;
880338fd1498Szrj 		}
880438fd1498Szrj 	    }
880538fd1498Szrj 
880638fd1498Szrj 	  if (dump_enabled_p ())
880738fd1498Szrj 	    {
880838fd1498Szrj 	      dump_printf_loc (MSG_NOTE, vect_location,
880938fd1498Szrj 			       "------>vectorizing statement: ");
881038fd1498Szrj 	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
881138fd1498Szrj 	    }
881238fd1498Szrj 
881338fd1498Szrj 	  stmt_info = vinfo_for_stmt (stmt);
881438fd1498Szrj 
881538fd1498Szrj 	  /* vector stmts created in the outer-loop during vectorization of
881638fd1498Szrj 	     stmts in an inner-loop may not have a stmt_info, and do not
881738fd1498Szrj 	     need to be vectorized.  */
881838fd1498Szrj 	  if (!stmt_info)
881938fd1498Szrj 	    {
882038fd1498Szrj 	      gsi_next (&si);
882138fd1498Szrj 	      continue;
882238fd1498Szrj 	    }
882338fd1498Szrj 
882438fd1498Szrj 	  if (MAY_HAVE_DEBUG_BIND_STMTS && !STMT_VINFO_LIVE_P (stmt_info))
882538fd1498Szrj 	    vect_loop_kill_debug_uses (loop, stmt);
882638fd1498Szrj 
882738fd1498Szrj 	  if (!STMT_VINFO_RELEVANT_P (stmt_info)
882838fd1498Szrj 	      && !STMT_VINFO_LIVE_P (stmt_info))
882938fd1498Szrj             {
883038fd1498Szrj               if (STMT_VINFO_IN_PATTERN_P (stmt_info)
883138fd1498Szrj                   && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
883238fd1498Szrj                   && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
883338fd1498Szrj                       || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
883438fd1498Szrj                 {
883538fd1498Szrj                   stmt = pattern_stmt;
883638fd1498Szrj                   stmt_info = vinfo_for_stmt (stmt);
883738fd1498Szrj                 }
883838fd1498Szrj               else
883938fd1498Szrj 	        {
884038fd1498Szrj    	          gsi_next (&si);
884138fd1498Szrj 	          continue;
884238fd1498Szrj                 }
884338fd1498Szrj 	    }
884438fd1498Szrj           else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
884538fd1498Szrj                    && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
884638fd1498Szrj                    && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
884738fd1498Szrj                        || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
884838fd1498Szrj             transform_pattern_stmt = true;
884938fd1498Szrj 
885038fd1498Szrj 	  /* If pattern statement has def stmts, vectorize them too.  */
885138fd1498Szrj 	  if (is_pattern_stmt_p (stmt_info))
885238fd1498Szrj 	    {
885338fd1498Szrj 	      if (pattern_def_seq == NULL)
885438fd1498Szrj 		{
885538fd1498Szrj 		  pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info);
885638fd1498Szrj 		  pattern_def_si = gsi_start (pattern_def_seq);
885738fd1498Szrj 		}
885838fd1498Szrj 	      else if (!gsi_end_p (pattern_def_si))
885938fd1498Szrj 		gsi_next (&pattern_def_si);
886038fd1498Szrj 	      if (pattern_def_seq != NULL)
886138fd1498Szrj 		{
886238fd1498Szrj 		  gimple *pattern_def_stmt = NULL;
886338fd1498Szrj 		  stmt_vec_info pattern_def_stmt_info = NULL;
886438fd1498Szrj 
886538fd1498Szrj 		  while (!gsi_end_p (pattern_def_si))
886638fd1498Szrj 		    {
886738fd1498Szrj 		      pattern_def_stmt = gsi_stmt (pattern_def_si);
886838fd1498Szrj 		      pattern_def_stmt_info
886938fd1498Szrj 			= vinfo_for_stmt (pattern_def_stmt);
887038fd1498Szrj 		      if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
887138fd1498Szrj 			  || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
887238fd1498Szrj 			break;
887338fd1498Szrj 		      gsi_next (&pattern_def_si);
887438fd1498Szrj 		    }
887538fd1498Szrj 
887638fd1498Szrj 		  if (!gsi_end_p (pattern_def_si))
887738fd1498Szrj 		    {
887838fd1498Szrj 		      if (dump_enabled_p ())
887938fd1498Szrj 			{
888038fd1498Szrj 			  dump_printf_loc (MSG_NOTE, vect_location,
888138fd1498Szrj 					   "==> vectorizing pattern def "
888238fd1498Szrj 					   "stmt: ");
888338fd1498Szrj 			  dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
888438fd1498Szrj 					    pattern_def_stmt, 0);
888538fd1498Szrj 			}
888638fd1498Szrj 
888738fd1498Szrj 		      stmt = pattern_def_stmt;
888838fd1498Szrj 		      stmt_info = pattern_def_stmt_info;
888938fd1498Szrj 		    }
889038fd1498Szrj 		  else
889138fd1498Szrj 		    {
889238fd1498Szrj 		      pattern_def_si = gsi_none ();
889338fd1498Szrj 		      transform_pattern_stmt = false;
889438fd1498Szrj 		    }
889538fd1498Szrj 		}
889638fd1498Szrj 	      else
889738fd1498Szrj 		transform_pattern_stmt = false;
889838fd1498Szrj             }
889938fd1498Szrj 
890038fd1498Szrj 	  if (STMT_VINFO_VECTYPE (stmt_info))
890138fd1498Szrj 	    {
890238fd1498Szrj 	      poly_uint64 nunits
890338fd1498Szrj 		= TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
890438fd1498Szrj 	      if (!STMT_SLP_TYPE (stmt_info)
890538fd1498Szrj 		  && maybe_ne (nunits, vf)
890638fd1498Szrj 		  && dump_enabled_p ())
890738fd1498Szrj 		  /* For SLP VF is set according to unrolling factor, and not
890838fd1498Szrj 		     to vector size, hence for SLP this print is not valid.  */
890938fd1498Szrj 		dump_printf_loc (MSG_NOTE, vect_location, "multiple-types.\n");
891038fd1498Szrj 	    }
891138fd1498Szrj 
891238fd1498Szrj 	  /* SLP. Schedule all the SLP instances when the first SLP stmt is
891338fd1498Szrj 	     reached.  */
891438fd1498Szrj 	  if (STMT_SLP_TYPE (stmt_info))
891538fd1498Szrj 	    {
891638fd1498Szrj 	      if (!slp_scheduled)
891738fd1498Szrj 		{
891838fd1498Szrj 		  slp_scheduled = true;
891938fd1498Szrj 
892038fd1498Szrj 		  if (dump_enabled_p ())
892138fd1498Szrj 		    dump_printf_loc (MSG_NOTE, vect_location,
892238fd1498Szrj 				     "=== scheduling SLP instances ===\n");
892338fd1498Szrj 
892438fd1498Szrj 		  vect_schedule_slp (loop_vinfo);
892538fd1498Szrj 		}
892638fd1498Szrj 
892738fd1498Szrj 	      /* Hybrid SLP stmts must be vectorized in addition to SLP.  */
892838fd1498Szrj 	      if (!vinfo_for_stmt (stmt) || PURE_SLP_STMT (stmt_info))
892938fd1498Szrj 		{
893038fd1498Szrj 		  if (!transform_pattern_stmt && gsi_end_p (pattern_def_si))
893138fd1498Szrj 		    {
893238fd1498Szrj 		      pattern_def_seq = NULL;
893338fd1498Szrj 		      gsi_next (&si);
893438fd1498Szrj 		    }
893538fd1498Szrj 		  continue;
893638fd1498Szrj 		}
893738fd1498Szrj 	    }
893838fd1498Szrj 
893938fd1498Szrj 	  /* -------- vectorize statement ------------ */
894038fd1498Szrj 	  if (dump_enabled_p ())
894138fd1498Szrj 	    dump_printf_loc (MSG_NOTE, vect_location, "transform statement.\n");
894238fd1498Szrj 
894338fd1498Szrj 	  grouped_store = false;
894438fd1498Szrj 	  is_store = vect_transform_stmt (stmt, &si, &grouped_store, NULL, NULL);
894538fd1498Szrj           if (is_store)
894638fd1498Szrj             {
894738fd1498Szrj 	      if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
894838fd1498Szrj 		{
894938fd1498Szrj 		  /* Interleaving. If IS_STORE is TRUE, the vectorization of the
895038fd1498Szrj 		     interleaving chain was completed - free all the stores in
895138fd1498Szrj 		     the chain.  */
895238fd1498Szrj 		  gsi_next (&si);
895338fd1498Szrj 		  vect_remove_stores (GROUP_FIRST_ELEMENT (stmt_info));
895438fd1498Szrj 		}
895538fd1498Szrj 	      else
895638fd1498Szrj 		{
895738fd1498Szrj 		  /* Free the attached stmt_vec_info and remove the stmt.  */
895838fd1498Szrj 		  gimple *store = gsi_stmt (si);
895938fd1498Szrj 		  free_stmt_vec_info (store);
896038fd1498Szrj 		  unlink_stmt_vdef (store);
896138fd1498Szrj 		  gsi_remove (&si, true);
896238fd1498Szrj 		  release_defs (store);
896338fd1498Szrj 		}
896438fd1498Szrj 
896538fd1498Szrj 	      /* Stores can only appear at the end of pattern statements.  */
896638fd1498Szrj 	      gcc_assert (!transform_pattern_stmt);
896738fd1498Szrj 	      pattern_def_seq = NULL;
896838fd1498Szrj 	    }
896938fd1498Szrj 	  else if (!transform_pattern_stmt && gsi_end_p (pattern_def_si))
897038fd1498Szrj 	    {
897138fd1498Szrj 	      pattern_def_seq = NULL;
897238fd1498Szrj 	      gsi_next (&si);
897338fd1498Szrj 	    }
897438fd1498Szrj 	}		        /* stmts in BB */
897538fd1498Szrj 
897638fd1498Szrj       /* Stub out scalar statements that must not survive vectorization.
897738fd1498Szrj 	 Doing this here helps with grouped statements, or statements that
897838fd1498Szrj 	 are involved in patterns.  */
897938fd1498Szrj       for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
898038fd1498Szrj 	   !gsi_end_p (gsi); gsi_next (&gsi))
898138fd1498Szrj 	{
898238fd1498Szrj 	  gcall *call = dyn_cast <gcall *> (gsi_stmt (gsi));
898338fd1498Szrj 	  if (call && gimple_call_internal_p (call, IFN_MASK_LOAD))
898438fd1498Szrj 	    {
898538fd1498Szrj 	      tree lhs = gimple_get_lhs (call);
898638fd1498Szrj 	      if (!VECTOR_TYPE_P (TREE_TYPE (lhs)))
898738fd1498Szrj 		{
898838fd1498Szrj 		  tree zero = build_zero_cst (TREE_TYPE (lhs));
898938fd1498Szrj 		  gimple *new_stmt = gimple_build_assign (lhs, zero);
899038fd1498Szrj 		  gsi_replace (&gsi, new_stmt, true);
899138fd1498Szrj 		}
899238fd1498Szrj 	    }
899338fd1498Szrj 	}
899438fd1498Szrj     }				/* BBs in loop */
899538fd1498Szrj 
  /* The vectorization factor is always > 1, so if we use an IV increment of 1,
     a zero NITERS becomes a nonzero NITERS_VECTOR.  */
899838fd1498Szrj   if (integer_onep (step_vector))
899938fd1498Szrj     niters_no_overflow = true;
900038fd1498Szrj   vect_set_loop_condition (loop, loop_vinfo, niters_vector, step_vector,
900138fd1498Szrj 			   niters_vector_mult_vf, !niters_no_overflow);
900238fd1498Szrj 
900338fd1498Szrj   unsigned int assumed_vf = vect_vf_for_cost (loop_vinfo);
900438fd1498Szrj   scale_profile_for_vect_loop (loop, assumed_vf);
900538fd1498Szrj 
900638fd1498Szrj   /* True if the final iteration might not handle a full vector's
900738fd1498Szrj      worth of scalar iterations.  */
900838fd1498Szrj   bool final_iter_may_be_partial = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
900938fd1498Szrj   /* The minimum number of iterations performed by the epilogue.  This
901038fd1498Szrj      is 1 when peeling for gaps because we always need a final scalar
901138fd1498Szrj      iteration.  */
901238fd1498Szrj   int min_epilogue_iters = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) ? 1 : 0;
901338fd1498Szrj   /* +1 to convert latch counts to loop iteration counts,
901438fd1498Szrj      -min_epilogue_iters to remove iterations that cannot be performed
901538fd1498Szrj        by the vector code.  */
901638fd1498Szrj   int bias_for_lowest = 1 - min_epilogue_iters;
901738fd1498Szrj   int bias_for_assumed = bias_for_lowest;
901838fd1498Szrj   int alignment_npeels = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
901938fd1498Szrj   if (alignment_npeels && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
902038fd1498Szrj     {
902138fd1498Szrj       /* When the amount of peeling is known at compile time, the first
902238fd1498Szrj 	 iteration will have exactly alignment_npeels active elements.
902338fd1498Szrj 	 In the worst case it will have at least one.  */
902438fd1498Szrj       int min_first_active = (alignment_npeels > 0 ? alignment_npeels : 1);
902538fd1498Szrj       bias_for_lowest += lowest_vf - min_first_active;
902638fd1498Szrj       bias_for_assumed += assumed_vf - min_first_active;
902738fd1498Szrj     }
902838fd1498Szrj   /* In these calculations the "- 1" converts loop iteration counts
902938fd1498Szrj      back to latch counts.  */
903038fd1498Szrj   if (loop->any_upper_bound)
903138fd1498Szrj     loop->nb_iterations_upper_bound
903238fd1498Szrj       = (final_iter_may_be_partial
903338fd1498Szrj 	 ? wi::udiv_ceil (loop->nb_iterations_upper_bound + bias_for_lowest,
903438fd1498Szrj 			  lowest_vf) - 1
903538fd1498Szrj 	 : wi::udiv_floor (loop->nb_iterations_upper_bound + bias_for_lowest,
903638fd1498Szrj 			   lowest_vf) - 1);
903738fd1498Szrj   if (loop->any_likely_upper_bound)
903838fd1498Szrj     loop->nb_iterations_likely_upper_bound
903938fd1498Szrj       = (final_iter_may_be_partial
904038fd1498Szrj 	 ? wi::udiv_ceil (loop->nb_iterations_likely_upper_bound
904138fd1498Szrj 			  + bias_for_lowest, lowest_vf) - 1
904238fd1498Szrj 	 : wi::udiv_floor (loop->nb_iterations_likely_upper_bound
904338fd1498Szrj 			   + bias_for_lowest, lowest_vf) - 1);
904438fd1498Szrj   if (loop->any_estimate)
904538fd1498Szrj     loop->nb_iterations_estimate
904638fd1498Szrj       = (final_iter_may_be_partial
904738fd1498Szrj 	 ? wi::udiv_ceil (loop->nb_iterations_estimate + bias_for_assumed,
904838fd1498Szrj 			  assumed_vf) - 1
904938fd1498Szrj 	 : wi::udiv_floor (loop->nb_iterations_estimate + bias_for_assumed,
905038fd1498Szrj 			   assumed_vf) - 1);
905138fd1498Szrj 
905238fd1498Szrj   if (dump_enabled_p ())
905338fd1498Szrj     {
905438fd1498Szrj       if (!LOOP_VINFO_EPILOGUE_P (loop_vinfo))
905538fd1498Szrj 	{
905638fd1498Szrj 	  dump_printf_loc (MSG_NOTE, vect_location,
905738fd1498Szrj 			   "LOOP VECTORIZED\n");
905838fd1498Szrj 	  if (loop->inner)
905938fd1498Szrj 	    dump_printf_loc (MSG_NOTE, vect_location,
906038fd1498Szrj 			     "OUTER LOOP VECTORIZED\n");
906138fd1498Szrj 	  dump_printf (MSG_NOTE, "\n");
906238fd1498Szrj 	}
906338fd1498Szrj       else
906438fd1498Szrj 	{
906538fd1498Szrj 	  dump_printf_loc (MSG_NOTE, vect_location,
906638fd1498Szrj 			   "LOOP EPILOGUE VECTORIZED (VS=");
906738fd1498Szrj 	  dump_dec (MSG_NOTE, current_vector_size);
906838fd1498Szrj 	  dump_printf (MSG_NOTE, ")\n");
906938fd1498Szrj 	}
907038fd1498Szrj     }
907138fd1498Szrj 
907238fd1498Szrj   /* Free SLP instances here because otherwise stmt reference counting
907338fd1498Szrj      won't work.  */
907438fd1498Szrj   slp_instance instance;
907538fd1498Szrj   FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), i, instance)
907638fd1498Szrj     vect_free_slp_instance (instance);
907738fd1498Szrj   LOOP_VINFO_SLP_INSTANCES (loop_vinfo).release ();
907838fd1498Szrj   /* Clear-up safelen field since its value is invalid after vectorization
907938fd1498Szrj      since vectorized loop can have loop-carried dependencies.  */
908038fd1498Szrj   loop->safelen = 0;
908138fd1498Szrj 
908238fd1498Szrj   /* Don't vectorize epilogue for epilogue.  */
908338fd1498Szrj   if (LOOP_VINFO_EPILOGUE_P (loop_vinfo))
908438fd1498Szrj     epilogue = NULL;
908538fd1498Szrj 
908638fd1498Szrj   if (!PARAM_VALUE (PARAM_VECT_EPILOGUES_NOMASK))
908738fd1498Szrj     epilogue = NULL;
908838fd1498Szrj 
908938fd1498Szrj   if (epilogue)
909038fd1498Szrj     {
909138fd1498Szrj       auto_vector_sizes vector_sizes;
909238fd1498Szrj       targetm.vectorize.autovectorize_vector_sizes (&vector_sizes);
909338fd1498Szrj       unsigned int next_size = 0;
909438fd1498Szrj 
909538fd1498Szrj       if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
909638fd1498Szrj 	  && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) >= 0
909738fd1498Szrj 	  && known_eq (vf, lowest_vf))
909838fd1498Szrj 	{
909938fd1498Szrj 	  unsigned int eiters
910038fd1498Szrj 	    = (LOOP_VINFO_INT_NITERS (loop_vinfo)
910138fd1498Szrj 	       - LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo));
910238fd1498Szrj 	  eiters = eiters % lowest_vf;
910338fd1498Szrj 	  epilogue->nb_iterations_upper_bound = eiters - 1;
910438fd1498Szrj 
910538fd1498Szrj 	  unsigned int ratio;
910638fd1498Szrj 	  while (next_size < vector_sizes.length ()
910738fd1498Szrj 		 && !(constant_multiple_p (current_vector_size,
910838fd1498Szrj 					   vector_sizes[next_size], &ratio)
910938fd1498Szrj 		      && eiters >= lowest_vf / ratio))
911038fd1498Szrj 	    next_size += 1;
911138fd1498Szrj 	}
911238fd1498Szrj       else
911338fd1498Szrj 	while (next_size < vector_sizes.length ()
911438fd1498Szrj 	       && maybe_lt (current_vector_size, vector_sizes[next_size]))
911538fd1498Szrj 	  next_size += 1;
911638fd1498Szrj 
911738fd1498Szrj       if (next_size == vector_sizes.length ())
911838fd1498Szrj 	epilogue = NULL;
911938fd1498Szrj     }
912038fd1498Szrj 
912138fd1498Szrj   if (epilogue)
912238fd1498Szrj     {
912338fd1498Szrj       epilogue->force_vectorize = loop->force_vectorize;
912438fd1498Szrj       epilogue->safelen = loop->safelen;
912538fd1498Szrj       epilogue->dont_vectorize = false;
912638fd1498Szrj 
912738fd1498Szrj       /* We may need to if-convert epilogue to vectorize it.  */
912838fd1498Szrj       if (LOOP_VINFO_SCALAR_LOOP (loop_vinfo))
912938fd1498Szrj 	tree_if_conversion (epilogue);
913038fd1498Szrj     }
913138fd1498Szrj 
913238fd1498Szrj   return epilogue;
913338fd1498Szrj }
913438fd1498Szrj 
/* The code below performs a simple optimization - revert
   if-conversion for masked stores, i.e. if the mask of a store is zero,
   do not perform the store and, if possible, skip the statements that
   produce the stored value as well.
913838fd1498Szrj    For example,
913938fd1498Szrj      for (i=0; i<n; i++)
914038fd1498Szrj        if (c[i])
914138fd1498Szrj 	{
914238fd1498Szrj 	  p1[i] += 1;
914338fd1498Szrj 	  p2[i] = p3[i] +2;
914438fd1498Szrj 	}
914538fd1498Szrj    this transformation will produce the following semi-hammock:
914638fd1498Szrj 
914738fd1498Szrj    if (!mask__ifc__42.18_165 == { 0, 0, 0, 0, 0, 0, 0, 0 })
914838fd1498Szrj      {
914938fd1498Szrj        vect__11.19_170 = MASK_LOAD (vectp_p1.20_168, 0B, mask__ifc__42.18_165);
915038fd1498Szrj        vect__12.22_172 = vect__11.19_170 + vect_cst__171;
915138fd1498Szrj        MASK_STORE (vectp_p1.23_175, 0B, mask__ifc__42.18_165, vect__12.22_172);
915238fd1498Szrj        vect__18.25_182 = MASK_LOAD (vectp_p3.26_180, 0B, mask__ifc__42.18_165);
915338fd1498Szrj        vect__19.28_184 = vect__18.25_182 + vect_cst__183;
915438fd1498Szrj        MASK_STORE (vectp_p2.29_187, 0B, mask__ifc__42.18_165, vect__19.28_184);
915538fd1498Szrj      }
915638fd1498Szrj */
915738fd1498Szrj 
/* Sink each IFN_MASK_STORE call in LOOP into a new conditionally executed
   basic block that is bypassed when the store's mask is all zeros.  See the
   example in the comment preceding this function.  */

void
optimize_mask_stores (struct loop *loop)
{
  basic_block *bbs = get_loop_body (loop);
  unsigned nbbs = loop->num_nodes;
  unsigned i;
  basic_block bb;
  struct loop *bb_loop;
  gimple_stmt_iterator gsi;
  gimple *stmt;
  /* All IFN_MASK_STORE calls found in LOOP, pushed in forward block/stmt
     order and popped from the back below.  */
  auto_vec<gimple *> worklist;

  vect_location = find_loop_location (loop);
  /* Pick up all masked stores in loop if any.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  stmt = gsi_stmt (gsi);
	  if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
	    worklist.safe_push (stmt);
	}
    }

  free (bbs);
  if (worklist.is_empty ())
    return;

  /* Loop has masked stores.  */
  while (!worklist.is_empty ())
    {
      gimple *last, *last_store;
      edge e, efalse;
      tree mask;
      basic_block store_bb, join_bb;
      gimple_stmt_iterator gsi_to;
      tree vdef, new_vdef;
      gphi *phi;
      tree vectype;
      tree zero;

      last = worklist.pop ();
      /* Argument 2 of an IFN_MASK_STORE call is the vector mask.  */
      mask = gimple_call_arg (last, 2);
      bb = gimple_bb (last);
      /* Create then_bb and if-then structure in CFG, then_bb belongs to
	 the same loop as if_bb.  It could be different to LOOP when two
	 level loop-nest is vectorized and mask_store belongs to the inner
	 one.  */
      e = split_block (bb, last);
      bb_loop = bb->loop_father;
      gcc_assert (loop == bb_loop || flow_loop_nested_p (loop, bb_loop));
      join_bb = e->dest;
      store_bb = create_empty_bb (bb);
      add_bb_to_loop (store_bb, bb_loop);
      /* The condition built below is "mask == {0,...}", so the true edge
	 (E, straight to JOIN_BB) is the path that skips the stores.  */
      e->flags = EDGE_TRUE_VALUE;
      efalse = make_edge (bb, store_bb, EDGE_FALSE_VALUE);
      /* Put STORE_BB to likely part.  */
      /* NOTE(review): the comment above says "likely", yet the edge into
	 STORE_BB is given an unlikely probability — confirm which is
	 intended.  */
      efalse->probability = profile_probability::unlikely ();
      store_bb->count = efalse->count ();
      make_single_succ_edge (store_bb, join_bb, EDGE_FALLTHRU);
      if (dom_info_available_p (CDI_DOMINATORS))
	set_immediate_dominator (CDI_DOMINATORS, store_bb, bb);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "Create new block %d to sink mask stores.",
			 store_bb->index);
      /* Create vector comparison with boolean result.  */
      vectype = TREE_TYPE (mask);
      zero = build_zero_cst (vectype);
      stmt = gimple_build_cond (EQ_EXPR, mask, zero, NULL_TREE, NULL_TREE);
      gsi = gsi_last_bb (bb);
      gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
      /* Create new PHI node for vdef of the last masked store:
	 .MEM_2 = VDEF <.MEM_1>
	 will be converted to
	 .MEM.3 = VDEF <.MEM_1>
	 and new PHI node will be created in join bb
	 .MEM_2 = PHI <.MEM_1, .MEM_3>
      */
      vdef = gimple_vdef (last);
      new_vdef = make_ssa_name (gimple_vop (cfun), last);
      gimple_set_vdef (last, new_vdef);
      phi = create_phi_node (vdef, join_bb);
      add_phi_arg (phi, new_vdef, EDGE_SUCC (store_bb, 0), UNKNOWN_LOCATION);

      /* Put all masked stores with the same mask to STORE_BB if possible.  */
      while (true)
	{
	  gimple_stmt_iterator gsi_from;
	  gimple *stmt1 = NULL;

	  /* Move masked store to STORE_BB.  */
	  last_store = last;
	  gsi = gsi_for_stmt (last);
	  gsi_from = gsi;
	  /* Shift GSI to the previous stmt for further traversal.  */
	  gsi_prev (&gsi);
	  gsi_to = gsi_start_bb (store_bb);
	  gsi_move_before (&gsi_from, &gsi_to);
	  /* Setup GSI_TO to the non-empty block start.  */
	  gsi_to = gsi_start_bb (store_bb);
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location,
			       "Move stmt to created bb\n");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, last, 0);
	    }
	  /* Move all stored value producers if possible.  Walk backwards
	     from the moved store; a producer may be moved only if it does
	     not write memory, has no volatile operands, its SSA-name
	     result is used solely inside STORE_BB, and its virtual use
	     (if any) matches the store's.  Any other statement stops the
	     walk.  */
	  while (!gsi_end_p (gsi))
	    {
	      tree lhs;
	      imm_use_iterator imm_iter;
	      use_operand_p use_p;
	      bool res;

	      /* Skip debug statements.  */
	      if (is_gimple_debug (gsi_stmt (gsi)))
		{
		  gsi_prev (&gsi);
		  continue;
		}
	      stmt1 = gsi_stmt (gsi);
	      /* Do not consider statements writing to memory or having
		 volatile operand.  */
	      if (gimple_vdef (stmt1)
		  || gimple_has_volatile_ops (stmt1))
		break;
	      gsi_from = gsi;
	      gsi_prev (&gsi);
	      lhs = gimple_get_lhs (stmt1);
	      if (!lhs)
		break;

	      /* LHS of vectorized stmt must be SSA_NAME.  */
	      if (TREE_CODE (lhs) != SSA_NAME)
		break;

	      if (!VECTOR_TYPE_P (TREE_TYPE (lhs)))
		{
		  /* Remove dead scalar statement.  */
		  if (has_zero_uses (lhs))
		    {
		      gsi_remove (&gsi_from, true);
		      continue;
		    }
		}

	      /* Check that LHS does not have uses outside of STORE_BB.  */
	      res = true;
	      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
		{
		  gimple *use_stmt;
		  use_stmt = USE_STMT (use_p);
		  if (is_gimple_debug (use_stmt))
		    continue;
		  if (gimple_bb (use_stmt) != store_bb)
		    {
		      res = false;
		      break;
		    }
		}
	      if (!res)
		break;

	      if (gimple_vuse (stmt1)
		  && gimple_vuse (stmt1) != gimple_vuse (last_store))
		break;

	      /* Can move STMT1 to STORE_BB.  */
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_NOTE, vect_location,
				   "Move stmt to created bb\n");
		  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt1, 0);
		}
	      gsi_move_before (&gsi_from, &gsi_to);
	      /* Shift GSI_TO for further insertion.  */
	      gsi_prev (&gsi_to);
	    }
	  /* Put other masked stores with the same mask to STORE_BB.  Only
	     continue when the next store on the worklist is exactly the
	     statement the backward walk stopped at, so that everything
	     between the two stores has already been moved or removed.  */
	  if (worklist.is_empty ()
	      || gimple_call_arg (worklist.last (), 2) != mask
	      || worklist.last () != stmt1)
	    break;
	  last = worklist.pop ();
	}
      /* Wire the skip path of the PHI: on edge E (mask all zero) the
	 memory state is the virtual use of LAST_STORE, the earliest of
	 the stores moved into STORE_BB.  */
      add_phi_arg (phi, gimple_vuse (last_store), e, UNKNOWN_LOCATION);
    }
}
9349