138fd1498Szrj /* SLP - Basic Block Vectorization
238fd1498Szrj    Copyright (C) 2007-2018 Free Software Foundation, Inc.
338fd1498Szrj    Contributed by Dorit Naishlos <dorit@il.ibm.com>
438fd1498Szrj    and Ira Rosen <irar@il.ibm.com>
538fd1498Szrj 
638fd1498Szrj This file is part of GCC.
738fd1498Szrj 
838fd1498Szrj GCC is free software; you can redistribute it and/or modify it under
938fd1498Szrj the terms of the GNU General Public License as published by the Free
1038fd1498Szrj Software Foundation; either version 3, or (at your option) any later
1138fd1498Szrj version.
1238fd1498Szrj 
1338fd1498Szrj GCC is distributed in the hope that it will be useful, but WITHOUT ANY
1438fd1498Szrj WARRANTY; without even the implied warranty of MERCHANTABILITY or
1538fd1498Szrj FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
1638fd1498Szrj for more details.
1738fd1498Szrj 
1838fd1498Szrj You should have received a copy of the GNU General Public License
1938fd1498Szrj along with GCC; see the file COPYING3.  If not see
2038fd1498Szrj <http://www.gnu.org/licenses/>.  */
2138fd1498Szrj 
2238fd1498Szrj #include "config.h"
2338fd1498Szrj #include "system.h"
2438fd1498Szrj #include "coretypes.h"
2538fd1498Szrj #include "backend.h"
2638fd1498Szrj #include "target.h"
2738fd1498Szrj #include "rtl.h"
2838fd1498Szrj #include "tree.h"
2938fd1498Szrj #include "gimple.h"
3038fd1498Szrj #include "tree-pass.h"
3138fd1498Szrj #include "ssa.h"
3238fd1498Szrj #include "optabs-tree.h"
3338fd1498Szrj #include "insn-config.h"
3438fd1498Szrj #include "recog.h"		/* FIXME: for insn_data */
3538fd1498Szrj #include "params.h"
3638fd1498Szrj #include "fold-const.h"
3738fd1498Szrj #include "stor-layout.h"
3838fd1498Szrj #include "gimple-iterator.h"
3938fd1498Szrj #include "cfgloop.h"
4038fd1498Szrj #include "tree-vectorizer.h"
4138fd1498Szrj #include "langhooks.h"
4238fd1498Szrj #include "gimple-walk.h"
4338fd1498Szrj #include "dbgcnt.h"
4438fd1498Szrj #include "tree-vector-builder.h"
4538fd1498Szrj #include "vec-perm-indices.h"
4638fd1498Szrj #include "gimple-fold.h"
4738fd1498Szrj #include "internal-fn.h"
4838fd1498Szrj 
4938fd1498Szrj 
5038fd1498Szrj /* Recursively free the memory allocated for the SLP tree rooted at NODE.  */
5138fd1498Szrj 
5238fd1498Szrj static void
vect_free_slp_tree(slp_tree node)5338fd1498Szrj vect_free_slp_tree (slp_tree node)
5438fd1498Szrj {
5538fd1498Szrj   int i;
5638fd1498Szrj   slp_tree child;
5738fd1498Szrj 
5838fd1498Szrj   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
5938fd1498Szrj     vect_free_slp_tree (child);
6038fd1498Szrj 
6138fd1498Szrj   gimple *stmt;
6238fd1498Szrj   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
6338fd1498Szrj     /* After transform some stmts are removed and thus their vinfo is gone.  */
6438fd1498Szrj     if (vinfo_for_stmt (stmt))
6538fd1498Szrj       {
6638fd1498Szrj 	gcc_assert (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) > 0);
6738fd1498Szrj 	STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))--;
6838fd1498Szrj       }
6938fd1498Szrj 
7038fd1498Szrj   SLP_TREE_CHILDREN (node).release ();
7138fd1498Szrj   SLP_TREE_SCALAR_STMTS (node).release ();
7238fd1498Szrj   SLP_TREE_VEC_STMTS (node).release ();
7338fd1498Szrj   SLP_TREE_LOAD_PERMUTATION (node).release ();
7438fd1498Szrj 
7538fd1498Szrj   free (node);
7638fd1498Szrj }
7738fd1498Szrj 
7838fd1498Szrj 
7938fd1498Szrj /* Free the memory allocated for the SLP instance.  */
8038fd1498Szrj 
8138fd1498Szrj void
vect_free_slp_instance(slp_instance instance)8238fd1498Szrj vect_free_slp_instance (slp_instance instance)
8338fd1498Szrj {
8438fd1498Szrj   vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
8538fd1498Szrj   SLP_INSTANCE_LOADS (instance).release ();
8638fd1498Szrj   free (instance);
8738fd1498Szrj }
8838fd1498Szrj 
8938fd1498Szrj 
9038fd1498Szrj /* Create an SLP node for SCALAR_STMTS.  */
9138fd1498Szrj 
9238fd1498Szrj static slp_tree
vect_create_new_slp_node(vec<gimple * > scalar_stmts)9338fd1498Szrj vect_create_new_slp_node (vec<gimple *> scalar_stmts)
9438fd1498Szrj {
9538fd1498Szrj   slp_tree node;
9638fd1498Szrj   gimple *stmt = scalar_stmts[0];
9738fd1498Szrj   unsigned int nops;
9838fd1498Szrj 
9938fd1498Szrj   if (is_gimple_call (stmt))
10038fd1498Szrj     nops = gimple_call_num_args (stmt);
10138fd1498Szrj   else if (is_gimple_assign (stmt))
10238fd1498Szrj     {
10338fd1498Szrj       nops = gimple_num_ops (stmt) - 1;
10438fd1498Szrj       if (gimple_assign_rhs_code (stmt) == COND_EXPR)
10538fd1498Szrj 	nops++;
10638fd1498Szrj     }
10738fd1498Szrj   else if (gimple_code (stmt) == GIMPLE_PHI)
10838fd1498Szrj     nops = 0;
10938fd1498Szrj   else
11038fd1498Szrj     return NULL;
11138fd1498Szrj 
11238fd1498Szrj   node = XNEW (struct _slp_tree);
11338fd1498Szrj   SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
11438fd1498Szrj   SLP_TREE_VEC_STMTS (node).create (0);
11538fd1498Szrj   SLP_TREE_CHILDREN (node).create (nops);
11638fd1498Szrj   SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
11738fd1498Szrj   SLP_TREE_TWO_OPERATORS (node) = false;
11838fd1498Szrj   SLP_TREE_DEF_TYPE (node) = vect_internal_def;
11938fd1498Szrj 
12038fd1498Szrj   unsigned i;
12138fd1498Szrj   FOR_EACH_VEC_ELT (scalar_stmts, i, stmt)
12238fd1498Szrj     STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))++;
12338fd1498Szrj 
12438fd1498Szrj   return node;
12538fd1498Szrj }
12638fd1498Szrj 
12738fd1498Szrj 
12838fd1498Szrj /* This structure is used in creation of an SLP tree.  Each instance
12938fd1498Szrj    corresponds to the same operand in a group of scalar stmts in an SLP
13038fd1498Szrj    node.  */
13138fd1498Szrj typedef struct _slp_oprnd_info
13238fd1498Szrj {
13338fd1498Szrj   /* Def-stmts for the operands.  */
13438fd1498Szrj   vec<gimple *> def_stmts;
13538fd1498Szrj   /* Information about the first statement, its vector def-type, type, the
13638fd1498Szrj      operand itself in case it's constant, and an indication if it's a pattern
13738fd1498Szrj      stmt.  */
13838fd1498Szrj   tree first_op_type;
13938fd1498Szrj   enum vect_def_type first_dt;
14038fd1498Szrj   bool first_pattern;
14138fd1498Szrj   bool second_pattern;
14238fd1498Szrj } *slp_oprnd_info;
14338fd1498Szrj 
14438fd1498Szrj 
14538fd1498Szrj /* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
14638fd1498Szrj    operand.  */
14738fd1498Szrj static vec<slp_oprnd_info>
vect_create_oprnd_info(int nops,int group_size)14838fd1498Szrj vect_create_oprnd_info (int nops, int group_size)
14938fd1498Szrj {
15038fd1498Szrj   int i;
15138fd1498Szrj   slp_oprnd_info oprnd_info;
15238fd1498Szrj   vec<slp_oprnd_info> oprnds_info;
15338fd1498Szrj 
15438fd1498Szrj   oprnds_info.create (nops);
15538fd1498Szrj   for (i = 0; i < nops; i++)
15638fd1498Szrj     {
15738fd1498Szrj       oprnd_info = XNEW (struct _slp_oprnd_info);
15838fd1498Szrj       oprnd_info->def_stmts.create (group_size);
15938fd1498Szrj       oprnd_info->first_dt = vect_uninitialized_def;
16038fd1498Szrj       oprnd_info->first_op_type = NULL_TREE;
16138fd1498Szrj       oprnd_info->first_pattern = false;
16238fd1498Szrj       oprnd_info->second_pattern = false;
16338fd1498Szrj       oprnds_info.quick_push (oprnd_info);
16438fd1498Szrj     }
16538fd1498Szrj 
16638fd1498Szrj   return oprnds_info;
16738fd1498Szrj }
16838fd1498Szrj 
16938fd1498Szrj 
17038fd1498Szrj /* Free operands info.  */
17138fd1498Szrj 
17238fd1498Szrj static void
vect_free_oprnd_info(vec<slp_oprnd_info> & oprnds_info)17338fd1498Szrj vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
17438fd1498Szrj {
17538fd1498Szrj   int i;
17638fd1498Szrj   slp_oprnd_info oprnd_info;
17738fd1498Szrj 
17838fd1498Szrj   FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
17938fd1498Szrj     {
18038fd1498Szrj       oprnd_info->def_stmts.release ();
18138fd1498Szrj       XDELETE (oprnd_info);
18238fd1498Szrj     }
18338fd1498Szrj 
18438fd1498Szrj   oprnds_info.release ();
18538fd1498Szrj }
18638fd1498Szrj 
18738fd1498Szrj 
18838fd1498Szrj /* Find the place of the data-ref in STMT in the interleaving chain that starts
18938fd1498Szrj    from FIRST_STMT.  Return -1 if the data-ref is not a part of the chain.  */
19038fd1498Szrj 
19138fd1498Szrj int
vect_get_place_in_interleaving_chain(gimple * stmt,gimple * first_stmt)19238fd1498Szrj vect_get_place_in_interleaving_chain (gimple *stmt, gimple *first_stmt)
19338fd1498Szrj {
19438fd1498Szrj   gimple *next_stmt = first_stmt;
19538fd1498Szrj   int result = 0;
19638fd1498Szrj 
19738fd1498Szrj   if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
19838fd1498Szrj     return -1;
19938fd1498Szrj 
20038fd1498Szrj   do
20138fd1498Szrj     {
20238fd1498Szrj       if (next_stmt == stmt)
20338fd1498Szrj 	return result;
20438fd1498Szrj       next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
20538fd1498Szrj       if (next_stmt)
20638fd1498Szrj 	result += GROUP_GAP (vinfo_for_stmt (next_stmt));
20738fd1498Szrj     }
20838fd1498Szrj   while (next_stmt);
20938fd1498Szrj 
21038fd1498Szrj   return -1;
21138fd1498Szrj }
21238fd1498Szrj 
21338fd1498Szrj /* Check whether it is possible to load COUNT elements of type ELT_MODE
21438fd1498Szrj    using the method implemented by duplicate_and_interleave.  Return true
21538fd1498Szrj    if so, returning the number of intermediate vectors in *NVECTORS_OUT
21638fd1498Szrj    (if nonnull) and the type of each intermediate vector in *VECTOR_TYPE_OUT
21738fd1498Szrj    (if nonnull).  */
21838fd1498Szrj 
21938fd1498Szrj bool
can_duplicate_and_interleave_p(unsigned int count,machine_mode elt_mode,unsigned int * nvectors_out,tree * vector_type_out,tree * permutes)22038fd1498Szrj can_duplicate_and_interleave_p (unsigned int count, machine_mode elt_mode,
22138fd1498Szrj 				unsigned int *nvectors_out,
22238fd1498Szrj 				tree *vector_type_out,
22338fd1498Szrj 				tree *permutes)
22438fd1498Szrj {
22538fd1498Szrj   poly_int64 elt_bytes = count * GET_MODE_SIZE (elt_mode);
22638fd1498Szrj   poly_int64 nelts;
22738fd1498Szrj   unsigned int nvectors = 1;
22838fd1498Szrj   for (;;)
22938fd1498Szrj     {
23038fd1498Szrj       scalar_int_mode int_mode;
23138fd1498Szrj       poly_int64 elt_bits = elt_bytes * BITS_PER_UNIT;
23238fd1498Szrj       if (multiple_p (current_vector_size, elt_bytes, &nelts)
23338fd1498Szrj 	  && int_mode_for_size (elt_bits, 0).exists (&int_mode))
23438fd1498Szrj 	{
23538fd1498Szrj 	  tree int_type = build_nonstandard_integer_type
23638fd1498Szrj 	    (GET_MODE_BITSIZE (int_mode), 1);
23738fd1498Szrj 	  tree vector_type = build_vector_type (int_type, nelts);
23838fd1498Szrj 	  if (VECTOR_MODE_P (TYPE_MODE (vector_type)))
23938fd1498Szrj 	    {
24038fd1498Szrj 	      vec_perm_builder sel1 (nelts, 2, 3);
24138fd1498Szrj 	      vec_perm_builder sel2 (nelts, 2, 3);
24238fd1498Szrj 	      poly_int64 half_nelts = exact_div (nelts, 2);
24338fd1498Szrj 	      for (unsigned int i = 0; i < 3; ++i)
24438fd1498Szrj 		{
24538fd1498Szrj 		  sel1.quick_push (i);
24638fd1498Szrj 		  sel1.quick_push (i + nelts);
24738fd1498Szrj 		  sel2.quick_push (half_nelts + i);
24838fd1498Szrj 		  sel2.quick_push (half_nelts + i + nelts);
24938fd1498Szrj 		}
25038fd1498Szrj 	      vec_perm_indices indices1 (sel1, 2, nelts);
25138fd1498Szrj 	      vec_perm_indices indices2 (sel2, 2, nelts);
25238fd1498Szrj 	      if (can_vec_perm_const_p (TYPE_MODE (vector_type), indices1)
25338fd1498Szrj 		  && can_vec_perm_const_p (TYPE_MODE (vector_type), indices2))
25438fd1498Szrj 		{
25538fd1498Szrj 		  if (nvectors_out)
25638fd1498Szrj 		    *nvectors_out = nvectors;
25738fd1498Szrj 		  if (vector_type_out)
25838fd1498Szrj 		    *vector_type_out = vector_type;
25938fd1498Szrj 		  if (permutes)
26038fd1498Szrj 		    {
26138fd1498Szrj 		      permutes[0] = vect_gen_perm_mask_checked (vector_type,
26238fd1498Szrj 								indices1);
26338fd1498Szrj 		      permutes[1] = vect_gen_perm_mask_checked (vector_type,
26438fd1498Szrj 								indices2);
26538fd1498Szrj 		    }
26638fd1498Szrj 		  return true;
26738fd1498Szrj 		}
26838fd1498Szrj 	    }
26938fd1498Szrj 	}
27038fd1498Szrj       if (!multiple_p (elt_bytes, 2, &elt_bytes))
27138fd1498Szrj 	return false;
27238fd1498Szrj       nvectors *= 2;
27338fd1498Szrj     }
27438fd1498Szrj }
27538fd1498Szrj 
27638fd1498Szrj /* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
27738fd1498Szrj    they are of a valid type and that they match the defs of the first stmt of
27838fd1498Szrj    the SLP group (stored in OPRNDS_INFO).  This function tries to match stmts
27938fd1498Szrj    by swapping operands of STMTS[STMT_NUM] when possible.  Non-zero *SWAP
28038fd1498Szrj    indicates swap is required for cond_expr stmts.  Specifically, *SWAP
28138fd1498Szrj    is 1 if STMT is cond and operands of comparison need to be swapped;
28238fd1498Szrj    *SWAP is 2 if STMT is cond and code of comparison needs to be inverted.
28338fd1498Szrj    If there is any operand swap in this function, *SWAP is set to non-zero
28438fd1498Szrj    value.
28538fd1498Szrj    If there was a fatal error return -1; if the error could be corrected by
28638fd1498Szrj    swapping operands of father node of this one, return 1; if everything is
28738fd1498Szrj    ok return 0.  */
28838fd1498Szrj static int
vect_get_and_check_slp_defs(vec_info * vinfo,unsigned char * swap,vec<gimple * > stmts,unsigned stmt_num,vec<slp_oprnd_info> * oprnds_info)28938fd1498Szrj vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap,
29038fd1498Szrj 			     vec<gimple *> stmts, unsigned stmt_num,
29138fd1498Szrj 			     vec<slp_oprnd_info> *oprnds_info)
29238fd1498Szrj {
29338fd1498Szrj   gimple *stmt = stmts[stmt_num];
29438fd1498Szrj   tree oprnd;
29538fd1498Szrj   unsigned int i, number_of_oprnds;
29638fd1498Szrj   gimple *def_stmt;
29738fd1498Szrj   enum vect_def_type dt = vect_uninitialized_def;
29838fd1498Szrj   bool pattern = false;
29938fd1498Szrj   slp_oprnd_info oprnd_info;
30038fd1498Szrj   int first_op_idx = 1;
30138fd1498Szrj   bool commutative = false;
30238fd1498Szrj   bool first_op_cond = false;
30338fd1498Szrj   bool first = stmt_num == 0;
30438fd1498Szrj   bool second = stmt_num == 1;
30538fd1498Szrj 
30638fd1498Szrj   if (is_gimple_call (stmt))
30738fd1498Szrj     {
30838fd1498Szrj       number_of_oprnds = gimple_call_num_args (stmt);
30938fd1498Szrj       first_op_idx = 3;
31038fd1498Szrj     }
31138fd1498Szrj   else if (is_gimple_assign (stmt))
31238fd1498Szrj     {
31338fd1498Szrj       enum tree_code code = gimple_assign_rhs_code (stmt);
31438fd1498Szrj       number_of_oprnds = gimple_num_ops (stmt) - 1;
31538fd1498Szrj       /* Swap can only be done for cond_expr if asked to, otherwise we
31638fd1498Szrj 	 could result in different comparison code to the first stmt.  */
31738fd1498Szrj       if (gimple_assign_rhs_code (stmt) == COND_EXPR
31838fd1498Szrj 	  && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt)))
31938fd1498Szrj 	{
32038fd1498Szrj 	  first_op_cond = true;
32138fd1498Szrj 	  number_of_oprnds++;
32238fd1498Szrj 	}
32338fd1498Szrj       else
32438fd1498Szrj 	commutative = commutative_tree_code (code);
32538fd1498Szrj     }
32638fd1498Szrj   else
32738fd1498Szrj     return -1;
32838fd1498Szrj 
32938fd1498Szrj   bool swapped = (*swap != 0);
33038fd1498Szrj   gcc_assert (!swapped || first_op_cond);
33138fd1498Szrj   for (i = 0; i < number_of_oprnds; i++)
33238fd1498Szrj     {
33338fd1498Szrj again:
33438fd1498Szrj       if (first_op_cond)
33538fd1498Szrj 	{
33638fd1498Szrj 	  /* Map indicating how operands of cond_expr should be swapped.  */
33738fd1498Szrj 	  int maps[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
33838fd1498Szrj 	  int *map = maps[*swap];
33938fd1498Szrj 
34038fd1498Szrj 	  if (i < 2)
34138fd1498Szrj 	    oprnd = TREE_OPERAND (gimple_op (stmt, first_op_idx), map[i]);
34238fd1498Szrj 	  else
34338fd1498Szrj 	    oprnd = gimple_op (stmt, map[i]);
34438fd1498Szrj 	}
34538fd1498Szrj       else
34638fd1498Szrj 	oprnd = gimple_op (stmt, first_op_idx + (swapped ? !i : i));
34738fd1498Szrj 
34838fd1498Szrj       oprnd_info = (*oprnds_info)[i];
34938fd1498Szrj 
35038fd1498Szrj       if (!vect_is_simple_use (oprnd, vinfo, &def_stmt, &dt))
35138fd1498Szrj 	{
35238fd1498Szrj 	  if (dump_enabled_p ())
35338fd1498Szrj 	    {
35438fd1498Szrj 	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
35538fd1498Szrj 			       "Build SLP failed: can't analyze def for ");
35638fd1498Szrj 	      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
35738fd1498Szrj               dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
35838fd1498Szrj 	    }
35938fd1498Szrj 
36038fd1498Szrj 	  return -1;
36138fd1498Szrj 	}
36238fd1498Szrj 
36338fd1498Szrj       /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
36438fd1498Szrj          from the pattern.  Check that all the stmts of the node are in the
36538fd1498Szrj          pattern.  */
36638fd1498Szrj       if (def_stmt && gimple_bb (def_stmt)
36738fd1498Szrj           && vect_stmt_in_region_p (vinfo, def_stmt)
36838fd1498Szrj           && vinfo_for_stmt (def_stmt)
36938fd1498Szrj           && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
37038fd1498Szrj 	  && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
37138fd1498Szrj 	  && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
37238fd1498Szrj         {
37338fd1498Szrj           pattern = true;
37438fd1498Szrj           if (!first && !oprnd_info->first_pattern
37538fd1498Szrj 	      /* Allow different pattern state for the defs of the
37638fd1498Szrj 		 first stmt in reduction chains.  */
37738fd1498Szrj 	      && (oprnd_info->first_dt != vect_reduction_def
37838fd1498Szrj 		  || (!second && !oprnd_info->second_pattern)))
37938fd1498Szrj 	    {
38038fd1498Szrj 	      if (i == 0
38138fd1498Szrj 		  && !swapped
38238fd1498Szrj 		  && commutative)
38338fd1498Szrj 		{
38438fd1498Szrj 		  swapped = true;
38538fd1498Szrj 		  goto again;
38638fd1498Szrj 		}
38738fd1498Szrj 
38838fd1498Szrj 	      if (dump_enabled_p ())
38938fd1498Szrj 		{
39038fd1498Szrj 		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
39138fd1498Szrj 				   "Build SLP failed: some of the stmts"
39238fd1498Szrj 				   " are in a pattern, and others are not ");
39338fd1498Szrj 		  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
39438fd1498Szrj                   dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
39538fd1498Szrj 		}
39638fd1498Szrj 
39738fd1498Szrj 	      return 1;
39838fd1498Szrj             }
39938fd1498Szrj 
40038fd1498Szrj           def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
40138fd1498Szrj           dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));
40238fd1498Szrj 
40338fd1498Szrj           if (dt == vect_unknown_def_type)
40438fd1498Szrj             {
40538fd1498Szrj               if (dump_enabled_p ())
40638fd1498Szrj                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
40738fd1498Szrj 				 "Unsupported pattern.\n");
40838fd1498Szrj               return -1;
40938fd1498Szrj             }
41038fd1498Szrj 
41138fd1498Szrj           switch (gimple_code (def_stmt))
41238fd1498Szrj             {
41338fd1498Szrj             case GIMPLE_PHI:
41438fd1498Szrj             case GIMPLE_ASSIGN:
41538fd1498Szrj 	      break;
41638fd1498Szrj 
41738fd1498Szrj 	    default:
41838fd1498Szrj 	      if (dump_enabled_p ())
41938fd1498Szrj 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
42038fd1498Szrj 				 "unsupported defining stmt:\n");
42138fd1498Szrj 	      return -1;
42238fd1498Szrj             }
42338fd1498Szrj         }
42438fd1498Szrj 
42538fd1498Szrj       if (second)
42638fd1498Szrj 	oprnd_info->second_pattern = pattern;
42738fd1498Szrj 
42838fd1498Szrj       if (first)
42938fd1498Szrj 	{
43038fd1498Szrj 	  oprnd_info->first_dt = dt;
43138fd1498Szrj 	  oprnd_info->first_pattern = pattern;
43238fd1498Szrj 	  oprnd_info->first_op_type = TREE_TYPE (oprnd);
43338fd1498Szrj 	}
43438fd1498Szrj       else
43538fd1498Szrj 	{
43638fd1498Szrj 	  /* Not first stmt of the group, check that the def-stmt/s match
43738fd1498Szrj 	     the def-stmt/s of the first stmt.  Allow different definition
43838fd1498Szrj 	     types for reduction chains: the first stmt must be a
43938fd1498Szrj 	     vect_reduction_def (a phi node), and the rest
44038fd1498Szrj 	     vect_internal_def.  */
44138fd1498Szrj 	  tree type = TREE_TYPE (oprnd);
44238fd1498Szrj 	  if ((oprnd_info->first_dt != dt
44338fd1498Szrj 	       && !(oprnd_info->first_dt == vect_reduction_def
44438fd1498Szrj 		    && dt == vect_internal_def)
44538fd1498Szrj 	       && !((oprnd_info->first_dt == vect_external_def
44638fd1498Szrj 		     || oprnd_info->first_dt == vect_constant_def)
44738fd1498Szrj 		    && (dt == vect_external_def
44838fd1498Szrj 			|| dt == vect_constant_def)))
44938fd1498Szrj 	      || !types_compatible_p (oprnd_info->first_op_type, type))
45038fd1498Szrj 	    {
45138fd1498Szrj 	      /* Try swapping operands if we got a mismatch.  */
45238fd1498Szrj 	      if (i == 0
45338fd1498Szrj 		  && !swapped
45438fd1498Szrj 		  && commutative)
45538fd1498Szrj 		{
45638fd1498Szrj 		  swapped = true;
45738fd1498Szrj 		  goto again;
45838fd1498Szrj 		}
45938fd1498Szrj 
46038fd1498Szrj 	      if (dump_enabled_p ())
46138fd1498Szrj 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
46238fd1498Szrj 				 "Build SLP failed: different types\n");
46338fd1498Szrj 
46438fd1498Szrj 	      return 1;
46538fd1498Szrj 	    }
46638fd1498Szrj 	  if ((dt == vect_constant_def
46738fd1498Szrj 	       || dt == vect_external_def)
46838fd1498Szrj 	      && !current_vector_size.is_constant ()
46938fd1498Szrj 	      && (TREE_CODE (type) == BOOLEAN_TYPE
47038fd1498Szrj 		  || !can_duplicate_and_interleave_p (stmts.length (),
47138fd1498Szrj 						      TYPE_MODE (type))))
47238fd1498Szrj 	    {
47338fd1498Szrj 	      if (dump_enabled_p ())
47438fd1498Szrj 		{
47538fd1498Szrj 		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
47638fd1498Szrj 				   "Build SLP failed: invalid type of def "
47738fd1498Szrj 				   "for variable-length SLP ");
47838fd1498Szrj 		  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
47938fd1498Szrj 		  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
48038fd1498Szrj 		}
48138fd1498Szrj 	      return -1;
48238fd1498Szrj 	    }
48338fd1498Szrj 	}
48438fd1498Szrj 
48538fd1498Szrj       /* Check the types of the definitions.  */
48638fd1498Szrj       switch (dt)
48738fd1498Szrj 	{
48838fd1498Szrj 	case vect_constant_def:
48938fd1498Szrj 	case vect_external_def:
49038fd1498Szrj 	  break;
49138fd1498Szrj 
49238fd1498Szrj 	case vect_reduction_def:
49338fd1498Szrj 	case vect_induction_def:
49438fd1498Szrj 	case vect_internal_def:
49538fd1498Szrj 	  oprnd_info->def_stmts.quick_push (def_stmt);
49638fd1498Szrj 	  break;
49738fd1498Szrj 
49838fd1498Szrj 	default:
49938fd1498Szrj 	  /* FORNOW: Not supported.  */
50038fd1498Szrj 	  if (dump_enabled_p ())
50138fd1498Szrj 	    {
50238fd1498Szrj 	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
50338fd1498Szrj 			       "Build SLP failed: illegal type of def ");
50438fd1498Szrj 	      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
50538fd1498Szrj               dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
50638fd1498Szrj 	    }
50738fd1498Szrj 
50838fd1498Szrj 	  return -1;
50938fd1498Szrj 	}
51038fd1498Szrj     }
51138fd1498Szrj 
51238fd1498Szrj   /* Swap operands.  */
51338fd1498Szrj   if (swapped)
51438fd1498Szrj     {
51538fd1498Szrj       /* If there are already uses of this stmt in a SLP instance then
51638fd1498Szrj          we've committed to the operand order and can't swap it.  */
51738fd1498Szrj       if (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) != 0)
51838fd1498Szrj 	{
51938fd1498Szrj 	  if (dump_enabled_p ())
52038fd1498Szrj 	    {
52138fd1498Szrj 	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
52238fd1498Szrj 			       "Build SLP failed: cannot swap operands of "
52338fd1498Szrj 			       "shared stmt ");
52438fd1498Szrj 	      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
52538fd1498Szrj 	    }
52638fd1498Szrj 	  return -1;
52738fd1498Szrj 	}
52838fd1498Szrj 
52938fd1498Szrj       if (first_op_cond)
53038fd1498Szrj 	{
53138fd1498Szrj 	  tree cond = gimple_assign_rhs1 (stmt);
53238fd1498Szrj 	  enum tree_code code = TREE_CODE (cond);
53338fd1498Szrj 
53438fd1498Szrj 	  /* Swap.  */
53538fd1498Szrj 	  if (*swap == 1)
53638fd1498Szrj 	    {
53738fd1498Szrj 	      swap_ssa_operands (stmt, &TREE_OPERAND (cond, 0),
53838fd1498Szrj 				 &TREE_OPERAND (cond, 1));
53938fd1498Szrj 	      TREE_SET_CODE (cond, swap_tree_comparison (code));
54038fd1498Szrj 	    }
54138fd1498Szrj 	  /* Invert.  */
54238fd1498Szrj 	  else
54338fd1498Szrj 	    {
54438fd1498Szrj 	      swap_ssa_operands (stmt, gimple_assign_rhs2_ptr (stmt),
54538fd1498Szrj 				 gimple_assign_rhs3_ptr (stmt));
54638fd1498Szrj 	      bool honor_nans = HONOR_NANS (TREE_OPERAND (cond, 0));
54738fd1498Szrj 	      code = invert_tree_comparison (TREE_CODE (cond), honor_nans);
54838fd1498Szrj 	      gcc_assert (code != ERROR_MARK);
54938fd1498Szrj 	      TREE_SET_CODE (cond, code);
55038fd1498Szrj 	    }
55138fd1498Szrj 	}
55238fd1498Szrj       else
55338fd1498Szrj 	swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
55438fd1498Szrj 			   gimple_assign_rhs2_ptr (stmt));
55538fd1498Szrj       if (dump_enabled_p ())
55638fd1498Szrj 	{
55738fd1498Szrj 	  dump_printf_loc (MSG_NOTE, vect_location,
55838fd1498Szrj 			   "swapped operands to match def types in ");
55938fd1498Szrj 	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
56038fd1498Szrj 	}
56138fd1498Szrj     }
56238fd1498Szrj 
56338fd1498Szrj   *swap = swapped;
56438fd1498Szrj   return 0;
56538fd1498Szrj }
56638fd1498Szrj 
56738fd1498Szrj /* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
56838fd1498Szrj    caller's attempt to find the vector type in STMT with the narrowest
56938fd1498Szrj    element type.  Return true if VECTYPE is nonnull and if it is valid
57038fd1498Szrj    for VINFO.  When returning true, update MAX_NUNITS to reflect the
57138fd1498Szrj    number of units in VECTYPE.  VINFO, GORUP_SIZE and MAX_NUNITS are
57238fd1498Szrj    as for vect_build_slp_tree.  */
57338fd1498Szrj 
57438fd1498Szrj static bool
vect_record_max_nunits(vec_info * vinfo,gimple * stmt,unsigned int group_size,tree vectype,poly_uint64 * max_nunits)57538fd1498Szrj vect_record_max_nunits (vec_info *vinfo, gimple *stmt, unsigned int group_size,
57638fd1498Szrj 			tree vectype, poly_uint64 *max_nunits)
57738fd1498Szrj {
57838fd1498Szrj   if (!vectype)
57938fd1498Szrj     {
58038fd1498Szrj       if (dump_enabled_p ())
58138fd1498Szrj 	{
58238fd1498Szrj 	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
58338fd1498Szrj 			   "Build SLP failed: unsupported data-type in ");
58438fd1498Szrj 	  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
58538fd1498Szrj 	  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
58638fd1498Szrj 	}
58738fd1498Szrj       /* Fatal mismatch.  */
58838fd1498Szrj       return false;
58938fd1498Szrj     }
59038fd1498Szrj 
59138fd1498Szrj   /* If populating the vector type requires unrolling then fail
59238fd1498Szrj      before adjusting *max_nunits for basic-block vectorization.  */
59338fd1498Szrj   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
59438fd1498Szrj   unsigned HOST_WIDE_INT const_nunits;
59538fd1498Szrj   if (is_a <bb_vec_info> (vinfo)
59638fd1498Szrj       && (!nunits.is_constant (&const_nunits)
59738fd1498Szrj 	  || const_nunits > group_size))
59838fd1498Szrj     {
59938fd1498Szrj       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
60038fd1498Szrj 		       "Build SLP failed: unrolling required "
60138fd1498Szrj 		       "in basic block SLP\n");
60238fd1498Szrj       /* Fatal mismatch.  */
60338fd1498Szrj       return false;
60438fd1498Szrj     }
60538fd1498Szrj 
60638fd1498Szrj   /* In case of multiple types we need to detect the smallest type.  */
60738fd1498Szrj   vect_update_max_nunits (max_nunits, vectype);
60838fd1498Szrj   return true;
60938fd1498Szrj }
61038fd1498Szrj 
61138fd1498Szrj /* Verify if the scalar stmts STMTS are isomorphic, require data
61238fd1498Szrj    permutation or are of unsupported types of operation.  Return
61338fd1498Szrj    true if they are, otherwise return false and indicate in *MATCHES
61438fd1498Szrj    which stmts are not isomorphic to the first one.  If MATCHES[0]
61538fd1498Szrj    is false then this indicates the comparison could not be
61638fd1498Szrj    carried out or the stmts will never be vectorized by SLP.
61738fd1498Szrj 
61838fd1498Szrj    Note COND_EXPR is possibly ismorphic to another one after swapping its
61938fd1498Szrj    operands.  Set SWAP[i] to 1 if stmt I is COND_EXPR and isomorphic to
62038fd1498Szrj    the first stmt by swapping the two operands of comparison; set SWAP[i]
62138fd1498Szrj    to 2 if stmt I is isormorphic to the first stmt by inverting the code
62238fd1498Szrj    of comparison.  Take A1 >= B1 ? X1 : Y1 as an exmple, it can be swapped
62338fd1498Szrj    to (B1 <= A1 ? X1 : Y1); or be inverted to (A1 < B1) ? Y1 : X1.  */
62438fd1498Szrj 
62538fd1498Szrj static bool
vect_build_slp_tree_1(vec_info * vinfo,unsigned char * swap,vec<gimple * > stmts,unsigned int group_size,unsigned nops,poly_uint64 * max_nunits,bool * matches,bool * two_operators)62638fd1498Szrj vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
62738fd1498Szrj 		       vec<gimple *> stmts, unsigned int group_size,
62838fd1498Szrj 		       unsigned nops, poly_uint64 *max_nunits,
62938fd1498Szrj 		       bool *matches, bool *two_operators)
63038fd1498Szrj {
63138fd1498Szrj   unsigned int i;
63238fd1498Szrj   gimple *first_stmt = stmts[0], *stmt = stmts[0];
63338fd1498Szrj   enum tree_code first_stmt_code = ERROR_MARK;
63438fd1498Szrj   enum tree_code alt_stmt_code = ERROR_MARK;
63538fd1498Szrj   enum tree_code rhs_code = ERROR_MARK;
63638fd1498Szrj   enum tree_code first_cond_code = ERROR_MARK;
63738fd1498Szrj   tree lhs;
63838fd1498Szrj   bool need_same_oprnds = false;
63938fd1498Szrj   tree vectype = NULL_TREE, scalar_type, first_op1 = NULL_TREE;
64038fd1498Szrj   optab optab;
64138fd1498Szrj   int icode;
64238fd1498Szrj   machine_mode optab_op2_mode;
64338fd1498Szrj   machine_mode vec_mode;
64438fd1498Szrj   HOST_WIDE_INT dummy;
64538fd1498Szrj   gimple *first_load = NULL, *prev_first_load = NULL;
64638fd1498Szrj 
64738fd1498Szrj   /* For every stmt in NODE find its def stmt/s.  */
64838fd1498Szrj   FOR_EACH_VEC_ELT (stmts, i, stmt)
64938fd1498Szrj     {
65038fd1498Szrj       swap[i] = 0;
65138fd1498Szrj       matches[i] = false;
65238fd1498Szrj 
65338fd1498Szrj       if (dump_enabled_p ())
65438fd1498Szrj 	{
65538fd1498Szrj 	  dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
65638fd1498Szrj 	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
65738fd1498Szrj 	}
65838fd1498Szrj 
65938fd1498Szrj       /* Fail to vectorize statements marked as unvectorizable.  */
66038fd1498Szrj       if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
66138fd1498Szrj         {
66238fd1498Szrj           if (dump_enabled_p ())
66338fd1498Szrj             {
66438fd1498Szrj               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
66538fd1498Szrj 			       "Build SLP failed: unvectorizable statement ");
66638fd1498Szrj               dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
66738fd1498Szrj             }
66838fd1498Szrj 	  /* Fatal mismatch.  */
66938fd1498Szrj 	  matches[0] = false;
67038fd1498Szrj           return false;
67138fd1498Szrj         }
67238fd1498Szrj 
67338fd1498Szrj       lhs = gimple_get_lhs (stmt);
67438fd1498Szrj       if (lhs == NULL_TREE)
67538fd1498Szrj 	{
67638fd1498Szrj 	  if (dump_enabled_p ())
67738fd1498Szrj 	    {
67838fd1498Szrj 	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
67938fd1498Szrj 			       "Build SLP failed: not GIMPLE_ASSIGN nor "
68038fd1498Szrj 			       "GIMPLE_CALL ");
68138fd1498Szrj 	      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
68238fd1498Szrj 	    }
68338fd1498Szrj 	  /* Fatal mismatch.  */
68438fd1498Szrj 	  matches[0] = false;
68538fd1498Szrj 	  return false;
68638fd1498Szrj 	}
68738fd1498Szrj 
68838fd1498Szrj       scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
68938fd1498Szrj       vectype = get_vectype_for_scalar_type (scalar_type);
69038fd1498Szrj       if (!vect_record_max_nunits (vinfo, stmt, group_size, vectype,
69138fd1498Szrj 				   max_nunits))
69238fd1498Szrj 	{
69338fd1498Szrj 	  /* Fatal mismatch.  */
69438fd1498Szrj 	  matches[0] = false;
69538fd1498Szrj           return false;
69638fd1498Szrj         }
69738fd1498Szrj 
69838fd1498Szrj       if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
69938fd1498Szrj 	{
70038fd1498Szrj 	  rhs_code = CALL_EXPR;
70138fd1498Szrj 	  if (gimple_call_internal_p (call_stmt)
70238fd1498Szrj 	      || gimple_call_tail_p (call_stmt)
70338fd1498Szrj 	      || gimple_call_noreturn_p (call_stmt)
70438fd1498Szrj 	      || !gimple_call_nothrow_p (call_stmt)
70538fd1498Szrj 	      || gimple_call_chain (call_stmt))
70638fd1498Szrj 	    {
70738fd1498Szrj 	      if (dump_enabled_p ())
70838fd1498Szrj 		{
70938fd1498Szrj 		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
71038fd1498Szrj 				   "Build SLP failed: unsupported call type ");
71138fd1498Szrj 		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
71238fd1498Szrj 				    call_stmt, 0);
71338fd1498Szrj 		}
71438fd1498Szrj 	      /* Fatal mismatch.  */
71538fd1498Szrj 	      matches[0] = false;
71638fd1498Szrj 	      return false;
71738fd1498Szrj 	    }
71838fd1498Szrj 	}
71938fd1498Szrj       else
72038fd1498Szrj 	rhs_code = gimple_assign_rhs_code (stmt);
72138fd1498Szrj 
72238fd1498Szrj       /* Check the operation.  */
72338fd1498Szrj       if (i == 0)
72438fd1498Szrj 	{
72538fd1498Szrj 	  first_stmt_code = rhs_code;
72638fd1498Szrj 
72738fd1498Szrj 	  /* Shift arguments should be equal in all the packed stmts for a
72838fd1498Szrj 	     vector shift with scalar shift operand.  */
72938fd1498Szrj 	  if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
73038fd1498Szrj 	      || rhs_code == LROTATE_EXPR
73138fd1498Szrj 	      || rhs_code == RROTATE_EXPR)
73238fd1498Szrj 	    {
73338fd1498Szrj 	      vec_mode = TYPE_MODE (vectype);
73438fd1498Szrj 
73538fd1498Szrj 	      /* First see if we have a vector/vector shift.  */
73638fd1498Szrj 	      optab = optab_for_tree_code (rhs_code, vectype,
73738fd1498Szrj 					   optab_vector);
73838fd1498Szrj 
73938fd1498Szrj 	      if (!optab
74038fd1498Szrj 		  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
74138fd1498Szrj 		{
74238fd1498Szrj 		  /* No vector/vector shift, try for a vector/scalar shift.  */
74338fd1498Szrj 		  optab = optab_for_tree_code (rhs_code, vectype,
74438fd1498Szrj 					       optab_scalar);
74538fd1498Szrj 
74638fd1498Szrj 		  if (!optab)
74738fd1498Szrj 		    {
74838fd1498Szrj 		      if (dump_enabled_p ())
74938fd1498Szrj 			dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
75038fd1498Szrj 					 "Build SLP failed: no optab.\n");
75138fd1498Szrj 		      /* Fatal mismatch.  */
75238fd1498Szrj 		      matches[0] = false;
75338fd1498Szrj 		      return false;
75438fd1498Szrj 		    }
75538fd1498Szrj 		  icode = (int) optab_handler (optab, vec_mode);
75638fd1498Szrj 		  if (icode == CODE_FOR_nothing)
75738fd1498Szrj 		    {
75838fd1498Szrj 		      if (dump_enabled_p ())
75938fd1498Szrj 			dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
76038fd1498Szrj 					 "Build SLP failed: "
76138fd1498Szrj 					 "op not supported by target.\n");
76238fd1498Szrj 		      /* Fatal mismatch.  */
76338fd1498Szrj 		      matches[0] = false;
76438fd1498Szrj 		      return false;
76538fd1498Szrj 		    }
76638fd1498Szrj 		  optab_op2_mode = insn_data[icode].operand[2].mode;
76738fd1498Szrj 		  if (!VECTOR_MODE_P (optab_op2_mode))
76838fd1498Szrj 		    {
76938fd1498Szrj 		      need_same_oprnds = true;
77038fd1498Szrj 		      first_op1 = gimple_assign_rhs2 (stmt);
77138fd1498Szrj 		    }
77238fd1498Szrj 		}
77338fd1498Szrj 	    }
77438fd1498Szrj 	  else if (rhs_code == WIDEN_LSHIFT_EXPR)
77538fd1498Szrj             {
77638fd1498Szrj               need_same_oprnds = true;
77738fd1498Szrj               first_op1 = gimple_assign_rhs2 (stmt);
77838fd1498Szrj             }
77938fd1498Szrj 	}
78038fd1498Szrj       else
78138fd1498Szrj 	{
78238fd1498Szrj 	  if (first_stmt_code != rhs_code
78338fd1498Szrj 	      && alt_stmt_code == ERROR_MARK)
78438fd1498Szrj 	    alt_stmt_code = rhs_code;
78538fd1498Szrj 	  if (first_stmt_code != rhs_code
78638fd1498Szrj 	      && (first_stmt_code != IMAGPART_EXPR
78738fd1498Szrj 		  || rhs_code != REALPART_EXPR)
78838fd1498Szrj 	      && (first_stmt_code != REALPART_EXPR
78938fd1498Szrj 		  || rhs_code != IMAGPART_EXPR)
79038fd1498Szrj 	      /* Handle mismatches in plus/minus by computing both
79138fd1498Szrj 		 and merging the results.  */
79238fd1498Szrj 	      && !((first_stmt_code == PLUS_EXPR
79338fd1498Szrj 		    || first_stmt_code == MINUS_EXPR)
79438fd1498Szrj 		   && (alt_stmt_code == PLUS_EXPR
79538fd1498Szrj 		       || alt_stmt_code == MINUS_EXPR)
79638fd1498Szrj 		   && rhs_code == alt_stmt_code)
79738fd1498Szrj               && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
79838fd1498Szrj                    && (first_stmt_code == ARRAY_REF
79938fd1498Szrj                        || first_stmt_code == BIT_FIELD_REF
80038fd1498Szrj                        || first_stmt_code == INDIRECT_REF
80138fd1498Szrj                        || first_stmt_code == COMPONENT_REF
80238fd1498Szrj                        || first_stmt_code == MEM_REF)))
80338fd1498Szrj 	    {
80438fd1498Szrj 	      if (dump_enabled_p ())
80538fd1498Szrj 		{
80638fd1498Szrj 		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
80738fd1498Szrj 				   "Build SLP failed: different operation "
80838fd1498Szrj 				   "in stmt ");
80938fd1498Szrj 		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
81038fd1498Szrj 		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
81138fd1498Szrj 				   "original stmt ");
81238fd1498Szrj 		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
81338fd1498Szrj 				    first_stmt, 0);
81438fd1498Szrj 		}
81538fd1498Szrj 	      /* Mismatch.  */
81638fd1498Szrj 	      continue;
81738fd1498Szrj 	    }
81838fd1498Szrj 
81938fd1498Szrj 	  if (need_same_oprnds
82038fd1498Szrj 	      && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
82138fd1498Szrj 	    {
82238fd1498Szrj 	      if (dump_enabled_p ())
82338fd1498Szrj 		{
82438fd1498Szrj 		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
82538fd1498Szrj 				   "Build SLP failed: different shift "
82638fd1498Szrj 				   "arguments in ");
82738fd1498Szrj 		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
82838fd1498Szrj 		}
82938fd1498Szrj 	      /* Mismatch.  */
83038fd1498Szrj 	      continue;
83138fd1498Szrj 	    }
83238fd1498Szrj 
83338fd1498Szrj 	  if (rhs_code == CALL_EXPR)
83438fd1498Szrj 	    {
83538fd1498Szrj 	      gimple *first_stmt = stmts[0];
83638fd1498Szrj 	      if (gimple_call_num_args (stmt) != nops
83738fd1498Szrj 		  || !operand_equal_p (gimple_call_fn (first_stmt),
83838fd1498Szrj 				       gimple_call_fn (stmt), 0)
83938fd1498Szrj 		  || gimple_call_fntype (first_stmt)
84038fd1498Szrj 		     != gimple_call_fntype (stmt))
84138fd1498Szrj 		{
84238fd1498Szrj 		  if (dump_enabled_p ())
84338fd1498Szrj 		    {
84438fd1498Szrj 		      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
84538fd1498Szrj 				       "Build SLP failed: different calls in ");
84638fd1498Szrj 		      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
84738fd1498Szrj 					stmt, 0);
84838fd1498Szrj 		    }
84938fd1498Szrj 		  /* Mismatch.  */
85038fd1498Szrj 		  continue;
85138fd1498Szrj 		}
85238fd1498Szrj 	    }
85338fd1498Szrj 	}
85438fd1498Szrj 
85538fd1498Szrj       /* Grouped store or load.  */
85638fd1498Szrj       if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
85738fd1498Szrj 	{
85838fd1498Szrj 	  if (REFERENCE_CLASS_P (lhs))
85938fd1498Szrj 	    {
86038fd1498Szrj 	      /* Store.  */
86138fd1498Szrj 	      ;
86238fd1498Szrj 	    }
86338fd1498Szrj 	  else
86438fd1498Szrj 	    {
86538fd1498Szrj 	      /* Load.  */
86638fd1498Szrj               first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
86738fd1498Szrj               if (prev_first_load)
86838fd1498Szrj                 {
86938fd1498Szrj                   /* Check that there are no loads from different interleaving
87038fd1498Szrj                      chains in the same node.  */
87138fd1498Szrj                   if (prev_first_load != first_load)
87238fd1498Szrj                     {
87338fd1498Szrj                       if (dump_enabled_p ())
87438fd1498Szrj                         {
87538fd1498Szrj                           dump_printf_loc (MSG_MISSED_OPTIMIZATION,
87638fd1498Szrj 					   vect_location,
87738fd1498Szrj 					   "Build SLP failed: different "
87838fd1498Szrj 					   "interleaving chains in one node ");
87938fd1498Szrj                           dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
88038fd1498Szrj 					    stmt, 0);
88138fd1498Szrj                         }
88238fd1498Szrj 		      /* Mismatch.  */
88338fd1498Szrj 		      continue;
88438fd1498Szrj                     }
88538fd1498Szrj                 }
88638fd1498Szrj               else
88738fd1498Szrj                 prev_first_load = first_load;
88838fd1498Szrj            }
88938fd1498Szrj         } /* Grouped access.  */
89038fd1498Szrj       else
89138fd1498Szrj 	{
89238fd1498Szrj 	  if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
89338fd1498Szrj 	    {
89438fd1498Szrj 	      /* Not grouped load.  */
89538fd1498Szrj 	      if (dump_enabled_p ())
89638fd1498Szrj 		{
89738fd1498Szrj 		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
89838fd1498Szrj 				   "Build SLP failed: not grouped load ");
89938fd1498Szrj 		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
90038fd1498Szrj 		}
90138fd1498Szrj 
90238fd1498Szrj 	      /* FORNOW: Not grouped loads are not supported.  */
90338fd1498Szrj 	      /* Fatal mismatch.  */
90438fd1498Szrj 	      matches[0] = false;
90538fd1498Szrj 	      return false;
90638fd1498Szrj 	    }
90738fd1498Szrj 
90838fd1498Szrj 	  /* Not memory operation.  */
90938fd1498Szrj 	  if (TREE_CODE_CLASS (rhs_code) != tcc_binary
91038fd1498Szrj 	      && TREE_CODE_CLASS (rhs_code) != tcc_unary
91138fd1498Szrj 	      && TREE_CODE_CLASS (rhs_code) != tcc_expression
91238fd1498Szrj 	      && TREE_CODE_CLASS (rhs_code) != tcc_comparison
91338fd1498Szrj 	      && rhs_code != CALL_EXPR)
91438fd1498Szrj 	    {
91538fd1498Szrj 	      if (dump_enabled_p ())
91638fd1498Szrj 		{
91738fd1498Szrj 		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
91838fd1498Szrj 				   "Build SLP failed: operation");
91938fd1498Szrj 		  dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
92038fd1498Szrj 		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
92138fd1498Szrj 		}
92238fd1498Szrj 	      /* Fatal mismatch.  */
92338fd1498Szrj 	      matches[0] = false;
92438fd1498Szrj 	      return false;
92538fd1498Szrj 	    }
92638fd1498Szrj 
92738fd1498Szrj 	  if (rhs_code == COND_EXPR)
92838fd1498Szrj 	    {
92938fd1498Szrj 	      tree cond_expr = gimple_assign_rhs1 (stmt);
93038fd1498Szrj 	      enum tree_code cond_code = TREE_CODE (cond_expr);
93138fd1498Szrj 	      enum tree_code swap_code = ERROR_MARK;
93238fd1498Szrj 	      enum tree_code invert_code = ERROR_MARK;
93338fd1498Szrj 
93438fd1498Szrj 	      if (i == 0)
93538fd1498Szrj 		first_cond_code = TREE_CODE (cond_expr);
93638fd1498Szrj 	      else if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
93738fd1498Szrj 		{
93838fd1498Szrj 		  bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
93938fd1498Szrj 		  swap_code = swap_tree_comparison (cond_code);
94038fd1498Szrj 		  invert_code = invert_tree_comparison (cond_code, honor_nans);
94138fd1498Szrj 		}
94238fd1498Szrj 
94338fd1498Szrj 	      if (first_cond_code == cond_code)
94438fd1498Szrj 		;
94538fd1498Szrj 	      /* Isomorphic can be achieved by swapping.  */
94638fd1498Szrj 	      else if (first_cond_code == swap_code)
94738fd1498Szrj 		swap[i] = 1;
94838fd1498Szrj 	      /* Isomorphic can be achieved by inverting.  */
94938fd1498Szrj 	      else if (first_cond_code == invert_code)
95038fd1498Szrj 		swap[i] = 2;
95138fd1498Szrj 	      else
95238fd1498Szrj 		{
95338fd1498Szrj 		  if (dump_enabled_p ())
95438fd1498Szrj 		    {
95538fd1498Szrj 		      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
95638fd1498Szrj 				       "Build SLP failed: different"
95738fd1498Szrj 				       " operation");
95838fd1498Szrj 		      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
95938fd1498Szrj 					stmt, 0);
96038fd1498Szrj 		    }
96138fd1498Szrj 		  /* Mismatch.  */
96238fd1498Szrj 		  continue;
96338fd1498Szrj 		}
96438fd1498Szrj 	    }
96538fd1498Szrj 	}
96638fd1498Szrj 
96738fd1498Szrj       matches[i] = true;
96838fd1498Szrj     }
96938fd1498Szrj 
97038fd1498Szrj   for (i = 0; i < group_size; ++i)
97138fd1498Szrj     if (!matches[i])
97238fd1498Szrj       return false;
97338fd1498Szrj 
97438fd1498Szrj   /* If we allowed a two-operation SLP node verify the target can cope
97538fd1498Szrj      with the permute we are going to use.  */
97638fd1498Szrj   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
97738fd1498Szrj   if (alt_stmt_code != ERROR_MARK
97838fd1498Szrj       && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
97938fd1498Szrj     {
98038fd1498Szrj       unsigned HOST_WIDE_INT count;
98138fd1498Szrj       if (!nunits.is_constant (&count))
98238fd1498Szrj 	{
98338fd1498Szrj 	  if (dump_enabled_p ())
98438fd1498Szrj 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
98538fd1498Szrj 			     "Build SLP failed: different operations "
98638fd1498Szrj 			     "not allowed with variable-length SLP.\n");
98738fd1498Szrj 	  return false;
98838fd1498Szrj 	}
98938fd1498Szrj       vec_perm_builder sel (count, count, 1);
99038fd1498Szrj       for (i = 0; i < count; ++i)
99138fd1498Szrj 	{
99238fd1498Szrj 	  unsigned int elt = i;
99338fd1498Szrj 	  if (gimple_assign_rhs_code (stmts[i % group_size]) == alt_stmt_code)
99438fd1498Szrj 	    elt += count;
99538fd1498Szrj 	  sel.quick_push (elt);
99638fd1498Szrj 	}
99738fd1498Szrj       vec_perm_indices indices (sel, 2, count);
99838fd1498Szrj       if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
99938fd1498Szrj 	{
100038fd1498Szrj 	  for (i = 0; i < group_size; ++i)
100138fd1498Szrj 	    if (gimple_assign_rhs_code (stmts[i]) == alt_stmt_code)
100238fd1498Szrj 	      {
100338fd1498Szrj 		matches[i] = false;
100438fd1498Szrj 		if (dump_enabled_p ())
100538fd1498Szrj 		  {
100638fd1498Szrj 		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
100738fd1498Szrj 				     "Build SLP failed: different operation "
100838fd1498Szrj 				     "in stmt ");
100938fd1498Szrj 		    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
101038fd1498Szrj 				      stmts[i], 0);
101138fd1498Szrj 		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
101238fd1498Szrj 				     "original stmt ");
101338fd1498Szrj 		    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
101438fd1498Szrj 				      first_stmt, 0);
101538fd1498Szrj 		  }
101638fd1498Szrj 	      }
101738fd1498Szrj 	  return false;
101838fd1498Szrj 	}
101938fd1498Szrj       *two_operators = true;
102038fd1498Szrj     }
102138fd1498Szrj 
102238fd1498Szrj   return true;
102338fd1498Szrj }
102438fd1498Szrj 
102538fd1498Szrj /* Traits for the hash_set to record failed SLP builds for a stmt set.
102638fd1498Szrj    Note we never remove apart from at destruction time so we do not
102738fd1498Szrj    need a special value for deleted that differs from empty.  */
102838fd1498Szrj struct bst_traits
102938fd1498Szrj {
103038fd1498Szrj   typedef vec <gimple *> value_type;
103138fd1498Szrj   typedef vec <gimple *> compare_type;
103238fd1498Szrj   static inline hashval_t hash (value_type);
103338fd1498Szrj   static inline bool equal (value_type existing, value_type candidate);
is_emptybst_traits103438fd1498Szrj   static inline bool is_empty (value_type x) { return !x.exists (); }
is_deletedbst_traits103538fd1498Szrj   static inline bool is_deleted (value_type x) { return !x.exists (); }
mark_emptybst_traits103638fd1498Szrj   static inline void mark_empty (value_type &x) { x.release (); }
mark_deletedbst_traits103738fd1498Szrj   static inline void mark_deleted (value_type &x) { x.release (); }
removebst_traits103838fd1498Szrj   static inline void remove (value_type &x) { x.release (); }
103938fd1498Szrj };
104038fd1498Szrj inline hashval_t
hash(value_type x)104138fd1498Szrj bst_traits::hash (value_type x)
104238fd1498Szrj {
104338fd1498Szrj   inchash::hash h;
104438fd1498Szrj   for (unsigned i = 0; i < x.length (); ++i)
104538fd1498Szrj     h.add_int (gimple_uid (x[i]));
104638fd1498Szrj   return h.end ();
104738fd1498Szrj }
104838fd1498Szrj inline bool
equal(value_type existing,value_type candidate)104938fd1498Szrj bst_traits::equal (value_type existing, value_type candidate)
105038fd1498Szrj {
105138fd1498Szrj   if (existing.length () != candidate.length ())
105238fd1498Szrj     return false;
105338fd1498Szrj   for (unsigned i = 0; i < existing.length (); ++i)
105438fd1498Szrj     if (existing[i] != candidate[i])
105538fd1498Szrj       return false;
105638fd1498Szrj   return true;
105738fd1498Szrj }
105838fd1498Szrj 
105938fd1498Szrj typedef hash_set <vec <gimple *>, bst_traits> scalar_stmts_set_t;
106038fd1498Szrj static scalar_stmts_set_t *bst_fail;
106138fd1498Szrj 
106238fd1498Szrj static slp_tree
106338fd1498Szrj vect_build_slp_tree_2 (vec_info *vinfo,
106438fd1498Szrj 		       vec<gimple *> stmts, unsigned int group_size,
106538fd1498Szrj 		       poly_uint64 *max_nunits,
106638fd1498Szrj 		       vec<slp_tree> *loads,
106738fd1498Szrj 		       bool *matches, unsigned *npermutes, unsigned *tree_size,
106838fd1498Szrj 		       unsigned max_tree_size);
106938fd1498Szrj 
107038fd1498Szrj static slp_tree
vect_build_slp_tree(vec_info * vinfo,vec<gimple * > stmts,unsigned int group_size,poly_uint64 * max_nunits,vec<slp_tree> * loads,bool * matches,unsigned * npermutes,unsigned * tree_size,unsigned max_tree_size)107138fd1498Szrj vect_build_slp_tree (vec_info *vinfo,
107238fd1498Szrj 		     vec<gimple *> stmts, unsigned int group_size,
107338fd1498Szrj 		     poly_uint64 *max_nunits, vec<slp_tree> *loads,
107438fd1498Szrj 		     bool *matches, unsigned *npermutes, unsigned *tree_size,
107538fd1498Szrj 		     unsigned max_tree_size)
107638fd1498Szrj {
107738fd1498Szrj   if (bst_fail->contains (stmts))
107838fd1498Szrj     return NULL;
107938fd1498Szrj   slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size, max_nunits,
108038fd1498Szrj 					loads, matches, npermutes, tree_size,
108138fd1498Szrj 					max_tree_size);
108238fd1498Szrj   /* When SLP build fails for stmts record this, otherwise SLP build
108338fd1498Szrj      can be exponential in time when we allow to construct parts from
108438fd1498Szrj      scalars, see PR81723.  */
108538fd1498Szrj   if (! res)
108638fd1498Szrj     {
108738fd1498Szrj       vec <gimple *> x;
108838fd1498Szrj       x.create (stmts.length ());
108938fd1498Szrj       x.splice (stmts);
109038fd1498Szrj       bst_fail->add (x);
109138fd1498Szrj     }
109238fd1498Szrj   return res;
109338fd1498Szrj }
109438fd1498Szrj 
109538fd1498Szrj /* Recursively build an SLP tree starting from NODE.
109638fd1498Szrj    Fail (and return a value not equal to zero) if def-stmts are not
109738fd1498Szrj    isomorphic, require data permutation or are of unsupported types of
109838fd1498Szrj    operation.  Otherwise, return 0.
109938fd1498Szrj    The value returned is the depth in the SLP tree where a mismatch
110038fd1498Szrj    was found.  */
110138fd1498Szrj 
110238fd1498Szrj static slp_tree
vect_build_slp_tree_2(vec_info * vinfo,vec<gimple * > stmts,unsigned int group_size,poly_uint64 * max_nunits,vec<slp_tree> * loads,bool * matches,unsigned * npermutes,unsigned * tree_size,unsigned max_tree_size)110338fd1498Szrj vect_build_slp_tree_2 (vec_info *vinfo,
110438fd1498Szrj 		       vec<gimple *> stmts, unsigned int group_size,
110538fd1498Szrj 		       poly_uint64 *max_nunits,
110638fd1498Szrj 		       vec<slp_tree> *loads,
110738fd1498Szrj 		       bool *matches, unsigned *npermutes, unsigned *tree_size,
110838fd1498Szrj 		       unsigned max_tree_size)
110938fd1498Szrj {
111038fd1498Szrj   unsigned nops, i, this_tree_size = 0;
111138fd1498Szrj   poly_uint64 this_max_nunits = *max_nunits;
111238fd1498Szrj   gimple *stmt;
111338fd1498Szrj   slp_tree node;
111438fd1498Szrj 
111538fd1498Szrj   matches[0] = false;
111638fd1498Szrj 
111738fd1498Szrj   stmt = stmts[0];
111838fd1498Szrj   if (is_gimple_call (stmt))
111938fd1498Szrj     nops = gimple_call_num_args (stmt);
112038fd1498Szrj   else if (is_gimple_assign (stmt))
112138fd1498Szrj     {
112238fd1498Szrj       nops = gimple_num_ops (stmt) - 1;
112338fd1498Szrj       if (gimple_assign_rhs_code (stmt) == COND_EXPR)
112438fd1498Szrj 	nops++;
112538fd1498Szrj     }
112638fd1498Szrj   else if (gimple_code (stmt) == GIMPLE_PHI)
112738fd1498Szrj     nops = 0;
112838fd1498Szrj   else
112938fd1498Szrj     return NULL;
113038fd1498Szrj 
113138fd1498Szrj   /* If the SLP node is a PHI (induction or reduction), terminate
113238fd1498Szrj      the recursion.  */
113338fd1498Szrj   if (gimple_code (stmt) == GIMPLE_PHI)
113438fd1498Szrj     {
113538fd1498Szrj       tree scalar_type = TREE_TYPE (PHI_RESULT (stmt));
113638fd1498Szrj       tree vectype = get_vectype_for_scalar_type (scalar_type);
113738fd1498Szrj       if (!vect_record_max_nunits (vinfo, stmt, group_size, vectype,
113838fd1498Szrj 				   max_nunits))
113938fd1498Szrj 	return NULL;
114038fd1498Szrj 
114138fd1498Szrj       vect_def_type def_type = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt));
114238fd1498Szrj       /* Induction from different IVs is not supported.  */
114338fd1498Szrj       if (def_type == vect_induction_def)
114438fd1498Szrj 	{
114538fd1498Szrj 	  FOR_EACH_VEC_ELT (stmts, i, stmt)
114638fd1498Szrj 	    if (stmt != stmts[0])
114738fd1498Szrj 	      return NULL;
114838fd1498Szrj 	}
114938fd1498Szrj       else
115038fd1498Szrj 	{
115138fd1498Szrj 	  /* Else def types have to match.  */
115238fd1498Szrj 	  FOR_EACH_VEC_ELT (stmts, i, stmt)
115338fd1498Szrj 	    {
115438fd1498Szrj 	      /* But for reduction chains only check on the first stmt.  */
115538fd1498Szrj 	      if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
115638fd1498Szrj 		  && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt)
115738fd1498Szrj 		continue;
115838fd1498Szrj 	      if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != def_type)
115938fd1498Szrj 		return NULL;
116038fd1498Szrj 	    }
116138fd1498Szrj 	}
116238fd1498Szrj       node = vect_create_new_slp_node (stmts);
116338fd1498Szrj       return node;
116438fd1498Szrj     }
116538fd1498Szrj 
116638fd1498Szrj 
116738fd1498Szrj   bool two_operators = false;
116838fd1498Szrj   unsigned char *swap = XALLOCAVEC (unsigned char, group_size);
116938fd1498Szrj   if (!vect_build_slp_tree_1 (vinfo, swap,
117038fd1498Szrj 			      stmts, group_size, nops,
117138fd1498Szrj 			      &this_max_nunits, matches, &two_operators))
117238fd1498Szrj     return NULL;
117338fd1498Szrj 
117438fd1498Szrj   /* If the SLP node is a load, terminate the recursion.  */
117538fd1498Szrj   if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
117638fd1498Szrj       && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
117738fd1498Szrj     {
117838fd1498Szrj       *max_nunits = this_max_nunits;
117938fd1498Szrj       node = vect_create_new_slp_node (stmts);
118038fd1498Szrj       loads->safe_push (node);
118138fd1498Szrj       return node;
118238fd1498Szrj     }
118338fd1498Szrj 
118438fd1498Szrj   /* Get at the operands, verifying they are compatible.  */
118538fd1498Szrj   vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
118638fd1498Szrj   slp_oprnd_info oprnd_info;
118738fd1498Szrj   FOR_EACH_VEC_ELT (stmts, i, stmt)
118838fd1498Szrj     {
118938fd1498Szrj       int res = vect_get_and_check_slp_defs (vinfo, &swap[i],
119038fd1498Szrj 					     stmts, i, &oprnds_info);
119138fd1498Szrj       if (res != 0)
119238fd1498Szrj 	matches[(res == -1) ? 0 : i] = false;
119338fd1498Szrj       if (!matches[0])
119438fd1498Szrj 	break;
119538fd1498Szrj     }
119638fd1498Szrj   for (i = 0; i < group_size; ++i)
119738fd1498Szrj     if (!matches[i])
119838fd1498Szrj       {
119938fd1498Szrj 	vect_free_oprnd_info (oprnds_info);
120038fd1498Szrj 	return NULL;
120138fd1498Szrj       }
120238fd1498Szrj 
120338fd1498Szrj   auto_vec<slp_tree, 4> children;
120438fd1498Szrj   auto_vec<slp_tree> this_loads;
120538fd1498Szrj 
120638fd1498Szrj   stmt = stmts[0];
120738fd1498Szrj 
120838fd1498Szrj   if (tree_size)
120938fd1498Szrj     max_tree_size -= *tree_size;
121038fd1498Szrj 
121138fd1498Szrj   /* Create SLP_TREE nodes for the definition node/s.  */
121238fd1498Szrj   FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
121338fd1498Szrj     {
121438fd1498Szrj       slp_tree child;
121538fd1498Szrj       unsigned old_nloads = this_loads.length ();
121638fd1498Szrj       unsigned old_tree_size = this_tree_size;
121738fd1498Szrj       unsigned int j;
121838fd1498Szrj 
121938fd1498Szrj       if (oprnd_info->first_dt != vect_internal_def
122038fd1498Szrj 	  && oprnd_info->first_dt != vect_reduction_def
122138fd1498Szrj 	  && oprnd_info->first_dt != vect_induction_def)
122238fd1498Szrj         continue;
122338fd1498Szrj 
122438fd1498Szrj       if (++this_tree_size > max_tree_size)
122538fd1498Szrj 	{
122638fd1498Szrj 	  FOR_EACH_VEC_ELT (children, j, child)
122738fd1498Szrj 	    vect_free_slp_tree (child);
122838fd1498Szrj 	  vect_free_oprnd_info (oprnds_info);
122938fd1498Szrj 	  return NULL;
123038fd1498Szrj 	}
123138fd1498Szrj 
123238fd1498Szrj       if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
123338fd1498Szrj 					group_size, &this_max_nunits,
123438fd1498Szrj 					&this_loads, matches, npermutes,
123538fd1498Szrj 					&this_tree_size,
123638fd1498Szrj 					max_tree_size)) != NULL)
123738fd1498Szrj 	{
123838fd1498Szrj 	  /* If we have all children of child built up from scalars then just
123938fd1498Szrj 	     throw that away and build it up this node from scalars.  */
124038fd1498Szrj 	  if (!SLP_TREE_CHILDREN (child).is_empty ()
124138fd1498Szrj 	      /* ???  Rejecting patterns this way doesn't work.  We'd have to
124238fd1498Szrj 		 do extra work to cancel the pattern so the uses see the
124338fd1498Szrj 		 scalar version.  */
124438fd1498Szrj 	      && !is_pattern_stmt_p
124538fd1498Szrj 	            (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])))
124638fd1498Szrj 	    {
124738fd1498Szrj 	      slp_tree grandchild;
124838fd1498Szrj 
124938fd1498Szrj 	      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
125038fd1498Szrj 		if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
125138fd1498Szrj 		  break;
125238fd1498Szrj 	      if (!grandchild)
125338fd1498Szrj 		{
125438fd1498Szrj 		  /* Roll back.  */
125538fd1498Szrj 		  this_loads.truncate (old_nloads);
125638fd1498Szrj 		  this_tree_size = old_tree_size;
125738fd1498Szrj 		  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
125838fd1498Szrj 		    vect_free_slp_tree (grandchild);
125938fd1498Szrj 		  SLP_TREE_CHILDREN (child).truncate (0);
126038fd1498Szrj 
126138fd1498Szrj 		  dump_printf_loc (MSG_NOTE, vect_location,
126238fd1498Szrj 				   "Building parent vector operands from "
126338fd1498Szrj 				   "scalars instead\n");
126438fd1498Szrj 		  oprnd_info->def_stmts = vNULL;
126538fd1498Szrj 		  SLP_TREE_DEF_TYPE (child) = vect_external_def;
126638fd1498Szrj 		  children.safe_push (child);
126738fd1498Szrj 		  continue;
126838fd1498Szrj 		}
126938fd1498Szrj 	    }
127038fd1498Szrj 
127138fd1498Szrj 	  oprnd_info->def_stmts = vNULL;
127238fd1498Szrj 	  children.safe_push (child);
127338fd1498Szrj 	  continue;
127438fd1498Szrj 	}
127538fd1498Szrj 
127638fd1498Szrj       /* If the SLP build failed fatally and we analyze a basic-block
127738fd1498Szrj          simply treat nodes we fail to build as externally defined
127838fd1498Szrj 	 (and thus build vectors from the scalar defs).
127938fd1498Szrj 	 The cost model will reject outright expensive cases.
128038fd1498Szrj 	 ???  This doesn't treat cases where permutation ultimatively
128138fd1498Szrj 	 fails (or we don't try permutation below).  Ideally we'd
128238fd1498Szrj 	 even compute a permutation that will end up with the maximum
128338fd1498Szrj 	 SLP tree size...  */
128438fd1498Szrj       if (is_a <bb_vec_info> (vinfo)
128538fd1498Szrj 	  && !matches[0]
128638fd1498Szrj 	  /* ???  Rejecting patterns this way doesn't work.  We'd have to
128738fd1498Szrj 	     do extra work to cancel the pattern so the uses see the
128838fd1498Szrj 	     scalar version.  */
128938fd1498Szrj 	  && !is_pattern_stmt_p (vinfo_for_stmt (stmt)))
129038fd1498Szrj 	{
129138fd1498Szrj 	  dump_printf_loc (MSG_NOTE, vect_location,
129238fd1498Szrj 			   "Building vector operands from scalars\n");
129338fd1498Szrj 	  child = vect_create_new_slp_node (oprnd_info->def_stmts);
129438fd1498Szrj 	  SLP_TREE_DEF_TYPE (child) = vect_external_def;
129538fd1498Szrj 	  children.safe_push (child);
129638fd1498Szrj 	  oprnd_info->def_stmts = vNULL;
129738fd1498Szrj 	  continue;
129838fd1498Szrj 	}
129938fd1498Szrj 
130038fd1498Szrj       /* If the SLP build for operand zero failed and operand zero
130138fd1498Szrj 	 and one can be commutated try that for the scalar stmts
130238fd1498Szrj 	 that failed the match.  */
130338fd1498Szrj       if (i == 0
130438fd1498Szrj 	  /* A first scalar stmt mismatch signals a fatal mismatch.  */
130538fd1498Szrj 	  && matches[0]
130638fd1498Szrj 	  /* ???  For COND_EXPRs we can swap the comparison operands
130738fd1498Szrj 	     as well as the arms under some constraints.  */
130838fd1498Szrj 	  && nops == 2
130938fd1498Szrj 	  && oprnds_info[1]->first_dt == vect_internal_def
131038fd1498Szrj 	  && is_gimple_assign (stmt)
131138fd1498Szrj 	  /* Do so only if the number of not successful permutes was nor more
131238fd1498Szrj 	     than a cut-ff as re-trying the recursive match on
131338fd1498Szrj 	     possibly each level of the tree would expose exponential
131438fd1498Szrj 	     behavior.  */
131538fd1498Szrj 	  && *npermutes < 4)
131638fd1498Szrj 	{
131738fd1498Szrj 	  /* See whether we can swap the matching or the non-matching
131838fd1498Szrj 	     stmt operands.  */
131938fd1498Szrj 	  bool swap_not_matching = true;
132038fd1498Szrj 	  do
132138fd1498Szrj 	    {
132238fd1498Szrj 	      for (j = 0; j < group_size; ++j)
132338fd1498Szrj 		{
132438fd1498Szrj 		  if (matches[j] != !swap_not_matching)
132538fd1498Szrj 		    continue;
132638fd1498Szrj 		  gimple *stmt = stmts[j];
132738fd1498Szrj 		  /* Verify if we can swap operands of this stmt.  */
132838fd1498Szrj 		  if (!is_gimple_assign (stmt)
132938fd1498Szrj 		      || !commutative_tree_code (gimple_assign_rhs_code (stmt)))
133038fd1498Szrj 		    {
133138fd1498Szrj 		      if (!swap_not_matching)
133238fd1498Szrj 			goto fail;
133338fd1498Szrj 		      swap_not_matching = false;
133438fd1498Szrj 		      break;
133538fd1498Szrj 		    }
133638fd1498Szrj 		  /* Verify if we can safely swap or if we committed to a
133738fd1498Szrj 		     specific operand order already.
133838fd1498Szrj 		     ???  Instead of modifying GIMPLE stmts here we could
133938fd1498Szrj 		     record whether we want to swap operands in the SLP
134038fd1498Szrj 		     node and temporarily do that when processing it
134138fd1498Szrj 		     (or wrap operand accessors in a helper).  */
134238fd1498Szrj 		  else if (swap[j] != 0
134338fd1498Szrj 			   || STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)))
134438fd1498Szrj 		    {
134538fd1498Szrj 		      if (!swap_not_matching)
134638fd1498Szrj 			{
134738fd1498Szrj 			  if (dump_enabled_p ())
134838fd1498Szrj 			    {
134938fd1498Szrj 			      dump_printf_loc (MSG_MISSED_OPTIMIZATION,
135038fd1498Szrj 					       vect_location,
135138fd1498Szrj 					       "Build SLP failed: cannot swap "
135238fd1498Szrj 					       "operands of shared stmt ");
135338fd1498Szrj 			      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,
135438fd1498Szrj 						TDF_SLIM, stmts[j], 0);
135538fd1498Szrj 			    }
135638fd1498Szrj 			  goto fail;
135738fd1498Szrj 			}
135838fd1498Szrj 		      swap_not_matching = false;
135938fd1498Szrj 		      break;
136038fd1498Szrj 		    }
136138fd1498Szrj 		}
136238fd1498Szrj 	    }
136338fd1498Szrj 	  while (j != group_size);
136438fd1498Szrj 
136538fd1498Szrj 	  /* Swap mismatched definition stmts.  */
136638fd1498Szrj 	  dump_printf_loc (MSG_NOTE, vect_location,
136738fd1498Szrj 			   "Re-trying with swapped operands of stmts ");
136838fd1498Szrj 	  for (j = 0; j < group_size; ++j)
136938fd1498Szrj 	    if (matches[j] == !swap_not_matching)
137038fd1498Szrj 	      {
137138fd1498Szrj 		std::swap (oprnds_info[0]->def_stmts[j],
137238fd1498Szrj 			   oprnds_info[1]->def_stmts[j]);
137338fd1498Szrj 		dump_printf (MSG_NOTE, "%d ", j);
137438fd1498Szrj 	      }
137538fd1498Szrj 	  dump_printf (MSG_NOTE, "\n");
137638fd1498Szrj 	  /* And try again with scratch 'matches' ... */
137738fd1498Szrj 	  bool *tem = XALLOCAVEC (bool, group_size);
137838fd1498Szrj 	  if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
137938fd1498Szrj 					    group_size, &this_max_nunits,
138038fd1498Szrj 					    &this_loads, tem, npermutes,
138138fd1498Szrj 					    &this_tree_size,
138238fd1498Szrj 					    max_tree_size)) != NULL)
138338fd1498Szrj 	    {
138438fd1498Szrj 	      /* ... so if successful we can apply the operand swapping
138538fd1498Szrj 		 to the GIMPLE IL.  This is necessary because for example
138638fd1498Szrj 		 vect_get_slp_defs uses operand indexes and thus expects
138738fd1498Szrj 		 canonical operand order.  This is also necessary even
138838fd1498Szrj 		 if we end up building the operand from scalars as
138938fd1498Szrj 		 we'll continue to process swapped operand two.  */
139038fd1498Szrj 	      for (j = 0; j < group_size; ++j)
139138fd1498Szrj 		{
139238fd1498Szrj 		  gimple *stmt = stmts[j];
139338fd1498Szrj 		  gimple_set_plf (stmt, GF_PLF_1, false);
139438fd1498Szrj 		}
139538fd1498Szrj 	      for (j = 0; j < group_size; ++j)
139638fd1498Szrj 		{
139738fd1498Szrj 		  gimple *stmt = stmts[j];
139838fd1498Szrj 		  if (matches[j] == !swap_not_matching)
139938fd1498Szrj 		    {
140038fd1498Szrj 		      /* Avoid swapping operands twice.  */
140138fd1498Szrj 		      if (gimple_plf (stmt, GF_PLF_1))
140238fd1498Szrj 			continue;
140338fd1498Szrj 		      swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
140438fd1498Szrj 					 gimple_assign_rhs2_ptr (stmt));
140538fd1498Szrj 		      gimple_set_plf (stmt, GF_PLF_1, true);
140638fd1498Szrj 		    }
140738fd1498Szrj 		}
140838fd1498Szrj 	      /* Verify we swap all duplicates or none.  */
140938fd1498Szrj 	      if (flag_checking)
141038fd1498Szrj 		for (j = 0; j < group_size; ++j)
141138fd1498Szrj 		  {
141238fd1498Szrj 		    gimple *stmt = stmts[j];
141338fd1498Szrj 		    gcc_assert (gimple_plf (stmt, GF_PLF_1)
141438fd1498Szrj 				== (matches[j] == !swap_not_matching));
141538fd1498Szrj 		  }
141638fd1498Szrj 
141738fd1498Szrj 	      /* If we have all children of child built up from scalars then
141838fd1498Szrj 		 just throw that away and build it up this node from scalars.  */
141938fd1498Szrj 	      if (!SLP_TREE_CHILDREN (child).is_empty ()
142038fd1498Szrj 		  /* ???  Rejecting patterns this way doesn't work.  We'd have
142138fd1498Szrj 		     to do extra work to cancel the pattern so the uses see the
142238fd1498Szrj 		     scalar version.  */
142338fd1498Szrj 		  && !is_pattern_stmt_p
142438fd1498Szrj 			(vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])))
142538fd1498Szrj 		{
142638fd1498Szrj 		  unsigned int j;
142738fd1498Szrj 		  slp_tree grandchild;
142838fd1498Szrj 
142938fd1498Szrj 		  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
143038fd1498Szrj 		    if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
143138fd1498Szrj 		      break;
143238fd1498Szrj 		  if (!grandchild)
143338fd1498Szrj 		    {
143438fd1498Szrj 		      /* Roll back.  */
143538fd1498Szrj 		      this_loads.truncate (old_nloads);
143638fd1498Szrj 		      this_tree_size = old_tree_size;
143738fd1498Szrj 		      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
143838fd1498Szrj 			vect_free_slp_tree (grandchild);
143938fd1498Szrj 		      SLP_TREE_CHILDREN (child).truncate (0);
144038fd1498Szrj 
144138fd1498Szrj 		      dump_printf_loc (MSG_NOTE, vect_location,
144238fd1498Szrj 				       "Building parent vector operands from "
144338fd1498Szrj 				       "scalars instead\n");
144438fd1498Szrj 		      oprnd_info->def_stmts = vNULL;
144538fd1498Szrj 		      SLP_TREE_DEF_TYPE (child) = vect_external_def;
144638fd1498Szrj 		      children.safe_push (child);
144738fd1498Szrj 		      continue;
144838fd1498Szrj 		    }
144938fd1498Szrj 		}
145038fd1498Szrj 
145138fd1498Szrj 	      oprnd_info->def_stmts = vNULL;
145238fd1498Szrj 	      children.safe_push (child);
145338fd1498Szrj 	      continue;
145438fd1498Szrj 	    }
145538fd1498Szrj 
145638fd1498Szrj 	  ++*npermutes;
145738fd1498Szrj 	}
145838fd1498Szrj 
145938fd1498Szrj fail:
146038fd1498Szrj       gcc_assert (child == NULL);
146138fd1498Szrj       FOR_EACH_VEC_ELT (children, j, child)
146238fd1498Szrj 	vect_free_slp_tree (child);
146338fd1498Szrj       vect_free_oprnd_info (oprnds_info);
146438fd1498Szrj       return NULL;
146538fd1498Szrj     }
146638fd1498Szrj 
146738fd1498Szrj   vect_free_oprnd_info (oprnds_info);
146838fd1498Szrj 
146938fd1498Szrj   if (tree_size)
147038fd1498Szrj     *tree_size += this_tree_size;
147138fd1498Szrj   *max_nunits = this_max_nunits;
147238fd1498Szrj   loads->safe_splice (this_loads);
147338fd1498Szrj 
147438fd1498Szrj   node = vect_create_new_slp_node (stmts);
147538fd1498Szrj   SLP_TREE_TWO_OPERATORS (node) = two_operators;
147638fd1498Szrj   SLP_TREE_CHILDREN (node).splice (children);
147738fd1498Szrj   return node;
147838fd1498Szrj }
147938fd1498Szrj 
148038fd1498Szrj /* Dump a slp tree NODE using flags specified in DUMP_KIND.  */
148138fd1498Szrj 
148238fd1498Szrj static void
vect_print_slp_tree(dump_flags_t dump_kind,location_t loc,slp_tree node)148338fd1498Szrj vect_print_slp_tree (dump_flags_t dump_kind, location_t loc, slp_tree node)
148438fd1498Szrj {
148538fd1498Szrj   int i;
148638fd1498Szrj   gimple *stmt;
148738fd1498Szrj   slp_tree child;
148838fd1498Szrj 
148938fd1498Szrj   dump_printf_loc (dump_kind, loc, "node%s\n",
149038fd1498Szrj 		   SLP_TREE_DEF_TYPE (node) != vect_internal_def
149138fd1498Szrj 		   ? " (external)" : "");
149238fd1498Szrj   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
149338fd1498Szrj     {
149438fd1498Szrj       dump_printf_loc (dump_kind, loc, "\tstmt %d ", i);
149538fd1498Szrj       dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
149638fd1498Szrj     }
149738fd1498Szrj   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
149838fd1498Szrj     vect_print_slp_tree (dump_kind, loc, child);
149938fd1498Szrj }
150038fd1498Szrj 
150138fd1498Szrj 
150238fd1498Szrj /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
150338fd1498Szrj    If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
150438fd1498Szrj    J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
150538fd1498Szrj    stmts in NODE are to be marked.  */
150638fd1498Szrj 
150738fd1498Szrj static void
vect_mark_slp_stmts(slp_tree node,enum slp_vect_type mark,int j)150838fd1498Szrj vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
150938fd1498Szrj {
151038fd1498Szrj   int i;
151138fd1498Szrj   gimple *stmt;
151238fd1498Szrj   slp_tree child;
151338fd1498Szrj 
151438fd1498Szrj   if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
151538fd1498Szrj     return;
151638fd1498Szrj 
151738fd1498Szrj   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
151838fd1498Szrj     if (j < 0 || i == j)
151938fd1498Szrj       STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
152038fd1498Szrj 
152138fd1498Szrj   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
152238fd1498Szrj     vect_mark_slp_stmts (child, mark, j);
152338fd1498Szrj }
152438fd1498Szrj 
152538fd1498Szrj 
152638fd1498Szrj /* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */
152738fd1498Szrj 
152838fd1498Szrj static void
vect_mark_slp_stmts_relevant(slp_tree node)152938fd1498Szrj vect_mark_slp_stmts_relevant (slp_tree node)
153038fd1498Szrj {
153138fd1498Szrj   int i;
153238fd1498Szrj   gimple *stmt;
153338fd1498Szrj   stmt_vec_info stmt_info;
153438fd1498Szrj   slp_tree child;
153538fd1498Szrj 
153638fd1498Szrj   if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
153738fd1498Szrj     return;
153838fd1498Szrj 
153938fd1498Szrj   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
154038fd1498Szrj     {
154138fd1498Szrj       stmt_info = vinfo_for_stmt (stmt);
154238fd1498Szrj       gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
154338fd1498Szrj                   || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
154438fd1498Szrj       STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
154538fd1498Szrj     }
154638fd1498Szrj 
154738fd1498Szrj   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
154838fd1498Szrj     vect_mark_slp_stmts_relevant (child);
154938fd1498Szrj }
155038fd1498Szrj 
155138fd1498Szrj 
155238fd1498Szrj /* Rearrange the statements of NODE according to PERMUTATION.  */
155338fd1498Szrj 
155438fd1498Szrj static void
vect_slp_rearrange_stmts(slp_tree node,unsigned int group_size,vec<unsigned> permutation)155538fd1498Szrj vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
155638fd1498Szrj                           vec<unsigned> permutation)
155738fd1498Szrj {
155838fd1498Szrj   gimple *stmt;
155938fd1498Szrj   vec<gimple *> tmp_stmts;
156038fd1498Szrj   unsigned int i;
156138fd1498Szrj   slp_tree child;
156238fd1498Szrj 
156338fd1498Szrj   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
156438fd1498Szrj     vect_slp_rearrange_stmts (child, group_size, permutation);
156538fd1498Szrj 
156638fd1498Szrj   gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
156738fd1498Szrj   tmp_stmts.create (group_size);
156838fd1498Szrj   tmp_stmts.quick_grow_cleared (group_size);
156938fd1498Szrj 
157038fd1498Szrj   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
157138fd1498Szrj     tmp_stmts[permutation[i]] = stmt;
157238fd1498Szrj 
157338fd1498Szrj   SLP_TREE_SCALAR_STMTS (node).release ();
157438fd1498Szrj   SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
157538fd1498Szrj }
157638fd1498Szrj 
157738fd1498Szrj 
157838fd1498Szrj /* Attempt to reorder stmts in a reduction chain so that we don't
157938fd1498Szrj    require any load permutation.  Return true if that was possible,
158038fd1498Szrj    otherwise return false.  */
158138fd1498Szrj 
158238fd1498Szrj static bool
vect_attempt_slp_rearrange_stmts(slp_instance slp_instn)158338fd1498Szrj vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
158438fd1498Szrj {
158538fd1498Szrj   unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
158638fd1498Szrj   unsigned int i, j;
158738fd1498Szrj   unsigned int lidx;
158838fd1498Szrj   slp_tree node, load;
158938fd1498Szrj 
159038fd1498Szrj   /* Compare all the permutation sequences to the first one.  We know
159138fd1498Szrj      that at least one load is permuted.  */
159238fd1498Szrj   node = SLP_INSTANCE_LOADS (slp_instn)[0];
159338fd1498Szrj   if (!node->load_permutation.exists ())
159438fd1498Szrj     return false;
159538fd1498Szrj   for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
159638fd1498Szrj     {
159738fd1498Szrj       if (!load->load_permutation.exists ())
159838fd1498Szrj 	return false;
159938fd1498Szrj       FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
160038fd1498Szrj 	if (lidx != node->load_permutation[j])
160138fd1498Szrj 	  return false;
160238fd1498Szrj     }
160338fd1498Szrj 
160438fd1498Szrj   /* Check that the loads in the first sequence are different and there
160538fd1498Szrj      are no gaps between them.  */
160638fd1498Szrj   auto_sbitmap load_index (group_size);
160738fd1498Szrj   bitmap_clear (load_index);
160838fd1498Szrj   FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
160938fd1498Szrj     {
161038fd1498Szrj       if (lidx >= group_size)
161138fd1498Szrj 	return false;
161238fd1498Szrj       if (bitmap_bit_p (load_index, lidx))
161338fd1498Szrj 	return false;
161438fd1498Szrj 
161538fd1498Szrj       bitmap_set_bit (load_index, lidx);
161638fd1498Szrj     }
161738fd1498Szrj   for (i = 0; i < group_size; i++)
161838fd1498Szrj     if (!bitmap_bit_p (load_index, i))
161938fd1498Szrj       return false;
162038fd1498Szrj 
162138fd1498Szrj   /* This permutation is valid for reduction.  Since the order of the
162238fd1498Szrj      statements in the nodes is not important unless they are memory
162338fd1498Szrj      accesses, we can rearrange the statements in all the nodes
162438fd1498Szrj      according to the order of the loads.  */
162538fd1498Szrj   vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
162638fd1498Szrj 			    node->load_permutation);
162738fd1498Szrj 
162838fd1498Szrj   /* We are done, no actual permutations need to be generated.  */
162938fd1498Szrj   poly_uint64 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn);
163038fd1498Szrj   FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
163138fd1498Szrj     {
163238fd1498Szrj       gimple *first_stmt = SLP_TREE_SCALAR_STMTS (node)[0];
163338fd1498Szrj       first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt));
163438fd1498Szrj       /* But we have to keep those permutations that are required because
163538fd1498Szrj          of handling of gaps.  */
163638fd1498Szrj       if (known_eq (unrolling_factor, 1U)
163738fd1498Szrj 	  || (group_size == GROUP_SIZE (vinfo_for_stmt (first_stmt))
163838fd1498Szrj 	      && GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0))
163938fd1498Szrj 	SLP_TREE_LOAD_PERMUTATION (node).release ();
164038fd1498Szrj       else
164138fd1498Szrj 	for (j = 0; j < SLP_TREE_LOAD_PERMUTATION (node).length (); ++j)
164238fd1498Szrj 	  SLP_TREE_LOAD_PERMUTATION (node)[j] = j;
164338fd1498Szrj     }
164438fd1498Szrj 
164538fd1498Szrj   return true;
164638fd1498Szrj }
164738fd1498Szrj 
164838fd1498Szrj /* Check if the required load permutations in the SLP instance
164938fd1498Szrj    SLP_INSTN are supported.  */
165038fd1498Szrj 
165138fd1498Szrj static bool
vect_supported_load_permutation_p(slp_instance slp_instn)165238fd1498Szrj vect_supported_load_permutation_p (slp_instance slp_instn)
165338fd1498Szrj {
165438fd1498Szrj   unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
165538fd1498Szrj   unsigned int i, j, k, next;
165638fd1498Szrj   slp_tree node;
165738fd1498Szrj   gimple *stmt, *load, *next_load;
165838fd1498Szrj 
165938fd1498Szrj   if (dump_enabled_p ())
166038fd1498Szrj     {
166138fd1498Szrj       dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
166238fd1498Szrj       FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
166338fd1498Szrj 	if (node->load_permutation.exists ())
166438fd1498Szrj 	  FOR_EACH_VEC_ELT (node->load_permutation, j, next)
166538fd1498Szrj 	    dump_printf (MSG_NOTE, "%d ", next);
166638fd1498Szrj 	else
166738fd1498Szrj 	  for (k = 0; k < group_size; ++k)
166838fd1498Szrj 	    dump_printf (MSG_NOTE, "%d ", k);
166938fd1498Szrj       dump_printf (MSG_NOTE, "\n");
167038fd1498Szrj     }
167138fd1498Szrj 
167238fd1498Szrj   /* In case of reduction every load permutation is allowed, since the order
167338fd1498Szrj      of the reduction statements is not important (as opposed to the case of
167438fd1498Szrj      grouped stores).  The only condition we need to check is that all the
167538fd1498Szrj      load nodes are of the same size and have the same permutation (and then
167638fd1498Szrj      rearrange all the nodes of the SLP instance according to this
167738fd1498Szrj      permutation).  */
167838fd1498Szrj 
167938fd1498Szrj   /* Check that all the load nodes are of the same size.  */
168038fd1498Szrj   /* ???  Can't we assert this? */
168138fd1498Szrj   FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
168238fd1498Szrj     if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
168338fd1498Szrj       return false;
168438fd1498Szrj 
168538fd1498Szrj   node = SLP_INSTANCE_TREE (slp_instn);
168638fd1498Szrj   stmt = SLP_TREE_SCALAR_STMTS (node)[0];
168738fd1498Szrj 
168838fd1498Szrj   /* Reduction (there are no data-refs in the root).
168938fd1498Szrj      In reduction chain the order of the loads is not important.  */
169038fd1498Szrj   if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
169138fd1498Szrj       && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
169238fd1498Szrj     vect_attempt_slp_rearrange_stmts (slp_instn);
169338fd1498Szrj 
169438fd1498Szrj   /* In basic block vectorization we allow any subchain of an interleaving
169538fd1498Szrj      chain.
169638fd1498Szrj      FORNOW: not supported in loop SLP because of realignment compications.  */
169738fd1498Szrj   if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)))
169838fd1498Szrj     {
169938fd1498Szrj       /* Check whether the loads in an instance form a subchain and thus
170038fd1498Szrj          no permutation is necessary.  */
170138fd1498Szrj       FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
170238fd1498Szrj         {
170338fd1498Szrj 	  if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
170438fd1498Szrj 	    continue;
170538fd1498Szrj 	  bool subchain_p = true;
170638fd1498Szrj           next_load = NULL;
170738fd1498Szrj           FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load)
170838fd1498Szrj             {
170938fd1498Szrj               if (j != 0
171038fd1498Szrj 		  && (next_load != load
171138fd1498Szrj 		      || GROUP_GAP (vinfo_for_stmt (load)) != 1))
171238fd1498Szrj 		{
171338fd1498Szrj 		  subchain_p = false;
171438fd1498Szrj 		  break;
171538fd1498Szrj 		}
171638fd1498Szrj               next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
171738fd1498Szrj             }
171838fd1498Szrj 	  if (subchain_p)
171938fd1498Szrj 	    SLP_TREE_LOAD_PERMUTATION (node).release ();
172038fd1498Szrj 	  else
172138fd1498Szrj 	    {
172238fd1498Szrj 	      stmt_vec_info group_info
172338fd1498Szrj 		= vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
172438fd1498Szrj 	      group_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (group_info));
172538fd1498Szrj 	      unsigned HOST_WIDE_INT nunits;
172638fd1498Szrj 	      unsigned k, maxk = 0;
172738fd1498Szrj 	      FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), j, k)
172838fd1498Szrj 		if (k > maxk)
172938fd1498Szrj 		  maxk = k;
173038fd1498Szrj 	      /* In BB vectorization we may not actually use a loaded vector
173138fd1498Szrj 		 accessing elements in excess of GROUP_SIZE.  */
173238fd1498Szrj 	      tree vectype = STMT_VINFO_VECTYPE (group_info);
173338fd1498Szrj 	      if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
173438fd1498Szrj 		  || maxk >= (GROUP_SIZE (group_info) & ~(nunits - 1)))
173538fd1498Szrj 		{
173638fd1498Szrj 		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
173738fd1498Szrj 				   "BB vectorization with gaps at the end of "
173838fd1498Szrj 				   "a load is not supported\n");
173938fd1498Szrj 		  return false;
174038fd1498Szrj 		}
174138fd1498Szrj 
174238fd1498Szrj 	      /* Verify the permutation can be generated.  */
174338fd1498Szrj 	      vec<tree> tem;
174438fd1498Szrj 	      unsigned n_perms;
174538fd1498Szrj 	      if (!vect_transform_slp_perm_load (node, tem, NULL,
174638fd1498Szrj 						 1, slp_instn, true, &n_perms))
174738fd1498Szrj 		{
174838fd1498Szrj 		  dump_printf_loc (MSG_MISSED_OPTIMIZATION,
174938fd1498Szrj 				   vect_location,
175038fd1498Szrj 				   "unsupported load permutation\n");
175138fd1498Szrj 		  return false;
175238fd1498Szrj 		}
175338fd1498Szrj 	    }
175438fd1498Szrj         }
175538fd1498Szrj       return true;
175638fd1498Szrj     }
175738fd1498Szrj 
175838fd1498Szrj   /* For loop vectorization verify we can generate the permutation.  Be
175938fd1498Szrj      conservative about the vectorization factor, there are permutations
176038fd1498Szrj      that will use three vector inputs only starting from a specific factor
176138fd1498Szrj      and the vectorization factor is not yet final.
176238fd1498Szrj      ???  The SLP instance unrolling factor might not be the maximum one.  */
176338fd1498Szrj   unsigned n_perms;
176438fd1498Szrj   poly_uint64 test_vf
176538fd1498Szrj     = force_common_multiple (SLP_INSTANCE_UNROLLING_FACTOR (slp_instn),
176638fd1498Szrj 			     LOOP_VINFO_VECT_FACTOR
176738fd1498Szrj 			     (STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt))));
176838fd1498Szrj   FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
176938fd1498Szrj     if (node->load_permutation.exists ()
177038fd1498Szrj 	&& !vect_transform_slp_perm_load (node, vNULL, NULL, test_vf,
177138fd1498Szrj 					  slp_instn, true, &n_perms))
177238fd1498Szrj       return false;
177338fd1498Szrj 
177438fd1498Szrj   return true;
177538fd1498Szrj }
177638fd1498Szrj 
177738fd1498Szrj 
177838fd1498Szrj /* Find the last store in SLP INSTANCE.  */
177938fd1498Szrj 
178038fd1498Szrj gimple *
vect_find_last_scalar_stmt_in_slp(slp_tree node)178138fd1498Szrj vect_find_last_scalar_stmt_in_slp (slp_tree node)
178238fd1498Szrj {
178338fd1498Szrj   gimple *last = NULL, *stmt;
178438fd1498Szrj 
178538fd1498Szrj   for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt); i++)
178638fd1498Szrj     {
178738fd1498Szrj       stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
178838fd1498Szrj       if (is_pattern_stmt_p (stmt_vinfo))
178938fd1498Szrj 	last = get_later_stmt (STMT_VINFO_RELATED_STMT (stmt_vinfo), last);
179038fd1498Szrj       else
179138fd1498Szrj 	last = get_later_stmt (stmt, last);
179238fd1498Szrj     }
179338fd1498Szrj 
179438fd1498Szrj   return last;
179538fd1498Szrj }
179638fd1498Szrj 
179738fd1498Szrj /* Compute the cost for the SLP node NODE in the SLP instance INSTANCE.  */
179838fd1498Szrj 
179938fd1498Szrj static void
vect_analyze_slp_cost_1(slp_instance instance,slp_tree node,stmt_vector_for_cost * prologue_cost_vec,stmt_vector_for_cost * body_cost_vec,unsigned ncopies_for_cost,scalar_stmts_set_t * visited)180038fd1498Szrj vect_analyze_slp_cost_1 (slp_instance instance, slp_tree node,
180138fd1498Szrj 			 stmt_vector_for_cost *prologue_cost_vec,
180238fd1498Szrj 			 stmt_vector_for_cost *body_cost_vec,
180338fd1498Szrj 			 unsigned ncopies_for_cost,
180438fd1498Szrj 			 scalar_stmts_set_t* visited)
180538fd1498Szrj {
180638fd1498Szrj   unsigned i, j;
180738fd1498Szrj   slp_tree child;
180838fd1498Szrj   gimple *stmt;
180938fd1498Szrj   stmt_vec_info stmt_info;
181038fd1498Szrj   tree lhs;
181138fd1498Szrj 
181238fd1498Szrj   /* If we already costed the exact same set of scalar stmts we're done.
181338fd1498Szrj      We share the generated vector stmts for those.  */
181438fd1498Szrj   if (visited->contains (SLP_TREE_SCALAR_STMTS (node)))
181538fd1498Szrj     return;
181638fd1498Szrj 
181738fd1498Szrj   visited->add (SLP_TREE_SCALAR_STMTS (node).copy ());
181838fd1498Szrj 
181938fd1498Szrj   /* Recurse down the SLP tree.  */
182038fd1498Szrj   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
182138fd1498Szrj     if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
182238fd1498Szrj       vect_analyze_slp_cost_1 (instance, child, prologue_cost_vec,
182338fd1498Szrj 			       body_cost_vec, ncopies_for_cost, visited);
182438fd1498Szrj 
182538fd1498Szrj   /* Look at the first scalar stmt to determine the cost.  */
182638fd1498Szrj   stmt = SLP_TREE_SCALAR_STMTS (node)[0];
182738fd1498Szrj   stmt_info = vinfo_for_stmt (stmt);
182838fd1498Szrj   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
182938fd1498Szrj     {
183038fd1498Szrj       vect_memory_access_type memory_access_type
183138fd1498Szrj 	= (STMT_VINFO_STRIDED_P (stmt_info)
183238fd1498Szrj 	   ? VMAT_STRIDED_SLP
183338fd1498Szrj 	   : VMAT_CONTIGUOUS);
183438fd1498Szrj       if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)))
183538fd1498Szrj 	vect_model_store_cost (stmt_info, ncopies_for_cost,
183638fd1498Szrj 			       memory_access_type, VLS_STORE,
183738fd1498Szrj 			       node, prologue_cost_vec, body_cost_vec);
183838fd1498Szrj       else
183938fd1498Szrj 	{
184038fd1498Szrj 	  gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
184138fd1498Szrj 	  if (SLP_TREE_LOAD_PERMUTATION (node).exists ())
184238fd1498Szrj 	    {
184338fd1498Szrj 	      /* If the load is permuted then the alignment is determined by
184438fd1498Szrj 		 the first group element not by the first scalar stmt DR.  */
184538fd1498Szrj 	      stmt = GROUP_FIRST_ELEMENT (stmt_info);
184638fd1498Szrj 	      stmt_info = vinfo_for_stmt (stmt);
184738fd1498Szrj 	      /* Record the cost for the permutation.  */
184838fd1498Szrj 	      unsigned n_perms;
184938fd1498Szrj 	      vect_transform_slp_perm_load (node, vNULL, NULL,
185038fd1498Szrj 					    ncopies_for_cost, instance, true,
185138fd1498Szrj 					    &n_perms);
185238fd1498Szrj 	      record_stmt_cost (body_cost_vec, n_perms, vec_perm,
185338fd1498Szrj 				stmt_info, 0, vect_body);
185438fd1498Szrj 	      unsigned assumed_nunits
185538fd1498Szrj 		= vect_nunits_for_cost (STMT_VINFO_VECTYPE (stmt_info));
185638fd1498Szrj 	      /* And adjust the number of loads performed.  This handles
185738fd1498Szrj 	         redundancies as well as loads that are later dead.  */
185838fd1498Szrj 	      auto_sbitmap perm (GROUP_SIZE (stmt_info));
185938fd1498Szrj 	      bitmap_clear (perm);
186038fd1498Szrj 	      for (i = 0; i < SLP_TREE_LOAD_PERMUTATION (node).length (); ++i)
186138fd1498Szrj 		bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (node)[i]);
186238fd1498Szrj 	      ncopies_for_cost = 0;
186338fd1498Szrj 	      bool load_seen = false;
186438fd1498Szrj 	      for (i = 0; i < GROUP_SIZE (stmt_info); ++i)
186538fd1498Szrj 		{
186638fd1498Szrj 		  if (i % assumed_nunits == 0)
186738fd1498Szrj 		    {
186838fd1498Szrj 		      if (load_seen)
186938fd1498Szrj 			ncopies_for_cost++;
187038fd1498Szrj 		      load_seen = false;
187138fd1498Szrj 		    }
187238fd1498Szrj 		  if (bitmap_bit_p (perm, i))
187338fd1498Szrj 		    load_seen = true;
187438fd1498Szrj 		}
187538fd1498Szrj 	      if (load_seen)
187638fd1498Szrj 		ncopies_for_cost++;
187738fd1498Szrj 	      gcc_assert (ncopies_for_cost
187838fd1498Szrj 			  <= (GROUP_SIZE (stmt_info) - GROUP_GAP (stmt_info)
187938fd1498Szrj 			      + assumed_nunits - 1) / assumed_nunits);
188038fd1498Szrj 	      poly_uint64 uf = SLP_INSTANCE_UNROLLING_FACTOR (instance);
188138fd1498Szrj 	      ncopies_for_cost *= estimated_poly_value (uf);
188238fd1498Szrj 	    }
188338fd1498Szrj 	  /* Record the cost for the vector loads.  */
188438fd1498Szrj 	  vect_model_load_cost (stmt_info, ncopies_for_cost,
188538fd1498Szrj 				memory_access_type, node, prologue_cost_vec,
188638fd1498Szrj 				body_cost_vec);
188738fd1498Szrj 	  return;
188838fd1498Szrj 	}
188938fd1498Szrj     }
189038fd1498Szrj   else if (STMT_VINFO_TYPE (stmt_info) == induc_vec_info_type)
189138fd1498Szrj     {
189238fd1498Szrj       /* ncopies_for_cost is the number of IVs we generate.  */
189338fd1498Szrj       record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
189438fd1498Szrj 			stmt_info, 0, vect_body);
189538fd1498Szrj 
189638fd1498Szrj       /* Prologue cost for the initial values and step vector.  */
189738fd1498Szrj       record_stmt_cost (prologue_cost_vec, ncopies_for_cost,
189838fd1498Szrj 			CONSTANT_CLASS_P
189938fd1498Szrj 			  (STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED
190038fd1498Szrj 			     (stmt_info))
190138fd1498Szrj 			? vector_load : vec_construct,
190238fd1498Szrj 			stmt_info, 0, vect_prologue);
190338fd1498Szrj       record_stmt_cost (prologue_cost_vec, 1,
190438fd1498Szrj 			CONSTANT_CLASS_P
190538fd1498Szrj 			  (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info))
190638fd1498Szrj 			? vector_load : vec_construct,
190738fd1498Szrj 			stmt_info, 0, vect_prologue);
190838fd1498Szrj 
190938fd1498Szrj       /* ???  No easy way to get at the actual number of vector stmts
191038fd1498Szrj          to be geneated and thus the derived IVs.  */
191138fd1498Szrj     }
191238fd1498Szrj   else
191338fd1498Szrj     {
191438fd1498Szrj       record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
191538fd1498Szrj 			stmt_info, 0, vect_body);
191638fd1498Szrj       if (SLP_TREE_TWO_OPERATORS (node))
191738fd1498Szrj 	{
191838fd1498Szrj 	  record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
191938fd1498Szrj 			    stmt_info, 0, vect_body);
192038fd1498Szrj 	  record_stmt_cost (body_cost_vec, ncopies_for_cost, vec_perm,
192138fd1498Szrj 			    stmt_info, 0, vect_body);
192238fd1498Szrj 	}
192338fd1498Szrj     }
192438fd1498Szrj 
192538fd1498Szrj   /* Push SLP node def-type to stmts.  */
192638fd1498Szrj   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
192738fd1498Szrj     if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
192838fd1498Szrj       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
192938fd1498Szrj 	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);
193038fd1498Szrj 
193138fd1498Szrj   /* Scan operands and account for prologue cost of constants/externals.
193238fd1498Szrj      ???  This over-estimates cost for multiple uses and should be
193338fd1498Szrj      re-engineered.  */
193438fd1498Szrj   stmt = SLP_TREE_SCALAR_STMTS (node)[0];
193538fd1498Szrj   lhs = gimple_get_lhs (stmt);
193638fd1498Szrj   for (i = 0; i < gimple_num_ops (stmt); ++i)
193738fd1498Szrj     {
193838fd1498Szrj       tree op = gimple_op (stmt, i);
193938fd1498Szrj       gimple *def_stmt;
194038fd1498Szrj       enum vect_def_type dt;
194138fd1498Szrj       if (!op || op == lhs)
194238fd1498Szrj 	continue;
194338fd1498Szrj       if (vect_is_simple_use (op, stmt_info->vinfo, &def_stmt, &dt)
194438fd1498Szrj 	  && (dt == vect_constant_def || dt == vect_external_def))
194538fd1498Szrj 	{
194638fd1498Szrj 	  /* Without looking at the actual initializer a vector of
194738fd1498Szrj 	     constants can be implemented as load from the constant pool.
194838fd1498Szrj 	     When all elements are the same we can use a splat.  */
194938fd1498Szrj 	  tree vectype = get_vectype_for_scalar_type (TREE_TYPE (op));
195038fd1498Szrj 	  unsigned group_size = SLP_TREE_SCALAR_STMTS (node).length ();
195138fd1498Szrj 	  unsigned num_vects_to_check;
195238fd1498Szrj 	  unsigned HOST_WIDE_INT const_nunits;
195338fd1498Szrj 	  unsigned nelt_limit;
195438fd1498Szrj 	  if (TYPE_VECTOR_SUBPARTS (vectype).is_constant (&const_nunits)
195538fd1498Szrj 	      && ! multiple_p (const_nunits, group_size))
195638fd1498Szrj 	    {
195738fd1498Szrj 	      num_vects_to_check = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
195838fd1498Szrj 	      nelt_limit = const_nunits;
195938fd1498Szrj 	    }
196038fd1498Szrj 	  else
196138fd1498Szrj 	    {
196238fd1498Szrj 	      /* If either the vector has variable length or the vectors
196338fd1498Szrj 	         are composed of repeated whole groups we only need to
196438fd1498Szrj 		 cost construction once.  All vectors will be the same.  */
196538fd1498Szrj 	      num_vects_to_check = 1;
196638fd1498Szrj 	      nelt_limit = group_size;
196738fd1498Szrj 	    }
196838fd1498Szrj 	  tree elt = NULL_TREE;
196938fd1498Szrj 	  unsigned nelt = 0;
197038fd1498Szrj 	  for (unsigned j = 0; j < num_vects_to_check * nelt_limit; ++j)
197138fd1498Szrj 	    {
197238fd1498Szrj 	      unsigned si = j % group_size;
197338fd1498Szrj 	      if (nelt == 0)
197438fd1498Szrj 		elt = gimple_op (SLP_TREE_SCALAR_STMTS (node)[si], i);
197538fd1498Szrj 	      /* ???  We're just tracking whether all operands of a single
197638fd1498Szrj 		 vector initializer are the same, ideally we'd check if
197738fd1498Szrj 		 we emitted the same one already.  */
197838fd1498Szrj 	      else if (elt != gimple_op (SLP_TREE_SCALAR_STMTS (node)[si], i))
197938fd1498Szrj 		elt = NULL_TREE;
198038fd1498Szrj 	      nelt++;
198138fd1498Szrj 	      if (nelt == nelt_limit)
198238fd1498Szrj 		{
198338fd1498Szrj 		  /* ???  We need to pass down stmt_info for a vector type
198438fd1498Szrj 		     even if it points to the wrong stmt.  */
198538fd1498Szrj 		  record_stmt_cost (prologue_cost_vec, 1,
198638fd1498Szrj 				    dt == vect_external_def
198738fd1498Szrj 				    ? (elt ? scalar_to_vec : vec_construct)
198838fd1498Szrj 				    : vector_load,
198938fd1498Szrj 				    stmt_info, 0, vect_prologue);
199038fd1498Szrj 		  nelt = 0;
199138fd1498Szrj 		}
199238fd1498Szrj 	    }
199338fd1498Szrj 	}
199438fd1498Szrj     }
199538fd1498Szrj 
199638fd1498Szrj   /* Restore stmt def-types.  */
199738fd1498Szrj   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
199838fd1498Szrj     if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
199938fd1498Szrj       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
200038fd1498Szrj 	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;
200138fd1498Szrj }
200238fd1498Szrj 
200338fd1498Szrj /* Compute the cost for the SLP instance INSTANCE.  */
200438fd1498Szrj 
200538fd1498Szrj static void
vect_analyze_slp_cost(slp_instance instance,void * data,scalar_stmts_set_t * visited)200638fd1498Szrj vect_analyze_slp_cost (slp_instance instance, void *data, scalar_stmts_set_t *visited)
200738fd1498Szrj {
200838fd1498Szrj   stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
200938fd1498Szrj   unsigned ncopies_for_cost;
201038fd1498Szrj   stmt_info_for_cost *si;
201138fd1498Szrj   unsigned i;
201238fd1498Szrj 
201338fd1498Szrj   /* Calculate the number of vector stmts to create based on the unrolling
201438fd1498Szrj      factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
201538fd1498Szrj      GROUP_SIZE / NUNITS otherwise.  */
201638fd1498Szrj   unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
201738fd1498Szrj   slp_tree node = SLP_INSTANCE_TREE (instance);
201838fd1498Szrj   stmt_vec_info stmt_info = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
201938fd1498Szrj   /* Get the estimated vectorization factor, which is always one for
202038fd1498Szrj      basic-block vectorization.  */
202138fd1498Szrj   unsigned int assumed_vf;
202238fd1498Szrj   if (STMT_VINFO_LOOP_VINFO (stmt_info))
202338fd1498Szrj     assumed_vf = vect_vf_for_cost (STMT_VINFO_LOOP_VINFO (stmt_info));
202438fd1498Szrj   else
202538fd1498Szrj     assumed_vf = 1;
202638fd1498Szrj   /* For reductions look at a reduction operand in case the reduction
202738fd1498Szrj      operation is widening like DOT_PROD or SAD.  */
202838fd1498Szrj   tree vectype_for_cost = STMT_VINFO_VECTYPE (stmt_info);
202938fd1498Szrj   if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
203038fd1498Szrj     {
203138fd1498Szrj       gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
203238fd1498Szrj       switch (gimple_assign_rhs_code (stmt))
203338fd1498Szrj 	{
203438fd1498Szrj 	case DOT_PROD_EXPR:
203538fd1498Szrj 	case SAD_EXPR:
203638fd1498Szrj 	  vectype_for_cost = get_vectype_for_scalar_type
203738fd1498Szrj 	    (TREE_TYPE (gimple_assign_rhs1 (stmt)));
203838fd1498Szrj 	  break;
203938fd1498Szrj 	default:;
204038fd1498Szrj 	}
204138fd1498Szrj     }
204238fd1498Szrj   unsigned int assumed_nunits = vect_nunits_for_cost (vectype_for_cost);
204338fd1498Szrj   ncopies_for_cost = (least_common_multiple (assumed_nunits,
204438fd1498Szrj 					     group_size * assumed_vf)
204538fd1498Szrj 		      / assumed_nunits);
204638fd1498Szrj 
204738fd1498Szrj   prologue_cost_vec.create (10);
204838fd1498Szrj   body_cost_vec.create (10);
204938fd1498Szrj   vect_analyze_slp_cost_1 (instance, SLP_INSTANCE_TREE (instance),
205038fd1498Szrj 			   &prologue_cost_vec, &body_cost_vec,
205138fd1498Szrj 			   ncopies_for_cost, visited);
205238fd1498Szrj 
205338fd1498Szrj   /* Record the prologue costs, which were delayed until we were
205438fd1498Szrj      sure that SLP was successful.  */
205538fd1498Szrj   FOR_EACH_VEC_ELT (prologue_cost_vec, i, si)
205638fd1498Szrj     {
205738fd1498Szrj       struct _stmt_vec_info *stmt_info
205838fd1498Szrj 	= si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
205938fd1498Szrj       (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
206038fd1498Szrj 			    si->misalign, vect_prologue);
206138fd1498Szrj     }
206238fd1498Szrj 
206338fd1498Szrj   /* Record the instance's instructions in the target cost model.  */
206438fd1498Szrj   FOR_EACH_VEC_ELT (body_cost_vec, i, si)
206538fd1498Szrj     {
206638fd1498Szrj       struct _stmt_vec_info *stmt_info
206738fd1498Szrj 	= si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
206838fd1498Szrj       (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
206938fd1498Szrj 			    si->misalign, vect_body);
207038fd1498Szrj     }
207138fd1498Szrj 
207238fd1498Szrj   prologue_cost_vec.release ();
207338fd1498Szrj   body_cost_vec.release ();
207438fd1498Szrj }
207538fd1498Szrj 
207638fd1498Szrj /* Splits a group of stores, currently beginning at FIRST_STMT, into two groups:
207738fd1498Szrj    one (still beginning at FIRST_STMT) of size GROUP1_SIZE (also containing
207838fd1498Szrj    the first GROUP1_SIZE stmts, since stores are consecutive), the second
207938fd1498Szrj    containing the remainder.
208038fd1498Szrj    Return the first stmt in the second group.  */
208138fd1498Szrj 
208238fd1498Szrj static gimple *
vect_split_slp_store_group(gimple * first_stmt,unsigned group1_size)208338fd1498Szrj vect_split_slp_store_group (gimple *first_stmt, unsigned group1_size)
208438fd1498Szrj {
208538fd1498Szrj   stmt_vec_info first_vinfo = vinfo_for_stmt (first_stmt);
208638fd1498Szrj   gcc_assert (GROUP_FIRST_ELEMENT (first_vinfo) == first_stmt);
208738fd1498Szrj   gcc_assert (group1_size > 0);
208838fd1498Szrj   int group2_size = GROUP_SIZE (first_vinfo) - group1_size;
208938fd1498Szrj   gcc_assert (group2_size > 0);
209038fd1498Szrj   GROUP_SIZE (first_vinfo) = group1_size;
209138fd1498Szrj 
209238fd1498Szrj   gimple *stmt = first_stmt;
209338fd1498Szrj   for (unsigned i = group1_size; i > 1; i--)
209438fd1498Szrj     {
209538fd1498Szrj       stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
209638fd1498Szrj       gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt)) == 1);
209738fd1498Szrj     }
209838fd1498Szrj   /* STMT is now the last element of the first group.  */
209938fd1498Szrj   gimple *group2 = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
210038fd1498Szrj   GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)) = 0;
210138fd1498Szrj 
210238fd1498Szrj   GROUP_SIZE (vinfo_for_stmt (group2)) = group2_size;
210338fd1498Szrj   for (stmt = group2; stmt; stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)))
210438fd1498Szrj     {
210538fd1498Szrj       GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = group2;
210638fd1498Szrj       gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt)) == 1);
210738fd1498Szrj     }
210838fd1498Szrj 
210938fd1498Szrj   /* For the second group, the GROUP_GAP is that before the original group,
211038fd1498Szrj      plus skipping over the first vector.  */
211138fd1498Szrj   GROUP_GAP (vinfo_for_stmt (group2)) =
211238fd1498Szrj     GROUP_GAP (first_vinfo) + group1_size;
211338fd1498Szrj 
211438fd1498Szrj   /* GROUP_GAP of the first group now has to skip over the second group too.  */
211538fd1498Szrj   GROUP_GAP (first_vinfo) += group2_size;
211638fd1498Szrj 
211738fd1498Szrj   if (dump_enabled_p ())
211838fd1498Szrj     dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n",
211938fd1498Szrj 		     group1_size, group2_size);
212038fd1498Szrj 
212138fd1498Szrj   return group2;
212238fd1498Szrj }
212338fd1498Szrj 
212438fd1498Szrj /* Calculate the unrolling factor for an SLP instance with GROUP_SIZE
212538fd1498Szrj    statements and a vector of NUNITS elements.  */
212638fd1498Szrj 
212738fd1498Szrj static poly_uint64
calculate_unrolling_factor(poly_uint64 nunits,unsigned int group_size)212838fd1498Szrj calculate_unrolling_factor (poly_uint64 nunits, unsigned int group_size)
212938fd1498Szrj {
213038fd1498Szrj   return exact_div (common_multiple (nunits, group_size), group_size);
213138fd1498Szrj }
213238fd1498Szrj 
213338fd1498Szrj /* Analyze an SLP instance starting from a group of grouped stores.  Call
213438fd1498Szrj    vect_build_slp_tree to build a tree of packed stmts if possible.
213538fd1498Szrj    Return FALSE if it's impossible to SLP any stmt in the loop.  */
213638fd1498Szrj 
213738fd1498Szrj static bool
vect_analyze_slp_instance(vec_info * vinfo,gimple * stmt,unsigned max_tree_size)213838fd1498Szrj vect_analyze_slp_instance (vec_info *vinfo,
213938fd1498Szrj 			   gimple *stmt, unsigned max_tree_size)
214038fd1498Szrj {
214138fd1498Szrj   slp_instance new_instance;
214238fd1498Szrj   slp_tree node;
214338fd1498Szrj   unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
214438fd1498Szrj   tree vectype, scalar_type = NULL_TREE;
214538fd1498Szrj   gimple *next;
214638fd1498Szrj   unsigned int i;
214738fd1498Szrj   vec<slp_tree> loads;
214838fd1498Szrj   struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
214938fd1498Szrj   vec<gimple *> scalar_stmts;
215038fd1498Szrj 
215138fd1498Szrj   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
215238fd1498Szrj     {
215338fd1498Szrj       if (dr)
215438fd1498Szrj         {
215538fd1498Szrj           scalar_type = TREE_TYPE (DR_REF (dr));
215638fd1498Szrj           vectype = get_vectype_for_scalar_type (scalar_type);
215738fd1498Szrj         }
215838fd1498Szrj       else
215938fd1498Szrj         {
216038fd1498Szrj           gcc_assert (is_a <loop_vec_info> (vinfo));
216138fd1498Szrj           vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
216238fd1498Szrj         }
216338fd1498Szrj 
216438fd1498Szrj       group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
216538fd1498Szrj     }
216638fd1498Szrj   else
216738fd1498Szrj     {
216838fd1498Szrj       gcc_assert (is_a <loop_vec_info> (vinfo));
216938fd1498Szrj       vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
217038fd1498Szrj       group_size = as_a <loop_vec_info> (vinfo)->reductions.length ();
217138fd1498Szrj     }
217238fd1498Szrj 
217338fd1498Szrj   if (!vectype)
217438fd1498Szrj     {
217538fd1498Szrj       if (dump_enabled_p ())
217638fd1498Szrj         {
217738fd1498Szrj           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
217838fd1498Szrj 			   "Build SLP failed: unsupported data-type ");
217938fd1498Szrj           dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
218038fd1498Szrj           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
218138fd1498Szrj         }
218238fd1498Szrj 
218338fd1498Szrj       return false;
218438fd1498Szrj     }
218538fd1498Szrj   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
218638fd1498Szrj 
218738fd1498Szrj   /* Create a node (a root of the SLP tree) for the packed grouped stores.  */
218838fd1498Szrj   scalar_stmts.create (group_size);
218938fd1498Szrj   next = stmt;
219038fd1498Szrj   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
219138fd1498Szrj     {
219238fd1498Szrj       /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS.  */
219338fd1498Szrj       while (next)
219438fd1498Szrj         {
219538fd1498Szrj 	  if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
219638fd1498Szrj 	      && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
219738fd1498Szrj 	    scalar_stmts.safe_push (
219838fd1498Szrj 		  STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
219938fd1498Szrj 	  else
220038fd1498Szrj             scalar_stmts.safe_push (next);
220138fd1498Szrj           next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
220238fd1498Szrj         }
220338fd1498Szrj       /* Mark the first element of the reduction chain as reduction to properly
220438fd1498Szrj 	 transform the node.  In the reduction analysis phase only the last
220538fd1498Szrj 	 element of the chain is marked as reduction.  */
220638fd1498Szrj       if (!STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
220738fd1498Szrj 	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_reduction_def;
220838fd1498Szrj     }
220938fd1498Szrj   else
221038fd1498Szrj     {
221138fd1498Szrj       /* Collect reduction statements.  */
221238fd1498Szrj       vec<gimple *> reductions = as_a <loop_vec_info> (vinfo)->reductions;
221338fd1498Szrj       for (i = 0; reductions.iterate (i, &next); i++)
221438fd1498Szrj 	scalar_stmts.safe_push (next);
221538fd1498Szrj     }
221638fd1498Szrj 
221738fd1498Szrj   loads.create (group_size);
221838fd1498Szrj 
221938fd1498Szrj   /* Build the tree for the SLP instance.  */
222038fd1498Szrj   bool *matches = XALLOCAVEC (bool, group_size);
222138fd1498Szrj   unsigned npermutes = 0;
222238fd1498Szrj   bst_fail = new scalar_stmts_set_t ();
222338fd1498Szrj   poly_uint64 max_nunits = nunits;
222438fd1498Szrj   node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
222538fd1498Szrj 			      &max_nunits, &loads, matches, &npermutes,
222638fd1498Szrj 			      NULL, max_tree_size);
222738fd1498Szrj   delete bst_fail;
222838fd1498Szrj   if (node != NULL)
222938fd1498Szrj     {
223038fd1498Szrj       /* Calculate the unrolling factor based on the smallest type.  */
223138fd1498Szrj       poly_uint64 unrolling_factor
223238fd1498Szrj 	= calculate_unrolling_factor (max_nunits, group_size);
223338fd1498Szrj 
223438fd1498Szrj       if (maybe_ne (unrolling_factor, 1U)
223538fd1498Szrj 	  && is_a <bb_vec_info> (vinfo))
223638fd1498Szrj 	{
223738fd1498Szrj 	  unsigned HOST_WIDE_INT const_max_nunits;
223838fd1498Szrj 	  if (!max_nunits.is_constant (&const_max_nunits)
223938fd1498Szrj 	      || const_max_nunits > group_size)
224038fd1498Szrj 	    {
224138fd1498Szrj 	      if (dump_enabled_p ())
224238fd1498Szrj 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
224338fd1498Szrj 				 "Build SLP failed: store group "
224438fd1498Szrj 				 "size not a multiple of the vector size "
224538fd1498Szrj 				 "in basic block SLP\n");
224638fd1498Szrj 	      vect_free_slp_tree (node);
224738fd1498Szrj 	      loads.release ();
224838fd1498Szrj 	      return false;
224938fd1498Szrj 	    }
225038fd1498Szrj 	  /* Fatal mismatch.  */
225138fd1498Szrj 	  matches[group_size / const_max_nunits * const_max_nunits] = false;
225238fd1498Szrj 	  vect_free_slp_tree (node);
225338fd1498Szrj 	  loads.release ();
225438fd1498Szrj 	}
225538fd1498Szrj       else
225638fd1498Szrj 	{
225738fd1498Szrj       /* Create a new SLP instance.  */
225838fd1498Szrj       new_instance = XNEW (struct _slp_instance);
225938fd1498Szrj       SLP_INSTANCE_TREE (new_instance) = node;
226038fd1498Szrj       SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
226138fd1498Szrj       SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
226238fd1498Szrj       SLP_INSTANCE_LOADS (new_instance) = loads;
226338fd1498Szrj 
226438fd1498Szrj       /* Compute the load permutation.  */
226538fd1498Szrj       slp_tree load_node;
226638fd1498Szrj       bool loads_permuted = false;
226738fd1498Szrj       FOR_EACH_VEC_ELT (loads, i, load_node)
226838fd1498Szrj 	{
226938fd1498Szrj 	  vec<unsigned> load_permutation;
227038fd1498Szrj 	  int j;
227138fd1498Szrj 	  gimple *load, *first_stmt;
227238fd1498Szrj 	  bool this_load_permuted = false;
227338fd1498Szrj 	  load_permutation.create (group_size);
227438fd1498Szrj 	  first_stmt = GROUP_FIRST_ELEMENT
227538fd1498Szrj 	      (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
227638fd1498Szrj 	  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
227738fd1498Szrj 	    {
227838fd1498Szrj 		  int load_place = vect_get_place_in_interleaving_chain
227938fd1498Szrj 				     (load, first_stmt);
228038fd1498Szrj 	      gcc_assert (load_place != -1);
228138fd1498Szrj 	      if (load_place != j)
228238fd1498Szrj 		this_load_permuted = true;
228338fd1498Szrj 	      load_permutation.safe_push (load_place);
228438fd1498Szrj 	    }
228538fd1498Szrj 	  if (!this_load_permuted
228638fd1498Szrj 	      /* The load requires permutation when unrolling exposes
228738fd1498Szrj 	         a gap either because the group is larger than the SLP
228838fd1498Szrj 		 group-size or because there is a gap between the groups.  */
228938fd1498Szrj 	      && (known_eq (unrolling_factor, 1U)
229038fd1498Szrj 		  || (group_size == GROUP_SIZE (vinfo_for_stmt (first_stmt))
229138fd1498Szrj 		      && GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0)))
229238fd1498Szrj 	    {
229338fd1498Szrj 	      load_permutation.release ();
229438fd1498Szrj 	      continue;
229538fd1498Szrj 	    }
229638fd1498Szrj 	  SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
229738fd1498Szrj 	  loads_permuted = true;
229838fd1498Szrj 	}
229938fd1498Szrj 
230038fd1498Szrj       if (loads_permuted)
230138fd1498Szrj         {
230238fd1498Szrj           if (!vect_supported_load_permutation_p (new_instance))
230338fd1498Szrj             {
230438fd1498Szrj               if (dump_enabled_p ())
230538fd1498Szrj                 {
230638fd1498Szrj                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
230738fd1498Szrj 				   "Build SLP failed: unsupported load "
230838fd1498Szrj 				   "permutation ");
230938fd1498Szrj 		      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,
231038fd1498Szrj 					TDF_SLIM, stmt, 0);
231138fd1498Szrj                 }
231238fd1498Szrj               vect_free_slp_instance (new_instance);
231338fd1498Szrj               return false;
231438fd1498Szrj             }
231538fd1498Szrj         }
231638fd1498Szrj 
231738fd1498Szrj 	  /* If the loads and stores can be handled with load/store-lan
231838fd1498Szrj 	 instructions do not generate this SLP instance.  */
231938fd1498Szrj       if (is_a <loop_vec_info> (vinfo)
232038fd1498Szrj 	  && loads_permuted
232138fd1498Szrj 	  && dr && vect_store_lanes_supported (vectype, group_size, false))
232238fd1498Szrj 	{
232338fd1498Szrj 	  slp_tree load_node;
232438fd1498Szrj 	  FOR_EACH_VEC_ELT (loads, i, load_node)
232538fd1498Szrj 	    {
232638fd1498Szrj 	      gimple *first_stmt = GROUP_FIRST_ELEMENT
232738fd1498Szrj 		  (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
232838fd1498Szrj 	      stmt_vec_info stmt_vinfo = vinfo_for_stmt (first_stmt);
232938fd1498Szrj 		  /* Use SLP for strided accesses (or if we
233038fd1498Szrj 		     can't load-lanes).  */
233138fd1498Szrj 	      if (STMT_VINFO_STRIDED_P (stmt_vinfo)
233238fd1498Szrj 		  || ! vect_load_lanes_supported
233338fd1498Szrj 			(STMT_VINFO_VECTYPE (stmt_vinfo),
233438fd1498Szrj 			 GROUP_SIZE (stmt_vinfo), false))
233538fd1498Szrj 		break;
233638fd1498Szrj 	    }
233738fd1498Szrj 	  if (i == loads.length ())
233838fd1498Szrj 	    {
233938fd1498Szrj 	      if (dump_enabled_p ())
234038fd1498Szrj 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
234138fd1498Szrj 				 "Built SLP cancelled: can use "
234238fd1498Szrj 				 "load/store-lanes\n");
234338fd1498Szrj 	      vect_free_slp_instance (new_instance);
234438fd1498Szrj 	      return false;
234538fd1498Szrj 	    }
234638fd1498Szrj 	}
234738fd1498Szrj 
234838fd1498Szrj       vinfo->slp_instances.safe_push (new_instance);
234938fd1498Szrj 
235038fd1498Szrj       if (dump_enabled_p ())
235138fd1498Szrj 	{
235238fd1498Szrj 	  dump_printf_loc (MSG_NOTE, vect_location,
235338fd1498Szrj 			   "Final SLP tree for instance:\n");
235438fd1498Szrj 	  vect_print_slp_tree (MSG_NOTE, vect_location, node);
235538fd1498Szrj 	}
235638fd1498Szrj 
235738fd1498Szrj       return true;
235838fd1498Szrj     }
235938fd1498Szrj     }
236038fd1498Szrj   else
236138fd1498Szrj     {
236238fd1498Szrj   /* Failed to SLP.  */
236338fd1498Szrj   /* Free the allocated memory.  */
236438fd1498Szrj   scalar_stmts.release ();
236538fd1498Szrj   loads.release ();
236638fd1498Szrj     }
236738fd1498Szrj 
236838fd1498Szrj   /* For basic block SLP, try to break the group up into multiples of the
236938fd1498Szrj      vector size.  */
237038fd1498Szrj   unsigned HOST_WIDE_INT const_nunits;
237138fd1498Szrj   if (is_a <bb_vec_info> (vinfo)
237238fd1498Szrj       && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
237338fd1498Szrj       && STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
237438fd1498Szrj       && nunits.is_constant (&const_nunits))
237538fd1498Szrj     {
237638fd1498Szrj       /* We consider breaking the group only on VF boundaries from the existing
237738fd1498Szrj 	 start.  */
237838fd1498Szrj       for (i = 0; i < group_size; i++)
237938fd1498Szrj 	if (!matches[i]) break;
238038fd1498Szrj 
238138fd1498Szrj       if (i >= const_nunits && i < group_size)
238238fd1498Szrj 	{
238338fd1498Szrj 	  /* Split into two groups at the first vector boundary before i.  */
238438fd1498Szrj 	  gcc_assert ((const_nunits & (const_nunits - 1)) == 0);
238538fd1498Szrj 	  unsigned group1_size = i & ~(const_nunits - 1);
238638fd1498Szrj 
238738fd1498Szrj 	  gimple *rest = vect_split_slp_store_group (stmt, group1_size);
238838fd1498Szrj 	  bool res = vect_analyze_slp_instance (vinfo, stmt, max_tree_size);
238938fd1498Szrj 	  /* If the first non-match was in the middle of a vector,
239038fd1498Szrj 	     skip the rest of that vector.  */
239138fd1498Szrj 	  if (group1_size < i)
239238fd1498Szrj 	    {
239338fd1498Szrj 	      i = group1_size + const_nunits;
239438fd1498Szrj 	      if (i < group_size)
239538fd1498Szrj 		rest = vect_split_slp_store_group (rest, const_nunits);
239638fd1498Szrj 	    }
239738fd1498Szrj 	  if (i < group_size)
239838fd1498Szrj 	    res |= vect_analyze_slp_instance (vinfo, rest, max_tree_size);
239938fd1498Szrj 	  return res;
240038fd1498Szrj 	}
240138fd1498Szrj       /* Even though the first vector did not all match, we might be able to SLP
240238fd1498Szrj 	 (some) of the remainder.  FORNOW ignore this possibility.  */
240338fd1498Szrj     }
240438fd1498Szrj 
240538fd1498Szrj   return false;
240638fd1498Szrj }
240738fd1498Szrj 
240838fd1498Szrj 
240938fd1498Szrj /* Check if there are stmts in the loop can be vectorized using SLP.  Build SLP
241038fd1498Szrj    trees of packed scalar stmts if SLP is possible.  */
241138fd1498Szrj 
241238fd1498Szrj bool
vect_analyze_slp(vec_info * vinfo,unsigned max_tree_size)241338fd1498Szrj vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
241438fd1498Szrj {
241538fd1498Szrj   unsigned int i;
241638fd1498Szrj   gimple *first_element;
241738fd1498Szrj 
241838fd1498Szrj   if (dump_enabled_p ())
241938fd1498Szrj     dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_slp ===\n");
242038fd1498Szrj 
242138fd1498Szrj   /* Find SLP sequences starting from groups of grouped stores.  */
242238fd1498Szrj   FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element)
242338fd1498Szrj     vect_analyze_slp_instance (vinfo, first_element, max_tree_size);
242438fd1498Szrj 
242538fd1498Szrj   if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
242638fd1498Szrj     {
242738fd1498Szrj       if (loop_vinfo->reduction_chains.length () > 0)
242838fd1498Szrj 	{
242938fd1498Szrj 	  /* Find SLP sequences starting from reduction chains.  */
243038fd1498Szrj 	  FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
243138fd1498Szrj 	    if (! vect_analyze_slp_instance (vinfo, first_element,
243238fd1498Szrj 					     max_tree_size))
243338fd1498Szrj 	      {
243438fd1498Szrj 		/* Dissolve reduction chain group.  */
243538fd1498Szrj 		gimple *next, *stmt = first_element;
243638fd1498Szrj 		while (stmt)
243738fd1498Szrj 		  {
243838fd1498Szrj 		    stmt_vec_info vinfo = vinfo_for_stmt (stmt);
243938fd1498Szrj 		    next = GROUP_NEXT_ELEMENT (vinfo);
244038fd1498Szrj 		    GROUP_FIRST_ELEMENT (vinfo) = NULL;
244138fd1498Szrj 		    GROUP_NEXT_ELEMENT (vinfo) = NULL;
244238fd1498Szrj 		    stmt = next;
244338fd1498Szrj 		  }
244438fd1498Szrj 		STMT_VINFO_DEF_TYPE (vinfo_for_stmt (first_element))
244538fd1498Szrj 		  = vect_internal_def;
244638fd1498Szrj 	      }
244738fd1498Szrj 	}
244838fd1498Szrj 
244938fd1498Szrj       /* Find SLP sequences starting from groups of reductions.  */
245038fd1498Szrj       if (loop_vinfo->reductions.length () > 1)
245138fd1498Szrj 	vect_analyze_slp_instance (vinfo, loop_vinfo->reductions[0],
245238fd1498Szrj 				   max_tree_size);
245338fd1498Szrj     }
245438fd1498Szrj 
245538fd1498Szrj   return true;
245638fd1498Szrj }
245738fd1498Szrj 
245838fd1498Szrj 
245938fd1498Szrj /* For each possible SLP instance decide whether to SLP it and calculate overall
246038fd1498Szrj    unrolling factor needed to SLP the loop.  Return TRUE if decided to SLP at
246138fd1498Szrj    least one instance.  */
246238fd1498Szrj 
246338fd1498Szrj bool
vect_make_slp_decision(loop_vec_info loop_vinfo)246438fd1498Szrj vect_make_slp_decision (loop_vec_info loop_vinfo)
246538fd1498Szrj {
246638fd1498Szrj   unsigned int i;
246738fd1498Szrj   poly_uint64 unrolling_factor = 1;
246838fd1498Szrj   vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
246938fd1498Szrj   slp_instance instance;
247038fd1498Szrj   int decided_to_slp = 0;
247138fd1498Szrj 
247238fd1498Szrj   if (dump_enabled_p ())
247338fd1498Szrj     dump_printf_loc (MSG_NOTE, vect_location, "=== vect_make_slp_decision ==="
247438fd1498Szrj                      "\n");
247538fd1498Szrj 
247638fd1498Szrj   FOR_EACH_VEC_ELT (slp_instances, i, instance)
247738fd1498Szrj     {
247838fd1498Szrj       /* FORNOW: SLP if you can.  */
247938fd1498Szrj       /* All unroll factors have the form current_vector_size * X for some
248038fd1498Szrj 	 rational X, so they must have a common multiple.  */
248138fd1498Szrj       unrolling_factor
248238fd1498Szrj 	= force_common_multiple (unrolling_factor,
248338fd1498Szrj 				 SLP_INSTANCE_UNROLLING_FACTOR (instance));
248438fd1498Szrj 
248538fd1498Szrj       /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts.  Later we
248638fd1498Szrj 	 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
248738fd1498Szrj 	 loop-based vectorization.  Such stmts will be marked as HYBRID.  */
248838fd1498Szrj       vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
248938fd1498Szrj       decided_to_slp++;
249038fd1498Szrj     }
249138fd1498Szrj 
249238fd1498Szrj   LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
249338fd1498Szrj 
249438fd1498Szrj   if (decided_to_slp && dump_enabled_p ())
249538fd1498Szrj     {
249638fd1498Szrj       dump_printf_loc (MSG_NOTE, vect_location,
249738fd1498Szrj 		       "Decided to SLP %d instances. Unrolling factor ",
249838fd1498Szrj 		       decided_to_slp);
249938fd1498Szrj       dump_dec (MSG_NOTE, unrolling_factor);
250038fd1498Szrj       dump_printf (MSG_NOTE, "\n");
250138fd1498Szrj     }
250238fd1498Szrj 
250338fd1498Szrj   return (decided_to_slp > 0);
250438fd1498Szrj }
250538fd1498Szrj 
250638fd1498Szrj 
250738fd1498Szrj /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
250838fd1498Szrj    can't be SLPed) in the tree rooted at NODE.  Mark such stmts as HYBRID.  */
250938fd1498Szrj 
251038fd1498Szrj static void
vect_detect_hybrid_slp_stmts(slp_tree node,unsigned i,slp_vect_type stype)251138fd1498Szrj vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype)
251238fd1498Szrj {
251338fd1498Szrj   gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[i];
251438fd1498Szrj   imm_use_iterator imm_iter;
251538fd1498Szrj   gimple *use_stmt;
251638fd1498Szrj   stmt_vec_info use_vinfo, stmt_vinfo = vinfo_for_stmt (stmt);
251738fd1498Szrj   slp_tree child;
251838fd1498Szrj   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
251938fd1498Szrj   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
252038fd1498Szrj   int j;
252138fd1498Szrj 
252238fd1498Szrj   /* Propagate hybrid down the SLP tree.  */
252338fd1498Szrj   if (stype == hybrid)
252438fd1498Szrj     ;
252538fd1498Szrj   else if (HYBRID_SLP_STMT (stmt_vinfo))
252638fd1498Szrj     stype = hybrid;
252738fd1498Szrj   else
252838fd1498Szrj     {
252938fd1498Szrj       /* Check if a pure SLP stmt has uses in non-SLP stmts.  */
253038fd1498Szrj       gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo));
253138fd1498Szrj       /* If we get a pattern stmt here we have to use the LHS of the
253238fd1498Szrj          original stmt for immediate uses.  */
253338fd1498Szrj       if (! STMT_VINFO_IN_PATTERN_P (stmt_vinfo)
253438fd1498Szrj 	  && STMT_VINFO_RELATED_STMT (stmt_vinfo))
253538fd1498Szrj 	stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
253638fd1498Szrj       tree def;
253738fd1498Szrj       if (gimple_code (stmt) == GIMPLE_PHI)
253838fd1498Szrj 	def = gimple_phi_result (stmt);
253938fd1498Szrj       else
254038fd1498Szrj 	def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF);
254138fd1498Szrj       if (def)
254238fd1498Szrj 	FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, def)
254338fd1498Szrj 	  {
254438fd1498Szrj 	    if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
254538fd1498Szrj 	      continue;
254638fd1498Szrj 	    use_vinfo = vinfo_for_stmt (use_stmt);
254738fd1498Szrj 	    if (STMT_VINFO_IN_PATTERN_P (use_vinfo)
254838fd1498Szrj 		&& STMT_VINFO_RELATED_STMT (use_vinfo))
254938fd1498Szrj 	      use_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (use_vinfo));
255038fd1498Szrj 	    if (!STMT_SLP_TYPE (use_vinfo)
255138fd1498Szrj 		&& (STMT_VINFO_RELEVANT (use_vinfo)
255238fd1498Szrj 		    || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
255338fd1498Szrj 		&& !(gimple_code (use_stmt) == GIMPLE_PHI
255438fd1498Szrj 		     && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
255538fd1498Szrj 	      {
255638fd1498Szrj 		if (dump_enabled_p ())
255738fd1498Szrj 		  {
255838fd1498Szrj 		    dump_printf_loc (MSG_NOTE, vect_location, "use of SLP "
255938fd1498Szrj 				     "def in non-SLP stmt: ");
256038fd1498Szrj 		    dump_gimple_stmt (MSG_NOTE, TDF_SLIM, use_stmt, 0);
256138fd1498Szrj 		  }
256238fd1498Szrj 		stype = hybrid;
256338fd1498Szrj 	      }
256438fd1498Szrj 	  }
256538fd1498Szrj     }
256638fd1498Szrj 
256738fd1498Szrj   if (stype == hybrid
256838fd1498Szrj       && !HYBRID_SLP_STMT (stmt_vinfo))
256938fd1498Szrj     {
257038fd1498Szrj       if (dump_enabled_p ())
257138fd1498Szrj 	{
257238fd1498Szrj 	  dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
257338fd1498Szrj 	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
257438fd1498Szrj 	}
257538fd1498Szrj       STMT_SLP_TYPE (stmt_vinfo) = hybrid;
257638fd1498Szrj     }
257738fd1498Szrj 
257838fd1498Szrj   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
257938fd1498Szrj     if (SLP_TREE_DEF_TYPE (child) != vect_external_def)
258038fd1498Szrj       vect_detect_hybrid_slp_stmts (child, i, stype);
258138fd1498Szrj }
258238fd1498Szrj 
258338fd1498Szrj /* Helpers for vect_detect_hybrid_slp walking pattern stmt uses.  */
258438fd1498Szrj 
258538fd1498Szrj static tree
vect_detect_hybrid_slp_1(tree * tp,int *,void * data)258638fd1498Szrj vect_detect_hybrid_slp_1 (tree *tp, int *, void *data)
258738fd1498Szrj {
258838fd1498Szrj   walk_stmt_info *wi = (walk_stmt_info *)data;
258938fd1498Szrj   struct loop *loopp = (struct loop *)wi->info;
259038fd1498Szrj 
259138fd1498Szrj   if (wi->is_lhs)
259238fd1498Szrj     return NULL_TREE;
259338fd1498Szrj 
259438fd1498Szrj   if (TREE_CODE (*tp) == SSA_NAME
259538fd1498Szrj       && !SSA_NAME_IS_DEFAULT_DEF (*tp))
259638fd1498Szrj     {
259738fd1498Szrj       gimple *def_stmt = SSA_NAME_DEF_STMT (*tp);
259838fd1498Szrj       if (flow_bb_inside_loop_p (loopp, gimple_bb (def_stmt))
259938fd1498Szrj 	  && PURE_SLP_STMT (vinfo_for_stmt (def_stmt)))
260038fd1498Szrj 	{
260138fd1498Szrj 	  if (dump_enabled_p ())
260238fd1498Szrj 	    {
260338fd1498Szrj 	      dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
260438fd1498Szrj 	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
260538fd1498Szrj 	    }
260638fd1498Szrj 	  STMT_SLP_TYPE (vinfo_for_stmt (def_stmt)) = hybrid;
260738fd1498Szrj 	}
260838fd1498Szrj     }
260938fd1498Szrj 
261038fd1498Szrj   return NULL_TREE;
261138fd1498Szrj }
261238fd1498Szrj 
261338fd1498Szrj static tree
vect_detect_hybrid_slp_2(gimple_stmt_iterator * gsi,bool * handled,walk_stmt_info *)261438fd1498Szrj vect_detect_hybrid_slp_2 (gimple_stmt_iterator *gsi, bool *handled,
261538fd1498Szrj 			  walk_stmt_info *)
261638fd1498Szrj {
261738fd1498Szrj   stmt_vec_info use_vinfo = vinfo_for_stmt (gsi_stmt (*gsi));
261838fd1498Szrj   /* If the stmt is in a SLP instance then this isn't a reason
261938fd1498Szrj      to mark use definitions in other SLP instances as hybrid.  */
262038fd1498Szrj   if (! STMT_SLP_TYPE (use_vinfo)
262138fd1498Szrj       && (STMT_VINFO_RELEVANT (use_vinfo)
262238fd1498Szrj 	  || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
262338fd1498Szrj       && ! (gimple_code (gsi_stmt (*gsi)) == GIMPLE_PHI
262438fd1498Szrj 	    && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
262538fd1498Szrj     ;
262638fd1498Szrj   else
262738fd1498Szrj     *handled = true;
262838fd1498Szrj   return NULL_TREE;
262938fd1498Szrj }
263038fd1498Szrj 
263138fd1498Szrj /* Find stmts that must be both vectorized and SLPed.  */
263238fd1498Szrj 
263338fd1498Szrj void
vect_detect_hybrid_slp(loop_vec_info loop_vinfo)263438fd1498Szrj vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
263538fd1498Szrj {
263638fd1498Szrj   unsigned int i;
263738fd1498Szrj   vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
263838fd1498Szrj   slp_instance instance;
263938fd1498Szrj 
264038fd1498Szrj   if (dump_enabled_p ())
264138fd1498Szrj     dump_printf_loc (MSG_NOTE, vect_location, "=== vect_detect_hybrid_slp ==="
264238fd1498Szrj                      "\n");
264338fd1498Szrj 
264438fd1498Szrj   /* First walk all pattern stmt in the loop and mark defs of uses as
264538fd1498Szrj      hybrid because immediate uses in them are not recorded.  */
264638fd1498Szrj   for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
264738fd1498Szrj     {
264838fd1498Szrj       basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
264938fd1498Szrj       for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
265038fd1498Szrj 	   gsi_next (&gsi))
265138fd1498Szrj 	{
265238fd1498Szrj 	  gimple *stmt = gsi_stmt (gsi);
265338fd1498Szrj 	  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
265438fd1498Szrj 	  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
265538fd1498Szrj 	    {
265638fd1498Szrj 	      walk_stmt_info wi;
265738fd1498Szrj 	      memset (&wi, 0, sizeof (wi));
265838fd1498Szrj 	      wi.info = LOOP_VINFO_LOOP (loop_vinfo);
265938fd1498Szrj 	      gimple_stmt_iterator gsi2
266038fd1498Szrj 		= gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
266138fd1498Szrj 	      walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2,
266238fd1498Szrj 				vect_detect_hybrid_slp_1, &wi);
266338fd1498Szrj 	      walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
266438fd1498Szrj 			       vect_detect_hybrid_slp_2,
266538fd1498Szrj 			       vect_detect_hybrid_slp_1, &wi);
266638fd1498Szrj 	    }
266738fd1498Szrj 	}
266838fd1498Szrj     }
266938fd1498Szrj 
267038fd1498Szrj   /* Then walk the SLP instance trees marking stmts with uses in
267138fd1498Szrj      non-SLP stmts as hybrid, also propagating hybrid down the
267238fd1498Szrj      SLP tree, collecting the above info on-the-fly.  */
267338fd1498Szrj   FOR_EACH_VEC_ELT (slp_instances, i, instance)
267438fd1498Szrj     {
267538fd1498Szrj       for (unsigned i = 0; i < SLP_INSTANCE_GROUP_SIZE (instance); ++i)
267638fd1498Szrj 	vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance),
267738fd1498Szrj 				      i, pure_slp);
267838fd1498Szrj     }
267938fd1498Szrj }
268038fd1498Szrj 
268138fd1498Szrj 
268238fd1498Szrj /* Initialize a bb_vec_info struct for the statements between
268338fd1498Szrj    REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive).  */
268438fd1498Szrj 
_bb_vec_info(gimple_stmt_iterator region_begin_in,gimple_stmt_iterator region_end_in)268538fd1498Szrj _bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in,
268638fd1498Szrj 			    gimple_stmt_iterator region_end_in)
268738fd1498Szrj   : vec_info (vec_info::bb, init_cost (NULL)),
268838fd1498Szrj     bb (gsi_bb (region_begin_in)),
268938fd1498Szrj     region_begin (region_begin_in),
269038fd1498Szrj     region_end (region_end_in)
269138fd1498Szrj {
269238fd1498Szrj   gimple_stmt_iterator gsi;
269338fd1498Szrj 
269438fd1498Szrj   for (gsi = region_begin; gsi_stmt (gsi) != gsi_stmt (region_end);
269538fd1498Szrj        gsi_next (&gsi))
269638fd1498Szrj     {
269738fd1498Szrj       gimple *stmt = gsi_stmt (gsi);
269838fd1498Szrj       gimple_set_uid (stmt, 0);
269938fd1498Szrj       set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, this));
270038fd1498Szrj     }
270138fd1498Szrj 
270238fd1498Szrj   bb->aux = this;
270338fd1498Szrj }
270438fd1498Szrj 
270538fd1498Szrj 
270638fd1498Szrj /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
270738fd1498Szrj    stmts in the basic block.  */
270838fd1498Szrj 
~_bb_vec_info()270938fd1498Szrj _bb_vec_info::~_bb_vec_info ()
271038fd1498Szrj {
271138fd1498Szrj   for (gimple_stmt_iterator si = region_begin;
271238fd1498Szrj        gsi_stmt (si) != gsi_stmt (region_end); gsi_next (&si))
271338fd1498Szrj     {
271438fd1498Szrj       gimple *stmt = gsi_stmt (si);
271538fd1498Szrj       stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
271638fd1498Szrj 
271738fd1498Szrj       if (stmt_info)
271838fd1498Szrj         /* Free stmt_vec_info.  */
271938fd1498Szrj         free_stmt_vec_info (stmt);
272038fd1498Szrj 
272138fd1498Szrj       /* Reset region marker.  */
272238fd1498Szrj       gimple_set_uid (stmt, -1);
272338fd1498Szrj     }
272438fd1498Szrj 
272538fd1498Szrj   bb->aux = NULL;
272638fd1498Szrj }
272738fd1498Szrj 
272838fd1498Szrj 
272938fd1498Szrj /* Analyze statements contained in SLP tree NODE after recursively analyzing
273038fd1498Szrj    the subtree.  NODE_INSTANCE contains NODE and VINFO contains INSTANCE.
273138fd1498Szrj 
273238fd1498Szrj    Return true if the operations are supported.  */
273338fd1498Szrj 
273438fd1498Szrj static bool
vect_slp_analyze_node_operations(vec_info * vinfo,slp_tree node,slp_instance node_instance)273538fd1498Szrj vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
273638fd1498Szrj 				  slp_instance node_instance)
273738fd1498Szrj {
273838fd1498Szrj   bool dummy;
273938fd1498Szrj   int i, j;
274038fd1498Szrj   gimple *stmt;
274138fd1498Szrj   slp_tree child;
274238fd1498Szrj 
274338fd1498Szrj   if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
274438fd1498Szrj     return true;
274538fd1498Szrj 
274638fd1498Szrj   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
274738fd1498Szrj     if (!vect_slp_analyze_node_operations (vinfo, child, node_instance))
274838fd1498Szrj       return false;
274938fd1498Szrj 
275038fd1498Szrj   stmt = SLP_TREE_SCALAR_STMTS (node)[0];
275138fd1498Szrj   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
275238fd1498Szrj   gcc_assert (stmt_info);
275338fd1498Szrj   gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
275438fd1498Szrj 
275538fd1498Szrj   /* For BB vectorization vector types are assigned here.
275638fd1498Szrj      Memory accesses already got their vector type assigned
275738fd1498Szrj      in vect_analyze_data_refs.  */
275838fd1498Szrj   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
275938fd1498Szrj   if (bb_vinfo
276038fd1498Szrj       && ! STMT_VINFO_DATA_REF (stmt_info))
276138fd1498Szrj     {
276238fd1498Szrj       gcc_assert (PURE_SLP_STMT (stmt_info));
276338fd1498Szrj 
276438fd1498Szrj       tree scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
276538fd1498Szrj       if (dump_enabled_p ())
276638fd1498Szrj 	{
276738fd1498Szrj 	  dump_printf_loc (MSG_NOTE, vect_location,
276838fd1498Szrj 			   "get vectype for scalar type:  ");
276938fd1498Szrj 	  dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
277038fd1498Szrj 	  dump_printf (MSG_NOTE, "\n");
277138fd1498Szrj 	}
277238fd1498Szrj 
277338fd1498Szrj       tree vectype = get_vectype_for_scalar_type (scalar_type);
277438fd1498Szrj       if (!vectype)
277538fd1498Szrj 	{
277638fd1498Szrj 	  if (dump_enabled_p ())
277738fd1498Szrj 	    {
277838fd1498Szrj 	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
277938fd1498Szrj 			       "not SLPed: unsupported data-type ");
278038fd1498Szrj 	      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
278138fd1498Szrj 				 scalar_type);
278238fd1498Szrj 	      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
278338fd1498Szrj 	    }
278438fd1498Szrj 	  return false;
278538fd1498Szrj 	}
278638fd1498Szrj 
278738fd1498Szrj       if (dump_enabled_p ())
278838fd1498Szrj 	{
278938fd1498Szrj 	  dump_printf_loc (MSG_NOTE, vect_location, "vectype:  ");
279038fd1498Szrj 	  dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
279138fd1498Szrj 	  dump_printf (MSG_NOTE, "\n");
279238fd1498Szrj 	}
279338fd1498Szrj 
279438fd1498Szrj       gimple *sstmt;
279538fd1498Szrj       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, sstmt)
279638fd1498Szrj 	STMT_VINFO_VECTYPE (vinfo_for_stmt (sstmt)) = vectype;
279738fd1498Szrj     }
279838fd1498Szrj 
279938fd1498Szrj   /* Calculate the number of vector statements to be created for the
280038fd1498Szrj      scalar stmts in this node.  For SLP reductions it is equal to the
280138fd1498Szrj      number of vector statements in the children (which has already been
280238fd1498Szrj      calculated by the recursive call).  Otherwise it is the number of
280338fd1498Szrj      scalar elements in one scalar iteration (GROUP_SIZE) multiplied by
280438fd1498Szrj      VF divided by the number of elements in a vector.  */
280538fd1498Szrj   if (GROUP_FIRST_ELEMENT (stmt_info)
280638fd1498Szrj       && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
280738fd1498Szrj     SLP_TREE_NUMBER_OF_VEC_STMTS (node)
280838fd1498Szrj       = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[0]);
280938fd1498Szrj   else
281038fd1498Szrj     {
281138fd1498Szrj       poly_uint64 vf;
281238fd1498Szrj       if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
281338fd1498Szrj 	vf = loop_vinfo->vectorization_factor;
281438fd1498Szrj       else
281538fd1498Szrj 	vf = 1;
281638fd1498Szrj       unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (node_instance);
281738fd1498Szrj       tree vectype = STMT_VINFO_VECTYPE (stmt_info);
281838fd1498Szrj       SLP_TREE_NUMBER_OF_VEC_STMTS (node)
281938fd1498Szrj 	= vect_get_num_vectors (vf * group_size, vectype);
282038fd1498Szrj     }
282138fd1498Szrj 
2822*58e805e6Szrj   /* ???  We have to catch the case late where two first scalar stmts appear
2823*58e805e6Szrj      in multiple SLP children with different def type and fail.  Remember
2824*58e805e6Szrj      original def types first since SLP_TREE_DEF_TYPE doesn't necessarily
2825*58e805e6Szrj      match it when that is vect_internal_def.  */
2826*58e805e6Szrj   auto_vec<vect_def_type, 4> dt;
2827*58e805e6Szrj   dt.safe_grow (SLP_TREE_CHILDREN (node).length ());
2828*58e805e6Szrj   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2829*58e805e6Szrj     dt[j]
2830*58e805e6Szrj       = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0]));
2831*58e805e6Szrj 
283238fd1498Szrj   /* Push SLP node def-type to stmt operands.  */
283338fd1498Szrj   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
283438fd1498Szrj     if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
283538fd1498Szrj       STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0]))
283638fd1498Szrj 	= SLP_TREE_DEF_TYPE (child);
2837*58e805e6Szrj 
2838*58e805e6Szrj   /* Check everything worked out.  */
2839*58e805e6Szrj   bool res = true;
284038fd1498Szrj   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
284138fd1498Szrj    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2842*58e805e6Szrj      {
2843*58e805e6Szrj        if (STMT_VINFO_DEF_TYPE
2844*58e805e6Szrj 	     (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0]))
2845*58e805e6Szrj 	   != SLP_TREE_DEF_TYPE (child))
2846*58e805e6Szrj 	 res = false;
2847*58e805e6Szrj      }
2848*58e805e6Szrj    else if (STMT_VINFO_DEF_TYPE
2849*58e805e6Szrj 	      (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])) != dt[j])
2850*58e805e6Szrj      res = false;
2851*58e805e6Szrj   if (!res && dump_enabled_p ())
2852*58e805e6Szrj     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2853*58e805e6Szrj 		     "not vectorized: same operand with different "
2854*58e805e6Szrj 		     "def type in stmt.\n");
2855*58e805e6Szrj   if (res)
2856*58e805e6Szrj     res = vect_analyze_stmt (stmt, &dummy, node, node_instance);
285738fd1498Szrj 
2858*58e805e6Szrj   /* Restore def-types.  */
2859*58e805e6Szrj   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2860*58e805e6Szrj     STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0]))
2861*58e805e6Szrj       = dt[j];
2862*58e805e6Szrj 
2863*58e805e6Szrj   return res;
286438fd1498Szrj }
286538fd1498Szrj 
286638fd1498Szrj 
286738fd1498Szrj /* Analyze statements in SLP instances of VINFO.  Return true if the
286838fd1498Szrj    operations are supported. */
286938fd1498Szrj 
287038fd1498Szrj bool
vect_slp_analyze_operations(vec_info * vinfo)287138fd1498Szrj vect_slp_analyze_operations (vec_info *vinfo)
287238fd1498Szrj {
287338fd1498Szrj   slp_instance instance;
287438fd1498Szrj   int i;
287538fd1498Szrj 
287638fd1498Szrj   if (dump_enabled_p ())
287738fd1498Szrj     dump_printf_loc (MSG_NOTE, vect_location,
287838fd1498Szrj 		     "=== vect_slp_analyze_operations ===\n");
287938fd1498Szrj 
288038fd1498Szrj   for (i = 0; vinfo->slp_instances.iterate (i, &instance); )
288138fd1498Szrj     {
288238fd1498Szrj       if (!vect_slp_analyze_node_operations (vinfo,
288338fd1498Szrj 					     SLP_INSTANCE_TREE (instance),
288438fd1498Szrj 					     instance))
288538fd1498Szrj         {
288638fd1498Szrj 	  dump_printf_loc (MSG_NOTE, vect_location,
288738fd1498Szrj 			   "removing SLP instance operations starting from: ");
288838fd1498Szrj 	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
288938fd1498Szrj 			    SLP_TREE_SCALAR_STMTS
289038fd1498Szrj 			      (SLP_INSTANCE_TREE (instance))[0], 0);
289138fd1498Szrj 	  vect_free_slp_instance (instance);
289238fd1498Szrj           vinfo->slp_instances.ordered_remove (i);
289338fd1498Szrj 	}
289438fd1498Szrj       else
289538fd1498Szrj 	i++;
289638fd1498Szrj     }
289738fd1498Szrj 
289838fd1498Szrj   if (dump_enabled_p ())
289938fd1498Szrj     dump_printf_loc (MSG_NOTE, vect_location,
290038fd1498Szrj 		     "=== vect_analyze_slp_cost ===\n");
290138fd1498Szrj 
290238fd1498Szrj   /* Compute the costs of the SLP instances.  */
290338fd1498Szrj   scalar_stmts_set_t *visited = new scalar_stmts_set_t ();
290438fd1498Szrj   for (i = 0; vinfo->slp_instances.iterate (i, &instance); ++i)
290538fd1498Szrj     vect_analyze_slp_cost (instance, vinfo->target_cost_data, visited);
290638fd1498Szrj   delete visited;
290738fd1498Szrj 
290838fd1498Szrj   return !vinfo->slp_instances.is_empty ();
290938fd1498Szrj }
291038fd1498Szrj 
291138fd1498Szrj 
291238fd1498Szrj /* Compute the scalar cost of the SLP node NODE and its children
291338fd1498Szrj    and return it.  Do not account defs that are marked in LIFE and
291438fd1498Szrj    update LIFE according to uses of NODE.  */
291538fd1498Szrj 
291638fd1498Szrj static unsigned
vect_bb_slp_scalar_cost(basic_block bb,slp_tree node,vec<bool,va_heap> * life)291738fd1498Szrj vect_bb_slp_scalar_cost (basic_block bb,
291838fd1498Szrj 			 slp_tree node, vec<bool, va_heap> *life)
291938fd1498Szrj {
292038fd1498Szrj   unsigned scalar_cost = 0;
292138fd1498Szrj   unsigned i;
292238fd1498Szrj   gimple *stmt;
292338fd1498Szrj   slp_tree child;
292438fd1498Szrj 
292538fd1498Szrj   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
292638fd1498Szrj     {
292738fd1498Szrj       unsigned stmt_cost;
292838fd1498Szrj       ssa_op_iter op_iter;
292938fd1498Szrj       def_operand_p def_p;
293038fd1498Szrj       stmt_vec_info stmt_info;
293138fd1498Szrj 
293238fd1498Szrj       if ((*life)[i])
293338fd1498Szrj 	continue;
293438fd1498Szrj 
293538fd1498Szrj       /* If there is a non-vectorized use of the defs then the scalar
293638fd1498Szrj          stmt is kept live in which case we do not account it or any
293738fd1498Szrj 	 required defs in the SLP children in the scalar cost.  This
293838fd1498Szrj 	 way we make the vectorization more costly when compared to
293938fd1498Szrj 	 the scalar cost.  */
294038fd1498Szrj       FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
294138fd1498Szrj 	{
294238fd1498Szrj 	  imm_use_iterator use_iter;
294338fd1498Szrj 	  gimple *use_stmt;
294438fd1498Szrj 	  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
294538fd1498Szrj 	    if (!is_gimple_debug (use_stmt)
294638fd1498Szrj 		&& (! vect_stmt_in_region_p (vinfo_for_stmt (stmt)->vinfo,
294738fd1498Szrj 					     use_stmt)
294838fd1498Szrj 		    || ! PURE_SLP_STMT (vinfo_for_stmt (use_stmt))))
294938fd1498Szrj 	      {
295038fd1498Szrj 		(*life)[i] = true;
295138fd1498Szrj 		BREAK_FROM_IMM_USE_STMT (use_iter);
295238fd1498Szrj 	      }
295338fd1498Szrj 	}
295438fd1498Szrj       if ((*life)[i])
295538fd1498Szrj 	continue;
295638fd1498Szrj 
295738fd1498Szrj       /* Count scalar stmts only once.  */
295838fd1498Szrj       if (gimple_visited_p (stmt))
295938fd1498Szrj 	continue;
296038fd1498Szrj       gimple_set_visited (stmt, true);
296138fd1498Szrj 
296238fd1498Szrj       stmt_info = vinfo_for_stmt (stmt);
296338fd1498Szrj       if (STMT_VINFO_DATA_REF (stmt_info))
296438fd1498Szrj         {
296538fd1498Szrj           if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
296638fd1498Szrj             stmt_cost = vect_get_stmt_cost (scalar_load);
296738fd1498Szrj           else
296838fd1498Szrj             stmt_cost = vect_get_stmt_cost (scalar_store);
296938fd1498Szrj         }
297038fd1498Szrj       else
297138fd1498Szrj         stmt_cost = vect_get_stmt_cost (scalar_stmt);
297238fd1498Szrj 
297338fd1498Szrj       scalar_cost += stmt_cost;
297438fd1498Szrj     }
297538fd1498Szrj 
297638fd1498Szrj   auto_vec<bool, 20> subtree_life;
297738fd1498Szrj   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
297838fd1498Szrj     {
297938fd1498Szrj       if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
298038fd1498Szrj 	{
298138fd1498Szrj 	  /* Do not directly pass LIFE to the recursive call, copy it to
298238fd1498Szrj 	     confine changes in the callee to the current child/subtree.  */
298338fd1498Szrj 	  subtree_life.safe_splice (*life);
298438fd1498Szrj 	  scalar_cost += vect_bb_slp_scalar_cost (bb, child, &subtree_life);
298538fd1498Szrj 	  subtree_life.truncate (0);
298638fd1498Szrj 	}
298738fd1498Szrj     }
298838fd1498Szrj 
298938fd1498Szrj   return scalar_cost;
299038fd1498Szrj }
299138fd1498Szrj 
299238fd1498Szrj /* Check if vectorization of the basic block is profitable.  */
299338fd1498Szrj 
299438fd1498Szrj static bool
vect_bb_vectorization_profitable_p(bb_vec_info bb_vinfo)299538fd1498Szrj vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
299638fd1498Szrj {
299738fd1498Szrj   vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
299838fd1498Szrj   slp_instance instance;
299938fd1498Szrj   int i;
300038fd1498Szrj   unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
300138fd1498Szrj   unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
300238fd1498Szrj 
300338fd1498Szrj   /* Calculate scalar cost.  */
300438fd1498Szrj   FOR_EACH_VEC_ELT (slp_instances, i, instance)
300538fd1498Szrj     {
300638fd1498Szrj       auto_vec<bool, 20> life;
300738fd1498Szrj       life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
300838fd1498Szrj       scalar_cost += vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
300938fd1498Szrj 					      SLP_INSTANCE_TREE (instance),
301038fd1498Szrj 					      &life);
301138fd1498Szrj     }
301238fd1498Szrj 
301338fd1498Szrj   /* Unset visited flag.  */
301438fd1498Szrj   for (gimple_stmt_iterator gsi = bb_vinfo->region_begin;
301538fd1498Szrj        gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
301638fd1498Szrj     gimple_set_visited  (gsi_stmt (gsi), false);
301738fd1498Szrj 
301838fd1498Szrj   /* Complete the target-specific cost calculation.  */
301938fd1498Szrj   finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
302038fd1498Szrj 	       &vec_inside_cost, &vec_epilogue_cost);
302138fd1498Szrj 
302238fd1498Szrj   vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
302338fd1498Szrj 
302438fd1498Szrj   if (dump_enabled_p ())
302538fd1498Szrj     {
302638fd1498Szrj       dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
302738fd1498Szrj       dump_printf (MSG_NOTE, "  Vector inside of basic block cost: %d\n",
302838fd1498Szrj 		   vec_inside_cost);
302938fd1498Szrj       dump_printf (MSG_NOTE, "  Vector prologue cost: %d\n", vec_prologue_cost);
303038fd1498Szrj       dump_printf (MSG_NOTE, "  Vector epilogue cost: %d\n", vec_epilogue_cost);
303138fd1498Szrj       dump_printf (MSG_NOTE, "  Scalar cost of basic block: %d\n", scalar_cost);
303238fd1498Szrj     }
303338fd1498Szrj 
303438fd1498Szrj   /* Vectorization is profitable if its cost is more than the cost of scalar
303538fd1498Szrj      version.  Note that we err on the vector side for equal cost because
303638fd1498Szrj      the cost estimate is otherwise quite pessimistic (constant uses are
303738fd1498Szrj      free on the scalar side but cost a load on the vector side for
303838fd1498Szrj      example).  */
303938fd1498Szrj   if (vec_outside_cost + vec_inside_cost > scalar_cost)
304038fd1498Szrj     return false;
304138fd1498Szrj 
304238fd1498Szrj   return true;
304338fd1498Szrj }
304438fd1498Szrj 
304538fd1498Szrj /* Check if the basic block can be vectorized.  Returns a bb_vec_info
304638fd1498Szrj    if so and sets fatal to true if failure is independent of
304738fd1498Szrj    current_vector_size.  */
304838fd1498Szrj 
304938fd1498Szrj static bb_vec_info
vect_slp_analyze_bb_1(gimple_stmt_iterator region_begin,gimple_stmt_iterator region_end,vec<data_reference_p> datarefs,int n_stmts,bool & fatal)305038fd1498Szrj vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin,
305138fd1498Szrj 		       gimple_stmt_iterator region_end,
305238fd1498Szrj 		       vec<data_reference_p> datarefs, int n_stmts,
305338fd1498Szrj 		       bool &fatal)
305438fd1498Szrj {
305538fd1498Szrj   bb_vec_info bb_vinfo;
305638fd1498Szrj   slp_instance instance;
305738fd1498Szrj   int i;
305838fd1498Szrj   poly_uint64 min_vf = 2;
305938fd1498Szrj 
306038fd1498Szrj   /* The first group of checks is independent of the vector size.  */
306138fd1498Szrj   fatal = true;
306238fd1498Szrj 
306338fd1498Szrj   if (n_stmts > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
306438fd1498Szrj     {
306538fd1498Szrj       if (dump_enabled_p ())
306638fd1498Szrj 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
306738fd1498Szrj 			 "not vectorized: too many instructions in "
306838fd1498Szrj 			 "basic block.\n");
306938fd1498Szrj       free_data_refs (datarefs);
307038fd1498Szrj       return NULL;
307138fd1498Szrj     }
307238fd1498Szrj 
307338fd1498Szrj   bb_vinfo = new _bb_vec_info (region_begin, region_end);
307438fd1498Szrj   if (!bb_vinfo)
307538fd1498Szrj     return NULL;
307638fd1498Szrj 
307738fd1498Szrj   BB_VINFO_DATAREFS (bb_vinfo) = datarefs;
307838fd1498Szrj 
307938fd1498Szrj   /* Analyze the data references.  */
308038fd1498Szrj 
308138fd1498Szrj   if (!vect_analyze_data_refs (bb_vinfo, &min_vf))
308238fd1498Szrj     {
308338fd1498Szrj       if (dump_enabled_p ())
308438fd1498Szrj         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
308538fd1498Szrj 			 "not vectorized: unhandled data-ref in basic "
308638fd1498Szrj 			 "block.\n");
308738fd1498Szrj 
308838fd1498Szrj       delete bb_vinfo;
308938fd1498Szrj       return NULL;
309038fd1498Szrj     }
309138fd1498Szrj 
309238fd1498Szrj   if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
309338fd1498Szrj     {
309438fd1498Szrj       if (dump_enabled_p ())
309538fd1498Szrj         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
309638fd1498Szrj 			 "not vectorized: not enough data-refs in "
309738fd1498Szrj 			 "basic block.\n");
309838fd1498Szrj 
309938fd1498Szrj       delete bb_vinfo;
310038fd1498Szrj       return NULL;
310138fd1498Szrj     }
310238fd1498Szrj 
310338fd1498Szrj   if (!vect_analyze_data_ref_accesses (bb_vinfo))
310438fd1498Szrj     {
310538fd1498Szrj      if (dump_enabled_p ())
310638fd1498Szrj        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
310738fd1498Szrj 			"not vectorized: unhandled data access in "
310838fd1498Szrj 			"basic block.\n");
310938fd1498Szrj 
311038fd1498Szrj       delete bb_vinfo;
311138fd1498Szrj       return NULL;
311238fd1498Szrj     }
311338fd1498Szrj 
311438fd1498Szrj   /* If there are no grouped stores in the region there is no need
311538fd1498Szrj      to continue with pattern recog as vect_analyze_slp will fail
311638fd1498Szrj      anyway.  */
311738fd1498Szrj   if (bb_vinfo->grouped_stores.is_empty ())
311838fd1498Szrj     {
311938fd1498Szrj       if (dump_enabled_p ())
312038fd1498Szrj 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
312138fd1498Szrj 			 "not vectorized: no grouped stores in "
312238fd1498Szrj 			 "basic block.\n");
312338fd1498Szrj 
312438fd1498Szrj       delete bb_vinfo;
312538fd1498Szrj       return NULL;
312638fd1498Szrj     }
312738fd1498Szrj 
312838fd1498Szrj   /* While the rest of the analysis below depends on it in some way.  */
312938fd1498Szrj   fatal = false;
313038fd1498Szrj 
313138fd1498Szrj   vect_pattern_recog (bb_vinfo);
313238fd1498Szrj 
313338fd1498Szrj   /* Check the SLP opportunities in the basic block, analyze and build SLP
313438fd1498Szrj      trees.  */
313538fd1498Szrj   if (!vect_analyze_slp (bb_vinfo, n_stmts))
313638fd1498Szrj     {
313738fd1498Szrj       if (dump_enabled_p ())
313838fd1498Szrj 	{
313938fd1498Szrj 	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
314038fd1498Szrj 			   "Failed to SLP the basic block.\n");
314138fd1498Szrj 	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
314238fd1498Szrj 			   "not vectorized: failed to find SLP opportunities "
314338fd1498Szrj 			   "in basic block.\n");
314438fd1498Szrj 	}
314538fd1498Szrj 
314638fd1498Szrj       delete bb_vinfo;
314738fd1498Szrj       return NULL;
314838fd1498Szrj     }
314938fd1498Szrj 
315038fd1498Szrj   vect_record_base_alignments (bb_vinfo);
315138fd1498Szrj 
315238fd1498Szrj   /* Analyze and verify the alignment of data references and the
315338fd1498Szrj      dependence in the SLP instances.  */
315438fd1498Szrj   for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); )
315538fd1498Szrj     {
315638fd1498Szrj       if (! vect_slp_analyze_and_verify_instance_alignment (instance)
315738fd1498Szrj 	  || ! vect_slp_analyze_instance_dependence (instance))
315838fd1498Szrj 	{
315938fd1498Szrj 	  dump_printf_loc (MSG_NOTE, vect_location,
316038fd1498Szrj 			   "removing SLP instance operations starting from: ");
316138fd1498Szrj 	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
316238fd1498Szrj 			    SLP_TREE_SCALAR_STMTS
316338fd1498Szrj 			      (SLP_INSTANCE_TREE (instance))[0], 0);
316438fd1498Szrj 	  vect_free_slp_instance (instance);
316538fd1498Szrj 	  BB_VINFO_SLP_INSTANCES (bb_vinfo).ordered_remove (i);
316638fd1498Szrj 	  continue;
316738fd1498Szrj 	}
316838fd1498Szrj 
316938fd1498Szrj       /* Mark all the statements that we want to vectorize as pure SLP and
317038fd1498Szrj 	 relevant.  */
317138fd1498Szrj       vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
317238fd1498Szrj       vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
317338fd1498Szrj 
317438fd1498Szrj       i++;
317538fd1498Szrj     }
317638fd1498Szrj   if (! BB_VINFO_SLP_INSTANCES (bb_vinfo).length ())
317738fd1498Szrj     {
317838fd1498Szrj       delete bb_vinfo;
317938fd1498Szrj       return NULL;
318038fd1498Szrj     }
318138fd1498Szrj 
318238fd1498Szrj   if (!vect_slp_analyze_operations (bb_vinfo))
318338fd1498Szrj     {
318438fd1498Szrj       if (dump_enabled_p ())
318538fd1498Szrj         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
318638fd1498Szrj 			 "not vectorized: bad operation in basic block.\n");
318738fd1498Szrj 
318838fd1498Szrj       delete bb_vinfo;
318938fd1498Szrj       return NULL;
319038fd1498Szrj     }
319138fd1498Szrj 
319238fd1498Szrj   /* Cost model: check if the vectorization is worthwhile.  */
319338fd1498Szrj   if (!unlimited_cost_model (NULL)
319438fd1498Szrj       && !vect_bb_vectorization_profitable_p (bb_vinfo))
319538fd1498Szrj     {
319638fd1498Szrj       if (dump_enabled_p ())
319738fd1498Szrj         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
319838fd1498Szrj 			 "not vectorized: vectorization is not "
319938fd1498Szrj 			 "profitable.\n");
320038fd1498Szrj 
320138fd1498Szrj       delete bb_vinfo;
320238fd1498Szrj       return NULL;
320338fd1498Szrj     }
320438fd1498Szrj 
320538fd1498Szrj   if (dump_enabled_p ())
320638fd1498Szrj     dump_printf_loc (MSG_NOTE, vect_location,
320738fd1498Szrj 		     "Basic block will be vectorized using SLP\n");
320838fd1498Szrj 
320938fd1498Szrj   return bb_vinfo;
321038fd1498Szrj }
321138fd1498Szrj 
321238fd1498Szrj 
321338fd1498Szrj /* Main entry for the BB vectorizer.  Analyze and transform BB, returns
321438fd1498Szrj    true if anything in the basic-block was vectorized.  */
321538fd1498Szrj 
321638fd1498Szrj bool
vect_slp_bb(basic_block bb)321738fd1498Szrj vect_slp_bb (basic_block bb)
321838fd1498Szrj {
321938fd1498Szrj   bb_vec_info bb_vinfo;
322038fd1498Szrj   gimple_stmt_iterator gsi;
322138fd1498Szrj   bool any_vectorized = false;
322238fd1498Szrj   auto_vector_sizes vector_sizes;
322338fd1498Szrj 
322438fd1498Szrj   if (dump_enabled_p ())
322538fd1498Szrj     dump_printf_loc (MSG_NOTE, vect_location, "===vect_slp_analyze_bb===\n");
322638fd1498Szrj 
322738fd1498Szrj   /* Autodetect first vector size we try.  */
322838fd1498Szrj   current_vector_size = 0;
322938fd1498Szrj   targetm.vectorize.autovectorize_vector_sizes (&vector_sizes);
323038fd1498Szrj   unsigned int next_size = 0;
323138fd1498Szrj 
323238fd1498Szrj   gsi = gsi_start_bb (bb);
323338fd1498Szrj 
323438fd1498Szrj   poly_uint64 autodetected_vector_size = 0;
323538fd1498Szrj   while (1)
323638fd1498Szrj     {
323738fd1498Szrj       if (gsi_end_p (gsi))
323838fd1498Szrj 	break;
323938fd1498Szrj 
324038fd1498Szrj       gimple_stmt_iterator region_begin = gsi;
324138fd1498Szrj       vec<data_reference_p> datarefs = vNULL;
324238fd1498Szrj       int insns = 0;
324338fd1498Szrj 
324438fd1498Szrj       for (; !gsi_end_p (gsi); gsi_next (&gsi))
324538fd1498Szrj 	{
324638fd1498Szrj 	  gimple *stmt = gsi_stmt (gsi);
324738fd1498Szrj 	  if (is_gimple_debug (stmt))
324838fd1498Szrj 	    continue;
324938fd1498Szrj 	  insns++;
325038fd1498Szrj 
325138fd1498Szrj 	  if (gimple_location (stmt) != UNKNOWN_LOCATION)
325238fd1498Szrj 	    vect_location = gimple_location (stmt);
325338fd1498Szrj 
325438fd1498Szrj 	  if (!find_data_references_in_stmt (NULL, stmt, &datarefs))
325538fd1498Szrj 	    break;
325638fd1498Szrj 	}
325738fd1498Szrj 
325838fd1498Szrj       /* Skip leading unhandled stmts.  */
325938fd1498Szrj       if (gsi_stmt (region_begin) == gsi_stmt (gsi))
326038fd1498Szrj 	{
326138fd1498Szrj 	  gsi_next (&gsi);
326238fd1498Szrj 	  continue;
326338fd1498Szrj 	}
326438fd1498Szrj 
326538fd1498Szrj       gimple_stmt_iterator region_end = gsi;
326638fd1498Szrj 
326738fd1498Szrj       bool vectorized = false;
326838fd1498Szrj       bool fatal = false;
326938fd1498Szrj       bb_vinfo = vect_slp_analyze_bb_1 (region_begin, region_end,
327038fd1498Szrj 					datarefs, insns, fatal);
327138fd1498Szrj       if (bb_vinfo
327238fd1498Szrj 	  && dbg_cnt (vect_slp))
327338fd1498Szrj 	{
327438fd1498Szrj 	  if (dump_enabled_p ())
327538fd1498Szrj 	    dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n");
327638fd1498Szrj 
327738fd1498Szrj 	  vect_schedule_slp (bb_vinfo);
327838fd1498Szrj 
327938fd1498Szrj 	  if (dump_enabled_p ())
328038fd1498Szrj 	    dump_printf_loc (MSG_NOTE, vect_location,
328138fd1498Szrj 			     "basic block part vectorized\n");
328238fd1498Szrj 
328338fd1498Szrj 	  vectorized = true;
328438fd1498Szrj 	}
328538fd1498Szrj       delete bb_vinfo;
328638fd1498Szrj 
328738fd1498Szrj       any_vectorized |= vectorized;
328838fd1498Szrj 
328938fd1498Szrj       if (next_size == 0)
329038fd1498Szrj 	autodetected_vector_size = current_vector_size;
329138fd1498Szrj 
329238fd1498Szrj       if (next_size < vector_sizes.length ()
329338fd1498Szrj 	  && known_eq (vector_sizes[next_size], autodetected_vector_size))
329438fd1498Szrj 	next_size += 1;
329538fd1498Szrj 
329638fd1498Szrj       if (vectorized
329738fd1498Szrj 	  || next_size == vector_sizes.length ()
329838fd1498Szrj 	  || known_eq (current_vector_size, 0U)
329938fd1498Szrj 	  /* If vect_slp_analyze_bb_1 signaled that analysis for all
330038fd1498Szrj 	     vector sizes will fail do not bother iterating.  */
330138fd1498Szrj 	  || fatal)
330238fd1498Szrj 	{
330338fd1498Szrj 	  if (gsi_end_p (region_end))
330438fd1498Szrj 	    break;
330538fd1498Szrj 
330638fd1498Szrj 	  /* Skip the unhandled stmt.  */
330738fd1498Szrj 	  gsi_next (&gsi);
330838fd1498Szrj 
330938fd1498Szrj 	  /* And reset vector sizes.  */
331038fd1498Szrj 	  current_vector_size = 0;
331138fd1498Szrj 	  next_size = 0;
331238fd1498Szrj 	}
331338fd1498Szrj       else
331438fd1498Szrj 	{
331538fd1498Szrj 	  /* Try the next biggest vector size.  */
331638fd1498Szrj 	  current_vector_size = vector_sizes[next_size++];
331738fd1498Szrj 	  if (dump_enabled_p ())
331838fd1498Szrj 	    {
331938fd1498Szrj 	      dump_printf_loc (MSG_NOTE, vect_location,
332038fd1498Szrj 			       "***** Re-trying analysis with "
332138fd1498Szrj 			       "vector size ");
332238fd1498Szrj 	      dump_dec (MSG_NOTE, current_vector_size);
332338fd1498Szrj 	      dump_printf (MSG_NOTE, "\n");
332438fd1498Szrj 	    }
332538fd1498Szrj 
332638fd1498Szrj 	  /* Start over.  */
332738fd1498Szrj 	  gsi = region_begin;
332838fd1498Szrj 	}
332938fd1498Szrj     }
333038fd1498Szrj 
333138fd1498Szrj   return any_vectorized;
333238fd1498Szrj }
333338fd1498Szrj 
333438fd1498Szrj 
333538fd1498Szrj /* Return 1 if vector type of boolean constant which is OPNUM
333638fd1498Szrj    operand in statement STMT is a boolean vector.  */
333738fd1498Szrj 
333838fd1498Szrj static bool
vect_mask_constant_operand_p(gimple * stmt,int opnum)333938fd1498Szrj vect_mask_constant_operand_p (gimple *stmt, int opnum)
334038fd1498Szrj {
334138fd1498Szrj   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
334238fd1498Szrj   enum tree_code code = gimple_expr_code (stmt);
334338fd1498Szrj   tree op, vectype;
334438fd1498Szrj   gimple *def_stmt;
334538fd1498Szrj   enum vect_def_type dt;
334638fd1498Szrj 
334738fd1498Szrj   /* For comparison and COND_EXPR type is chosen depending
334838fd1498Szrj      on the other comparison operand.  */
334938fd1498Szrj   if (TREE_CODE_CLASS (code) == tcc_comparison)
335038fd1498Szrj     {
335138fd1498Szrj       if (opnum)
335238fd1498Szrj 	op = gimple_assign_rhs1 (stmt);
335338fd1498Szrj       else
335438fd1498Szrj 	op = gimple_assign_rhs2 (stmt);
335538fd1498Szrj 
335638fd1498Szrj       if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &def_stmt,
335738fd1498Szrj 			       &dt, &vectype))
335838fd1498Szrj 	gcc_unreachable ();
335938fd1498Szrj 
336038fd1498Szrj       return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
336138fd1498Szrj     }
336238fd1498Szrj 
336338fd1498Szrj   if (code == COND_EXPR)
336438fd1498Szrj     {
336538fd1498Szrj       tree cond = gimple_assign_rhs1 (stmt);
336638fd1498Szrj 
336738fd1498Szrj       if (TREE_CODE (cond) == SSA_NAME)
336838fd1498Szrj 	op = cond;
336938fd1498Szrj       else if (opnum)
337038fd1498Szrj 	op = TREE_OPERAND (cond, 1);
337138fd1498Szrj       else
337238fd1498Szrj 	op = TREE_OPERAND (cond, 0);
337338fd1498Szrj 
337438fd1498Szrj       if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &def_stmt,
337538fd1498Szrj 			       &dt, &vectype))
337638fd1498Szrj 	gcc_unreachable ();
337738fd1498Szrj 
337838fd1498Szrj       return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
337938fd1498Szrj     }
338038fd1498Szrj 
338138fd1498Szrj   return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
338238fd1498Szrj }
338338fd1498Szrj 
338438fd1498Szrj /* Build a variable-length vector in which the elements in ELTS are repeated
338538fd1498Szrj    to a fill NRESULTS vectors of type VECTOR_TYPE.  Store the vectors in
338638fd1498Szrj    RESULTS and add any new instructions to SEQ.
338738fd1498Szrj 
338838fd1498Szrj    The approach we use is:
338938fd1498Szrj 
339038fd1498Szrj    (1) Find a vector mode VM with integer elements of mode IM.
339138fd1498Szrj 
339238fd1498Szrj    (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
339338fd1498Szrj        ELTS' has mode IM.  This involves creating NELTS' VIEW_CONVERT_EXPRs
339438fd1498Szrj        from small vectors to IM.
339538fd1498Szrj 
339638fd1498Szrj    (3) Duplicate each ELTS'[I] into a vector of mode VM.
339738fd1498Szrj 
339838fd1498Szrj    (4) Use a tree of interleaving VEC_PERM_EXPRs to create VMs with the
339938fd1498Szrj        correct byte contents.
340038fd1498Szrj 
340138fd1498Szrj    (5) Use VIEW_CONVERT_EXPR to cast the final VMs to the required type.
340238fd1498Szrj 
340338fd1498Szrj    We try to find the largest IM for which this sequence works, in order
340438fd1498Szrj    to cut down on the number of interleaves.  */
340538fd1498Szrj 
340638fd1498Szrj void
duplicate_and_interleave(gimple_seq * seq,tree vector_type,vec<tree> elts,unsigned int nresults,vec<tree> & results)340738fd1498Szrj duplicate_and_interleave (gimple_seq *seq, tree vector_type, vec<tree> elts,
340838fd1498Szrj 			  unsigned int nresults, vec<tree> &results)
340938fd1498Szrj {
341038fd1498Szrj   unsigned int nelts = elts.length ();
341138fd1498Szrj   tree element_type = TREE_TYPE (vector_type);
341238fd1498Szrj 
341338fd1498Szrj   /* (1) Find a vector mode VM with integer elements of mode IM.  */
341438fd1498Szrj   unsigned int nvectors = 1;
341538fd1498Szrj   tree new_vector_type;
341638fd1498Szrj   tree permutes[2];
341738fd1498Szrj   if (!can_duplicate_and_interleave_p (nelts, TYPE_MODE (element_type),
341838fd1498Szrj 				       &nvectors, &new_vector_type,
341938fd1498Szrj 				       permutes))
342038fd1498Szrj     gcc_unreachable ();
342138fd1498Szrj 
342238fd1498Szrj   /* Get a vector type that holds ELTS[0:NELTS/NELTS'].  */
342338fd1498Szrj   unsigned int partial_nelts = nelts / nvectors;
342438fd1498Szrj   tree partial_vector_type = build_vector_type (element_type, partial_nelts);
342538fd1498Szrj 
342638fd1498Szrj   tree_vector_builder partial_elts;
342738fd1498Szrj   auto_vec<tree, 32> pieces (nvectors * 2);
342838fd1498Szrj   pieces.quick_grow (nvectors * 2);
342938fd1498Szrj   for (unsigned int i = 0; i < nvectors; ++i)
343038fd1498Szrj     {
343138fd1498Szrj       /* (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
343238fd1498Szrj 	     ELTS' has mode IM.  */
343338fd1498Szrj       partial_elts.new_vector (partial_vector_type, partial_nelts, 1);
343438fd1498Szrj       for (unsigned int j = 0; j < partial_nelts; ++j)
343538fd1498Szrj 	partial_elts.quick_push (elts[i * partial_nelts + j]);
343638fd1498Szrj       tree t = gimple_build_vector (seq, &partial_elts);
343738fd1498Szrj       t = gimple_build (seq, VIEW_CONVERT_EXPR,
343838fd1498Szrj 			TREE_TYPE (new_vector_type), t);
343938fd1498Szrj 
344038fd1498Szrj       /* (3) Duplicate each ELTS'[I] into a vector of mode VM.  */
344138fd1498Szrj       pieces[i] = gimple_build_vector_from_val (seq, new_vector_type, t);
344238fd1498Szrj     }
344338fd1498Szrj 
344438fd1498Szrj   /* (4) Use a tree of VEC_PERM_EXPRs to create a single VM with the
344538fd1498Szrj 	 correct byte contents.
344638fd1498Szrj 
344738fd1498Szrj      We need to repeat the following operation log2(nvectors) times:
344838fd1498Szrj 
344938fd1498Szrj 	out[i * 2] = VEC_PERM_EXPR (in[i], in[i + hi_start], lo_permute);
345038fd1498Szrj 	out[i * 2 + 1] = VEC_PERM_EXPR (in[i], in[i + hi_start], hi_permute);
345138fd1498Szrj 
345238fd1498Szrj      However, if each input repeats every N elements and the VF is
345338fd1498Szrj      a multiple of N * 2, the HI result is the same as the LO.  */
345438fd1498Szrj   unsigned int in_start = 0;
345538fd1498Szrj   unsigned int out_start = nvectors;
345638fd1498Szrj   unsigned int hi_start = nvectors / 2;
345738fd1498Szrj   /* A bound on the number of outputs needed to produce NRESULTS results
345838fd1498Szrj      in the final iteration.  */
345938fd1498Szrj   unsigned int noutputs_bound = nvectors * nresults;
346038fd1498Szrj   for (unsigned int in_repeat = 1; in_repeat < nvectors; in_repeat *= 2)
346138fd1498Szrj     {
346238fd1498Szrj       noutputs_bound /= 2;
346338fd1498Szrj       unsigned int limit = MIN (noutputs_bound, nvectors);
346438fd1498Szrj       for (unsigned int i = 0; i < limit; ++i)
346538fd1498Szrj 	{
346638fd1498Szrj 	  if ((i & 1) != 0
346738fd1498Szrj 	      && multiple_p (TYPE_VECTOR_SUBPARTS (new_vector_type),
346838fd1498Szrj 			     2 * in_repeat))
346938fd1498Szrj 	    {
347038fd1498Szrj 	      pieces[out_start + i] = pieces[out_start + i - 1];
347138fd1498Szrj 	      continue;
347238fd1498Szrj 	    }
347338fd1498Szrj 
347438fd1498Szrj 	  tree output = make_ssa_name (new_vector_type);
347538fd1498Szrj 	  tree input1 = pieces[in_start + (i / 2)];
347638fd1498Szrj 	  tree input2 = pieces[in_start + (i / 2) + hi_start];
347738fd1498Szrj 	  gassign *stmt = gimple_build_assign (output, VEC_PERM_EXPR,
347838fd1498Szrj 					       input1, input2,
347938fd1498Szrj 					       permutes[i & 1]);
348038fd1498Szrj 	  gimple_seq_add_stmt (seq, stmt);
348138fd1498Szrj 	  pieces[out_start + i] = output;
348238fd1498Szrj 	}
348338fd1498Szrj       std::swap (in_start, out_start);
348438fd1498Szrj     }
348538fd1498Szrj 
348638fd1498Szrj   /* (5) Use VIEW_CONVERT_EXPR to cast the final VM to the required type.  */
348738fd1498Szrj   results.reserve (nresults);
348838fd1498Szrj   for (unsigned int i = 0; i < nresults; ++i)
348938fd1498Szrj     if (i < nvectors)
349038fd1498Szrj       results.quick_push (gimple_build (seq, VIEW_CONVERT_EXPR, vector_type,
349138fd1498Szrj 					pieces[in_start + i]));
349238fd1498Szrj     else
349338fd1498Szrj       results.quick_push (results[i - nvectors]);
349438fd1498Szrj }
349538fd1498Szrj 
349638fd1498Szrj 
349738fd1498Szrj /* For constant and loop invariant defs of SLP_NODE this function returns
349838fd1498Szrj    (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
349938fd1498Szrj    OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
350038fd1498Szrj    scalar stmts.  NUMBER_OF_VECTORS is the number of vector defs to create.
350138fd1498Szrj    REDUC_INDEX is the index of the reduction operand in the statements, unless
350238fd1498Szrj    it is -1.  */
350338fd1498Szrj 
350438fd1498Szrj static void
vect_get_constant_vectors(tree op,slp_tree slp_node,vec<tree> * vec_oprnds,unsigned int op_num,unsigned int number_of_vectors)350538fd1498Szrj vect_get_constant_vectors (tree op, slp_tree slp_node,
350638fd1498Szrj                            vec<tree> *vec_oprnds,
350738fd1498Szrj 			   unsigned int op_num, unsigned int number_of_vectors)
350838fd1498Szrj {
350938fd1498Szrj   vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
351038fd1498Szrj   gimple *stmt = stmts[0];
351138fd1498Szrj   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
351238fd1498Szrj   unsigned HOST_WIDE_INT nunits;
351338fd1498Szrj   tree vec_cst;
351438fd1498Szrj   unsigned j, number_of_places_left_in_vector;
351538fd1498Szrj   tree vector_type;
351638fd1498Szrj   tree vop;
351738fd1498Szrj   int group_size = stmts.length ();
351838fd1498Szrj   unsigned int vec_num, i;
351938fd1498Szrj   unsigned number_of_copies = 1;
352038fd1498Szrj   vec<tree> voprnds;
352138fd1498Szrj   voprnds.create (number_of_vectors);
352238fd1498Szrj   bool constant_p, is_store;
352338fd1498Szrj   tree neutral_op = NULL;
352438fd1498Szrj   enum tree_code code = gimple_expr_code (stmt);
352538fd1498Szrj   gimple_seq ctor_seq = NULL;
352638fd1498Szrj   auto_vec<tree, 16> permute_results;
352738fd1498Szrj 
352838fd1498Szrj   /* Check if vector type is a boolean vector.  */
352938fd1498Szrj   if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
353038fd1498Szrj       && vect_mask_constant_operand_p (stmt, op_num))
353138fd1498Szrj     vector_type
353238fd1498Szrj       = build_same_sized_truth_vector_type (STMT_VINFO_VECTYPE (stmt_vinfo));
353338fd1498Szrj   else
353438fd1498Szrj     vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
353538fd1498Szrj 
353638fd1498Szrj   if (STMT_VINFO_DATA_REF (stmt_vinfo))
353738fd1498Szrj     {
353838fd1498Szrj       is_store = true;
353938fd1498Szrj       op = gimple_assign_rhs1 (stmt);
354038fd1498Szrj     }
354138fd1498Szrj   else
354238fd1498Szrj     is_store = false;
354338fd1498Szrj 
354438fd1498Szrj   gcc_assert (op);
354538fd1498Szrj 
354638fd1498Szrj   /* NUMBER_OF_COPIES is the number of times we need to use the same values in
354738fd1498Szrj      created vectors. It is greater than 1 if unrolling is performed.
354838fd1498Szrj 
354938fd1498Szrj      For example, we have two scalar operands, s1 and s2 (e.g., group of
355038fd1498Szrj      strided accesses of size two), while NUNITS is four (i.e., four scalars
355138fd1498Szrj      of this type can be packed in a vector).  The output vector will contain
355238fd1498Szrj      two copies of each scalar operand: {s1, s2, s1, s2}.  (NUMBER_OF_COPIES
355338fd1498Szrj      will be 2).
355438fd1498Szrj 
355538fd1498Szrj      If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
355638fd1498Szrj      containing the operands.
355738fd1498Szrj 
355838fd1498Szrj      For example, NUNITS is four as before, and the group size is 8
355938fd1498Szrj      (s1, s2, ..., s8).  We will create two vectors {s1, s2, s3, s4} and
356038fd1498Szrj      {s5, s6, s7, s8}.  */
356138fd1498Szrj 
356238fd1498Szrj   /* When using duplicate_and_interleave, we just need one element for
356338fd1498Szrj      each scalar statement.  */
356438fd1498Szrj   if (!TYPE_VECTOR_SUBPARTS (vector_type).is_constant (&nunits))
356538fd1498Szrj     nunits = group_size;
356638fd1498Szrj 
356738fd1498Szrj   number_of_copies = nunits * number_of_vectors / group_size;
356838fd1498Szrj 
356938fd1498Szrj   number_of_places_left_in_vector = nunits;
357038fd1498Szrj   constant_p = true;
357138fd1498Szrj   tree_vector_builder elts (vector_type, nunits, 1);
357238fd1498Szrj   elts.quick_grow (nunits);
357338fd1498Szrj   bool place_after_defs = false;
357438fd1498Szrj   for (j = 0; j < number_of_copies; j++)
357538fd1498Szrj     {
357638fd1498Szrj       for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
357738fd1498Szrj         {
357838fd1498Szrj           if (is_store)
357938fd1498Szrj             op = gimple_assign_rhs1 (stmt);
358038fd1498Szrj           else
358138fd1498Szrj 	    {
358238fd1498Szrj 	      switch (code)
358338fd1498Szrj 		{
358438fd1498Szrj 		  case COND_EXPR:
358538fd1498Szrj 		    {
358638fd1498Szrj 		      tree cond = gimple_assign_rhs1 (stmt);
358738fd1498Szrj 		      if (TREE_CODE (cond) == SSA_NAME)
358838fd1498Szrj 			op = gimple_op (stmt, op_num + 1);
358938fd1498Szrj 		      else if (op_num == 0 || op_num == 1)
359038fd1498Szrj 			op = TREE_OPERAND (cond, op_num);
359138fd1498Szrj 		      else
359238fd1498Szrj 			{
359338fd1498Szrj 			  if (op_num == 2)
359438fd1498Szrj 			    op = gimple_assign_rhs2 (stmt);
359538fd1498Szrj 			  else
359638fd1498Szrj 			    op = gimple_assign_rhs3 (stmt);
359738fd1498Szrj 			}
359838fd1498Szrj 		    }
359938fd1498Szrj 		    break;
360038fd1498Szrj 
360138fd1498Szrj 		  case CALL_EXPR:
360238fd1498Szrj 		    op = gimple_call_arg (stmt, op_num);
360338fd1498Szrj 		    break;
360438fd1498Szrj 
360538fd1498Szrj 		  case LSHIFT_EXPR:
360638fd1498Szrj 		  case RSHIFT_EXPR:
360738fd1498Szrj 		  case LROTATE_EXPR:
360838fd1498Szrj 		  case RROTATE_EXPR:
360938fd1498Szrj 		    op = gimple_op (stmt, op_num + 1);
361038fd1498Szrj 		    /* Unlike the other binary operators, shifts/rotates have
361138fd1498Szrj 		       the shift count being int, instead of the same type as
361238fd1498Szrj 		       the lhs, so make sure the scalar is the right type if
361338fd1498Szrj 		       we are dealing with vectors of
361438fd1498Szrj 		       long long/long/short/char.  */
361538fd1498Szrj 		    if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
361638fd1498Szrj 		      op = fold_convert (TREE_TYPE (vector_type), op);
361738fd1498Szrj 		    break;
361838fd1498Szrj 
361938fd1498Szrj 		  default:
362038fd1498Szrj 		    op = gimple_op (stmt, op_num + 1);
362138fd1498Szrj 		    break;
362238fd1498Szrj 		}
362338fd1498Szrj 	    }
362438fd1498Szrj 
362538fd1498Szrj           /* Create 'vect_ = {op0,op1,...,opn}'.  */
362638fd1498Szrj           number_of_places_left_in_vector--;
362738fd1498Szrj 	  tree orig_op = op;
362838fd1498Szrj 	  if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
362938fd1498Szrj 	    {
363038fd1498Szrj 	      if (CONSTANT_CLASS_P (op))
363138fd1498Szrj 		{
363238fd1498Szrj 		  if (VECTOR_BOOLEAN_TYPE_P (vector_type))
363338fd1498Szrj 		    {
363438fd1498Szrj 		      /* Can't use VIEW_CONVERT_EXPR for booleans because
363538fd1498Szrj 			 of possibly different sizes of scalar value and
363638fd1498Szrj 			 vector element.  */
363738fd1498Szrj 		      if (integer_zerop (op))
363838fd1498Szrj 			op = build_int_cst (TREE_TYPE (vector_type), 0);
363938fd1498Szrj 		      else if (integer_onep (op))
364038fd1498Szrj 			op = build_all_ones_cst (TREE_TYPE (vector_type));
364138fd1498Szrj 		      else
364238fd1498Szrj 			gcc_unreachable ();
364338fd1498Szrj 		    }
364438fd1498Szrj 		  else
364538fd1498Szrj 		    op = fold_unary (VIEW_CONVERT_EXPR,
364638fd1498Szrj 				     TREE_TYPE (vector_type), op);
364738fd1498Szrj 		  gcc_assert (op && CONSTANT_CLASS_P (op));
364838fd1498Szrj 		}
364938fd1498Szrj 	      else
365038fd1498Szrj 		{
365138fd1498Szrj 		  tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
365238fd1498Szrj 		  gimple *init_stmt;
365338fd1498Szrj 		  if (VECTOR_BOOLEAN_TYPE_P (vector_type))
365438fd1498Szrj 		    {
365538fd1498Szrj 		      tree true_val
365638fd1498Szrj 			= build_all_ones_cst (TREE_TYPE (vector_type));
365738fd1498Szrj 		      tree false_val
365838fd1498Szrj 			= build_zero_cst (TREE_TYPE (vector_type));
365938fd1498Szrj 		      gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op)));
366038fd1498Szrj 		      init_stmt = gimple_build_assign (new_temp, COND_EXPR,
366138fd1498Szrj 						       op, true_val,
366238fd1498Szrj 						       false_val);
366338fd1498Szrj 		    }
366438fd1498Szrj 		  else
366538fd1498Szrj 		    {
366638fd1498Szrj 		      op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
366738fd1498Szrj 				   op);
366838fd1498Szrj 		      init_stmt
366938fd1498Szrj 			= gimple_build_assign (new_temp, VIEW_CONVERT_EXPR,
367038fd1498Szrj 					       op);
367138fd1498Szrj 		    }
367238fd1498Szrj 		  gimple_seq_add_stmt (&ctor_seq, init_stmt);
367338fd1498Szrj 		  op = new_temp;
367438fd1498Szrj 		}
367538fd1498Szrj 	    }
367638fd1498Szrj 	  elts[number_of_places_left_in_vector] = op;
367738fd1498Szrj 	  if (!CONSTANT_CLASS_P (op))
367838fd1498Szrj 	    constant_p = false;
367938fd1498Szrj 	  if (TREE_CODE (orig_op) == SSA_NAME
368038fd1498Szrj 	      && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
368138fd1498Szrj 	      && STMT_VINFO_BB_VINFO (stmt_vinfo)
368238fd1498Szrj 	      && (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb
368338fd1498Szrj 		  == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
368438fd1498Szrj 	    place_after_defs = true;
368538fd1498Szrj 
368638fd1498Szrj           if (number_of_places_left_in_vector == 0)
368738fd1498Szrj             {
368838fd1498Szrj 	      if (constant_p
368938fd1498Szrj 		  ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type), nunits)
369038fd1498Szrj 		  : known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits))
369138fd1498Szrj 		vec_cst = gimple_build_vector (&ctor_seq, &elts);
369238fd1498Szrj 	      else
369338fd1498Szrj 		{
369438fd1498Szrj 		  if (vec_oprnds->is_empty ())
369538fd1498Szrj 		    duplicate_and_interleave (&ctor_seq, vector_type, elts,
369638fd1498Szrj 					      number_of_vectors,
369738fd1498Szrj 					      permute_results);
369838fd1498Szrj 		  vec_cst = permute_results[number_of_vectors - j - 1];
369938fd1498Szrj 		}
370038fd1498Szrj 	      tree init;
370138fd1498Szrj 	      gimple_stmt_iterator gsi;
370238fd1498Szrj 	      if (place_after_defs)
370338fd1498Szrj 		{
370438fd1498Szrj 		  gsi = gsi_for_stmt
370538fd1498Szrj 		          (vect_find_last_scalar_stmt_in_slp (slp_node));
370638fd1498Szrj 		  init = vect_init_vector (stmt, vec_cst, vector_type, &gsi);
370738fd1498Szrj 		}
370838fd1498Szrj 	      else
370938fd1498Szrj 		init = vect_init_vector (stmt, vec_cst, vector_type, NULL);
371038fd1498Szrj 	      if (ctor_seq != NULL)
371138fd1498Szrj 		{
371238fd1498Szrj 		  gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
371338fd1498Szrj 		  gsi_insert_seq_before (&gsi, ctor_seq, GSI_SAME_STMT);
371438fd1498Szrj 		  ctor_seq = NULL;
371538fd1498Szrj 		}
371638fd1498Szrj 	      voprnds.quick_push (init);
371738fd1498Szrj 	      place_after_defs = false;
371838fd1498Szrj               number_of_places_left_in_vector = nunits;
371938fd1498Szrj 	      constant_p = true;
372038fd1498Szrj 	      elts.new_vector (vector_type, nunits, 1);
372138fd1498Szrj 	      elts.quick_grow (nunits);
372238fd1498Szrj             }
372338fd1498Szrj         }
372438fd1498Szrj     }
372538fd1498Szrj 
372638fd1498Szrj   /* Since the vectors are created in the reverse order, we should invert
372738fd1498Szrj      them.  */
372838fd1498Szrj   vec_num = voprnds.length ();
372938fd1498Szrj   for (j = vec_num; j != 0; j--)
373038fd1498Szrj     {
373138fd1498Szrj       vop = voprnds[j - 1];
373238fd1498Szrj       vec_oprnds->quick_push (vop);
373338fd1498Szrj     }
373438fd1498Szrj 
373538fd1498Szrj   voprnds.release ();
373638fd1498Szrj 
373738fd1498Szrj   /* In case that VF is greater than the unrolling factor needed for the SLP
373838fd1498Szrj      group of stmts, NUMBER_OF_VECTORS to be created is greater than
373938fd1498Szrj      NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
374038fd1498Szrj      to replicate the vectors.  */
374138fd1498Szrj   while (number_of_vectors > vec_oprnds->length ())
374238fd1498Szrj     {
374338fd1498Szrj       tree neutral_vec = NULL;
374438fd1498Szrj 
374538fd1498Szrj       if (neutral_op)
374638fd1498Szrj         {
374738fd1498Szrj           if (!neutral_vec)
374838fd1498Szrj 	    neutral_vec = build_vector_from_val (vector_type, neutral_op);
374938fd1498Szrj 
375038fd1498Szrj           vec_oprnds->quick_push (neutral_vec);
375138fd1498Szrj         }
375238fd1498Szrj       else
375338fd1498Szrj         {
375438fd1498Szrj           for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
375538fd1498Szrj             vec_oprnds->quick_push (vop);
375638fd1498Szrj         }
375738fd1498Szrj     }
375838fd1498Szrj }
375938fd1498Szrj 
376038fd1498Szrj 
376138fd1498Szrj /* Get vectorized definitions from SLP_NODE that contains corresponding
376238fd1498Szrj    vectorized def-stmts.  */
376338fd1498Szrj 
376438fd1498Szrj static void
vect_get_slp_vect_defs(slp_tree slp_node,vec<tree> * vec_oprnds)376538fd1498Szrj vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
376638fd1498Szrj {
376738fd1498Szrj   tree vec_oprnd;
376838fd1498Szrj   gimple *vec_def_stmt;
376938fd1498Szrj   unsigned int i;
377038fd1498Szrj 
377138fd1498Szrj   gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
377238fd1498Szrj 
377338fd1498Szrj   FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
377438fd1498Szrj     {
377538fd1498Szrj       gcc_assert (vec_def_stmt);
377638fd1498Szrj       if (gimple_code (vec_def_stmt) == GIMPLE_PHI)
377738fd1498Szrj 	vec_oprnd = gimple_phi_result (vec_def_stmt);
377838fd1498Szrj       else
377938fd1498Szrj 	vec_oprnd = gimple_get_lhs (vec_def_stmt);
378038fd1498Szrj       vec_oprnds->quick_push (vec_oprnd);
378138fd1498Szrj     }
378238fd1498Szrj }
378338fd1498Szrj 
378438fd1498Szrj 
378538fd1498Szrj /* Get vectorized definitions for SLP_NODE.
378638fd1498Szrj    If the scalar definitions are loop invariants or constants, collect them and
378738fd1498Szrj    call vect_get_constant_vectors() to create vector stmts.
378838fd1498Szrj    Otherwise, the def-stmts must be already vectorized and the vectorized stmts
378938fd1498Szrj    must be stored in the corresponding child of SLP_NODE, and we call
379038fd1498Szrj    vect_get_slp_vect_defs () to retrieve them.  */
379138fd1498Szrj 
379238fd1498Szrj void
vect_get_slp_defs(vec<tree> ops,slp_tree slp_node,vec<vec<tree>> * vec_oprnds)379338fd1498Szrj vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
379438fd1498Szrj 		   vec<vec<tree> > *vec_oprnds)
379538fd1498Szrj {
379638fd1498Szrj   gimple *first_stmt;
379738fd1498Szrj   int number_of_vects = 0, i;
379838fd1498Szrj   unsigned int child_index = 0;
379938fd1498Szrj   HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
380038fd1498Szrj   slp_tree child = NULL;
380138fd1498Szrj   vec<tree> vec_defs;
380238fd1498Szrj   tree oprnd;
380338fd1498Szrj   bool vectorized_defs;
380438fd1498Szrj 
380538fd1498Szrj   first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
380638fd1498Szrj   FOR_EACH_VEC_ELT (ops, i, oprnd)
380738fd1498Szrj     {
380838fd1498Szrj       /* For each operand we check if it has vectorized definitions in a child
380938fd1498Szrj 	 node or we need to create them (for invariants and constants).  We
381038fd1498Szrj 	 check if the LHS of the first stmt of the next child matches OPRND.
381138fd1498Szrj 	 If it does, we found the correct child.  Otherwise, we call
381238fd1498Szrj 	 vect_get_constant_vectors (), and not advance CHILD_INDEX in order
381338fd1498Szrj 	 to check this child node for the next operand.  */
381438fd1498Szrj       vectorized_defs = false;
381538fd1498Szrj       if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
381638fd1498Szrj         {
381738fd1498Szrj           child = SLP_TREE_CHILDREN (slp_node)[child_index];
381838fd1498Szrj 
381938fd1498Szrj 	  /* We have to check both pattern and original def, if available.  */
382038fd1498Szrj 	  if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
382138fd1498Szrj 	    {
382238fd1498Szrj 	      gimple *first_def = SLP_TREE_SCALAR_STMTS (child)[0];
382338fd1498Szrj 	      gimple *related
382438fd1498Szrj 		= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
382538fd1498Szrj 	      tree first_def_op;
382638fd1498Szrj 
382738fd1498Szrj 	      if (gimple_code (first_def) == GIMPLE_PHI)
382838fd1498Szrj 		first_def_op = gimple_phi_result (first_def);
382938fd1498Szrj 	      else
383038fd1498Szrj 		first_def_op = gimple_get_lhs (first_def);
383138fd1498Szrj 	      if (operand_equal_p (oprnd, first_def_op, 0)
383238fd1498Szrj 		  || (related
383338fd1498Szrj 		      && operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
383438fd1498Szrj 		{
383538fd1498Szrj 		  /* The number of vector defs is determined by the number of
383638fd1498Szrj 		     vector statements in the node from which we get those
383738fd1498Szrj 		     statements.  */
383838fd1498Szrj 		  number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
383938fd1498Szrj 		  vectorized_defs = true;
384038fd1498Szrj 		  child_index++;
384138fd1498Szrj 		}
384238fd1498Szrj 	    }
384338fd1498Szrj 	  else
384438fd1498Szrj 	    child_index++;
384538fd1498Szrj         }
384638fd1498Szrj 
384738fd1498Szrj       if (!vectorized_defs)
384838fd1498Szrj         {
384938fd1498Szrj           if (i == 0)
385038fd1498Szrj             {
385138fd1498Szrj               number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
385238fd1498Szrj               /* Number of vector stmts was calculated according to LHS in
385338fd1498Szrj                  vect_schedule_slp_instance (), fix it by replacing LHS with
385438fd1498Szrj                  RHS, if necessary.  See vect_get_smallest_scalar_type () for
385538fd1498Szrj                  details.  */
385638fd1498Szrj               vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
385738fd1498Szrj                                              &rhs_size_unit);
385838fd1498Szrj               if (rhs_size_unit != lhs_size_unit)
385938fd1498Szrj                 {
386038fd1498Szrj                   number_of_vects *= rhs_size_unit;
386138fd1498Szrj                   number_of_vects /= lhs_size_unit;
386238fd1498Szrj                 }
386338fd1498Szrj             }
386438fd1498Szrj         }
386538fd1498Szrj 
386638fd1498Szrj       /* Allocate memory for vectorized defs.  */
386738fd1498Szrj       vec_defs = vNULL;
386838fd1498Szrj       vec_defs.create (number_of_vects);
386938fd1498Szrj 
387038fd1498Szrj       /* For reduction defs we call vect_get_constant_vectors (), since we are
387138fd1498Szrj          looking for initial loop invariant values.  */
387238fd1498Szrj       if (vectorized_defs)
387338fd1498Szrj         /* The defs are already vectorized.  */
387438fd1498Szrj 	vect_get_slp_vect_defs (child, &vec_defs);
387538fd1498Szrj       else
387638fd1498Szrj 	/* Build vectors from scalar defs.  */
387738fd1498Szrj 	vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
387838fd1498Szrj 				   number_of_vects);
387938fd1498Szrj 
388038fd1498Szrj       vec_oprnds->quick_push (vec_defs);
388138fd1498Szrj     }
388238fd1498Szrj }
388338fd1498Szrj 
388438fd1498Szrj /* Generate vector permute statements from a list of loads in DR_CHAIN.
388538fd1498Szrj    If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
388638fd1498Szrj    permute statements for the SLP node NODE of the SLP instance
388738fd1498Szrj    SLP_NODE_INSTANCE.  */
388838fd1498Szrj 
388938fd1498Szrj bool
vect_transform_slp_perm_load(slp_tree node,vec<tree> dr_chain,gimple_stmt_iterator * gsi,poly_uint64 vf,slp_instance slp_node_instance,bool analyze_only,unsigned * n_perms)389038fd1498Szrj vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
389138fd1498Szrj 			      gimple_stmt_iterator *gsi, poly_uint64 vf,
389238fd1498Szrj 			      slp_instance slp_node_instance, bool analyze_only,
389338fd1498Szrj 			      unsigned *n_perms)
389438fd1498Szrj {
389538fd1498Szrj   gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
389638fd1498Szrj   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
389738fd1498Szrj   tree mask_element_type = NULL_TREE, mask_type;
389838fd1498Szrj   int vec_index = 0;
389938fd1498Szrj   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
390038fd1498Szrj   int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
390138fd1498Szrj   unsigned int mask_element;
390238fd1498Szrj   machine_mode mode;
390338fd1498Szrj   unsigned HOST_WIDE_INT nunits, const_vf;
390438fd1498Szrj 
390538fd1498Szrj   if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
390638fd1498Szrj     return false;
390738fd1498Szrj 
390838fd1498Szrj   stmt_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info));
390938fd1498Szrj 
391038fd1498Szrj   mode = TYPE_MODE (vectype);
391138fd1498Szrj 
391238fd1498Szrj   /* At the moment, all permutations are represented using per-element
391338fd1498Szrj      indices, so we can't cope with variable vector lengths or
391438fd1498Szrj      vectorization factors.  */
391538fd1498Szrj   if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
391638fd1498Szrj       || !vf.is_constant (&const_vf))
391738fd1498Szrj     return false;
391838fd1498Szrj 
391938fd1498Szrj   /* The generic VEC_PERM_EXPR code always uses an integral type of the
392038fd1498Szrj      same size as the vector element being permuted.  */
392138fd1498Szrj   mask_element_type = lang_hooks.types.type_for_mode
392238fd1498Szrj     (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))).require (), 1);
392338fd1498Szrj   mask_type = get_vectype_for_scalar_type (mask_element_type);
392438fd1498Szrj   vec_perm_builder mask (nunits, nunits, 1);
392538fd1498Szrj   mask.quick_grow (nunits);
392638fd1498Szrj   vec_perm_indices indices;
392738fd1498Szrj 
392838fd1498Szrj   /* Initialize the vect stmts of NODE to properly insert the generated
392938fd1498Szrj      stmts later.  */
393038fd1498Szrj   if (! analyze_only)
393138fd1498Szrj     for (unsigned i = SLP_TREE_VEC_STMTS (node).length ();
393238fd1498Szrj 	 i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
393338fd1498Szrj       SLP_TREE_VEC_STMTS (node).quick_push (NULL);
393438fd1498Szrj 
393538fd1498Szrj   /* Generate permutation masks for every NODE. Number of masks for each NODE
393638fd1498Szrj      is equal to GROUP_SIZE.
393738fd1498Szrj      E.g., we have a group of three nodes with three loads from the same
393838fd1498Szrj      location in each node, and the vector size is 4. I.e., we have a
393938fd1498Szrj      a0b0c0a1b1c1... sequence and we need to create the following vectors:
394038fd1498Szrj      for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
394138fd1498Szrj      for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
394238fd1498Szrj      ...
394338fd1498Szrj 
394438fd1498Szrj      The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
394538fd1498Szrj      The last mask is illegal since we assume two operands for permute
394638fd1498Szrj      operation, and the mask element values can't be outside that range.
394738fd1498Szrj      Hence, the last mask must be converted into {2,5,5,5}.
394838fd1498Szrj      For the first two permutations we need the first and the second input
394938fd1498Szrj      vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
395038fd1498Szrj      we need the second and the third vectors: {b1,c1,a2,b2} and
395138fd1498Szrj      {c2,a3,b3,c3}.  */
395238fd1498Szrj 
395338fd1498Szrj   int vect_stmts_counter = 0;
395438fd1498Szrj   unsigned int index = 0;
395538fd1498Szrj   int first_vec_index = -1;
395638fd1498Szrj   int second_vec_index = -1;
395738fd1498Szrj   bool noop_p = true;
395838fd1498Szrj   *n_perms = 0;
395938fd1498Szrj 
396038fd1498Szrj   for (unsigned int j = 0; j < const_vf; j++)
396138fd1498Szrj     {
396238fd1498Szrj       for (int k = 0; k < group_size; k++)
396338fd1498Szrj 	{
396438fd1498Szrj 	  unsigned int i = (SLP_TREE_LOAD_PERMUTATION (node)[k]
396538fd1498Szrj 			    + j * STMT_VINFO_GROUP_SIZE (stmt_info));
396638fd1498Szrj 	  vec_index = i / nunits;
396738fd1498Szrj 	  mask_element = i % nunits;
396838fd1498Szrj 	  if (vec_index == first_vec_index
396938fd1498Szrj 	      || first_vec_index == -1)
397038fd1498Szrj 	    {
397138fd1498Szrj 	      first_vec_index = vec_index;
397238fd1498Szrj 	    }
397338fd1498Szrj 	  else if (vec_index == second_vec_index
397438fd1498Szrj 		   || second_vec_index == -1)
397538fd1498Szrj 	    {
397638fd1498Szrj 	      second_vec_index = vec_index;
397738fd1498Szrj 	      mask_element += nunits;
397838fd1498Szrj 	    }
397938fd1498Szrj 	  else
398038fd1498Szrj 	    {
398138fd1498Szrj 	      if (dump_enabled_p ())
398238fd1498Szrj 		{
398338fd1498Szrj 		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
398438fd1498Szrj 				   "permutation requires at "
398538fd1498Szrj 				   "least three vectors ");
398638fd1498Szrj 		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
398738fd1498Szrj 				    stmt, 0);
398838fd1498Szrj 		}
398938fd1498Szrj 	      gcc_assert (analyze_only);
399038fd1498Szrj 	      return false;
399138fd1498Szrj 	    }
399238fd1498Szrj 
399338fd1498Szrj 	  gcc_assert (mask_element < 2 * nunits);
399438fd1498Szrj 	  if (mask_element != index)
399538fd1498Szrj 	    noop_p = false;
399638fd1498Szrj 	  mask[index++] = mask_element;
399738fd1498Szrj 
399838fd1498Szrj 	  if (index == nunits && !noop_p)
399938fd1498Szrj 	    {
400038fd1498Szrj 	      indices.new_vector (mask, 2, nunits);
400138fd1498Szrj 	      if (!can_vec_perm_const_p (mode, indices))
400238fd1498Szrj 		{
400338fd1498Szrj 		  if (dump_enabled_p ())
400438fd1498Szrj 		    {
400538fd1498Szrj 		      dump_printf_loc (MSG_MISSED_OPTIMIZATION,
400638fd1498Szrj 				       vect_location,
400738fd1498Szrj 				       "unsupported vect permute { ");
400838fd1498Szrj 		      for (i = 0; i < nunits; ++i)
400938fd1498Szrj 			{
401038fd1498Szrj 			  dump_dec (MSG_MISSED_OPTIMIZATION, mask[i]);
401138fd1498Szrj 			  dump_printf (MSG_MISSED_OPTIMIZATION, " ");
401238fd1498Szrj 			}
401338fd1498Szrj 		      dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
401438fd1498Szrj 		    }
401538fd1498Szrj 		  gcc_assert (analyze_only);
401638fd1498Szrj 		  return false;
401738fd1498Szrj 		}
401838fd1498Szrj 
401938fd1498Szrj 	      ++*n_perms;
402038fd1498Szrj 	    }
402138fd1498Szrj 
402238fd1498Szrj 	  if (index == nunits)
402338fd1498Szrj 	    {
402438fd1498Szrj 	      if (!analyze_only)
402538fd1498Szrj 		{
402638fd1498Szrj 		  tree mask_vec = NULL_TREE;
402738fd1498Szrj 
402838fd1498Szrj 		  if (! noop_p)
402938fd1498Szrj 		    mask_vec = vec_perm_indices_to_tree (mask_type, indices);
403038fd1498Szrj 
403138fd1498Szrj 		  if (second_vec_index == -1)
403238fd1498Szrj 		    second_vec_index = first_vec_index;
403338fd1498Szrj 
403438fd1498Szrj 		  /* Generate the permute statement if necessary.  */
403538fd1498Szrj 		  tree first_vec = dr_chain[first_vec_index];
403638fd1498Szrj 		  tree second_vec = dr_chain[second_vec_index];
403738fd1498Szrj 		  gimple *perm_stmt;
403838fd1498Szrj 		  if (! noop_p)
403938fd1498Szrj 		    {
404038fd1498Szrj 		      tree perm_dest
404138fd1498Szrj 			= vect_create_destination_var (gimple_assign_lhs (stmt),
404238fd1498Szrj 						       vectype);
404338fd1498Szrj 		      perm_dest = make_ssa_name (perm_dest);
404438fd1498Szrj 		      perm_stmt = gimple_build_assign (perm_dest,
404538fd1498Szrj 						       VEC_PERM_EXPR,
404638fd1498Szrj 						       first_vec, second_vec,
404738fd1498Szrj 						       mask_vec);
404838fd1498Szrj 		      vect_finish_stmt_generation (stmt, perm_stmt, gsi);
404938fd1498Szrj 		    }
405038fd1498Szrj 		  else
405138fd1498Szrj 		    /* If mask was NULL_TREE generate the requested
405238fd1498Szrj 		       identity transform.  */
405338fd1498Szrj 		    perm_stmt = SSA_NAME_DEF_STMT (first_vec);
405438fd1498Szrj 
405538fd1498Szrj 		  /* Store the vector statement in NODE.  */
405638fd1498Szrj 		  SLP_TREE_VEC_STMTS (node)[vect_stmts_counter++] = perm_stmt;
405738fd1498Szrj 		}
405838fd1498Szrj 
405938fd1498Szrj 	      index = 0;
406038fd1498Szrj 	      first_vec_index = -1;
406138fd1498Szrj 	      second_vec_index = -1;
406238fd1498Szrj 	      noop_p = true;
406338fd1498Szrj 	    }
406438fd1498Szrj 	}
406538fd1498Szrj     }
406638fd1498Szrj 
406738fd1498Szrj   return true;
406838fd1498Szrj }
406938fd1498Szrj 
407038fd1498Szrj typedef hash_map <vec <gimple *>, slp_tree,
407138fd1498Szrj 		  simple_hashmap_traits <bst_traits, slp_tree> >
407238fd1498Szrj   scalar_stmts_to_slp_tree_map_t;
407338fd1498Szrj 
407438fd1498Szrj /* Vectorize SLP instance tree in postorder.  */
407538fd1498Szrj 
407638fd1498Szrj static bool
vect_schedule_slp_instance(slp_tree node,slp_instance instance,scalar_stmts_to_slp_tree_map_t * bst_map)407738fd1498Szrj vect_schedule_slp_instance (slp_tree node, slp_instance instance,
407838fd1498Szrj 			    scalar_stmts_to_slp_tree_map_t *bst_map)
407938fd1498Szrj {
408038fd1498Szrj   gimple *stmt;
408138fd1498Szrj   bool grouped_store, is_store;
408238fd1498Szrj   gimple_stmt_iterator si;
408338fd1498Szrj   stmt_vec_info stmt_info;
408438fd1498Szrj   unsigned int group_size;
408538fd1498Szrj   tree vectype;
408638fd1498Szrj   int i, j;
408738fd1498Szrj   slp_tree child;
408838fd1498Szrj 
408938fd1498Szrj   if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
409038fd1498Szrj     return false;
409138fd1498Szrj 
409238fd1498Szrj   /* See if we have already vectorized the same set of stmts and reuse their
409338fd1498Szrj      vectorized stmts.  */
4094*58e805e6Szrj   if (slp_tree *leader = bst_map->get (SLP_TREE_SCALAR_STMTS (node)))
409538fd1498Szrj     {
4096*58e805e6Szrj       SLP_TREE_VEC_STMTS (node).safe_splice (SLP_TREE_VEC_STMTS (*leader));
4097*58e805e6Szrj       SLP_TREE_NUMBER_OF_VEC_STMTS (node)
4098*58e805e6Szrj 	= SLP_TREE_NUMBER_OF_VEC_STMTS (*leader);
409938fd1498Szrj       return false;
410038fd1498Szrj     }
410138fd1498Szrj 
4102*58e805e6Szrj   bst_map->put (SLP_TREE_SCALAR_STMTS (node).copy (), node);
410338fd1498Szrj   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
410438fd1498Szrj     vect_schedule_slp_instance (child, instance, bst_map);
410538fd1498Szrj 
410638fd1498Szrj   /* Push SLP node def-type to stmts.  */
410738fd1498Szrj   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
410838fd1498Szrj     if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
410938fd1498Szrj       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
411038fd1498Szrj 	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);
411138fd1498Szrj 
411238fd1498Szrj   stmt = SLP_TREE_SCALAR_STMTS (node)[0];
411338fd1498Szrj   stmt_info = vinfo_for_stmt (stmt);
411438fd1498Szrj 
411538fd1498Szrj   /* VECTYPE is the type of the destination.  */
411638fd1498Szrj   vectype = STMT_VINFO_VECTYPE (stmt_info);
411738fd1498Szrj   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
411838fd1498Szrj   group_size = SLP_INSTANCE_GROUP_SIZE (instance);
411938fd1498Szrj 
412038fd1498Szrj   if (!SLP_TREE_VEC_STMTS (node).exists ())
412138fd1498Szrj     SLP_TREE_VEC_STMTS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));
412238fd1498Szrj 
412338fd1498Szrj   if (dump_enabled_p ())
412438fd1498Szrj     {
412538fd1498Szrj       dump_printf_loc (MSG_NOTE,vect_location,
412638fd1498Szrj 		       "------>vectorizing SLP node starting from: ");
412738fd1498Szrj       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
412838fd1498Szrj     }
412938fd1498Szrj 
413038fd1498Szrj   /* Vectorized stmts go before the last scalar stmt which is where
413138fd1498Szrj      all uses are ready.  */
413238fd1498Szrj   si = gsi_for_stmt (vect_find_last_scalar_stmt_in_slp (node));
413338fd1498Szrj 
413438fd1498Szrj   /* Mark the first element of the reduction chain as reduction to properly
413538fd1498Szrj      transform the node.  In the analysis phase only the last element of the
413638fd1498Szrj      chain is marked as reduction.  */
413738fd1498Szrj   if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_GROUPED_ACCESS (stmt_info)
413838fd1498Szrj       && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
413938fd1498Szrj     {
414038fd1498Szrj       STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
414138fd1498Szrj       STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
414238fd1498Szrj     }
414338fd1498Szrj 
414438fd1498Szrj   /* Handle two-operation SLP nodes by vectorizing the group with
414538fd1498Szrj      both operations and then performing a merge.  */
414638fd1498Szrj   if (SLP_TREE_TWO_OPERATORS (node))
414738fd1498Szrj     {
414838fd1498Szrj       enum tree_code code0 = gimple_assign_rhs_code (stmt);
414938fd1498Szrj       enum tree_code ocode = ERROR_MARK;
415038fd1498Szrj       gimple *ostmt;
415138fd1498Szrj       vec_perm_builder mask (group_size, group_size, 1);
415238fd1498Szrj       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt)
415338fd1498Szrj 	if (gimple_assign_rhs_code (ostmt) != code0)
415438fd1498Szrj 	  {
415538fd1498Szrj 	    mask.quick_push (1);
415638fd1498Szrj 	    ocode = gimple_assign_rhs_code (ostmt);
415738fd1498Szrj 	  }
415838fd1498Szrj 	else
415938fd1498Szrj 	  mask.quick_push (0);
416038fd1498Szrj       if (ocode != ERROR_MARK)
416138fd1498Szrj 	{
416238fd1498Szrj 	  vec<gimple *> v0;
416338fd1498Szrj 	  vec<gimple *> v1;
416438fd1498Szrj 	  unsigned j;
416538fd1498Szrj 	  tree tmask = NULL_TREE;
416638fd1498Szrj 	  vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
416738fd1498Szrj 	  v0 = SLP_TREE_VEC_STMTS (node).copy ();
416838fd1498Szrj 	  SLP_TREE_VEC_STMTS (node).truncate (0);
416938fd1498Szrj 	  gimple_assign_set_rhs_code (stmt, ocode);
417038fd1498Szrj 	  vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
417138fd1498Szrj 	  gimple_assign_set_rhs_code (stmt, code0);
417238fd1498Szrj 	  v1 = SLP_TREE_VEC_STMTS (node).copy ();
417338fd1498Szrj 	  SLP_TREE_VEC_STMTS (node).truncate (0);
417438fd1498Szrj 	  tree meltype = build_nonstandard_integer_type
417538fd1498Szrj 	      (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype))), 1);
417638fd1498Szrj 	  tree mvectype = get_same_sized_vectype (meltype, vectype);
417738fd1498Szrj 	  unsigned k = 0, l;
417838fd1498Szrj 	  for (j = 0; j < v0.length (); ++j)
417938fd1498Szrj 	    {
418038fd1498Szrj 	      /* Enforced by vect_build_slp_tree, which rejects variable-length
418138fd1498Szrj 		 vectors for SLP_TREE_TWO_OPERATORS.  */
418238fd1498Szrj 	      unsigned int const_nunits = nunits.to_constant ();
418338fd1498Szrj 	      tree_vector_builder melts (mvectype, const_nunits, 1);
418438fd1498Szrj 	      for (l = 0; l < const_nunits; ++l)
418538fd1498Szrj 		{
418638fd1498Szrj 		  if (k >= group_size)
418738fd1498Szrj 		    k = 0;
418838fd1498Szrj 		  tree t = build_int_cst (meltype,
418938fd1498Szrj 					  mask[k++] * const_nunits + l);
419038fd1498Szrj 		  melts.quick_push (t);
419138fd1498Szrj 		}
419238fd1498Szrj 	      tmask = melts.build ();
419338fd1498Szrj 
419438fd1498Szrj 	      /* ???  Not all targets support a VEC_PERM_EXPR with a
419538fd1498Szrj 	         constant mask that would translate to a vec_merge RTX
419638fd1498Szrj 		 (with their vec_perm_const_ok).  We can either not
419738fd1498Szrj 		 vectorize in that case or let veclower do its job.
419838fd1498Szrj 		 Unfortunately that isn't too great and at least for
419938fd1498Szrj 		 plus/minus we'd eventually like to match targets
420038fd1498Szrj 		 vector addsub instructions.  */
420138fd1498Szrj 	      gimple *vstmt;
420238fd1498Szrj 	      vstmt = gimple_build_assign (make_ssa_name (vectype),
420338fd1498Szrj 					   VEC_PERM_EXPR,
420438fd1498Szrj 					   gimple_assign_lhs (v0[j]),
420538fd1498Szrj 					   gimple_assign_lhs (v1[j]), tmask);
420638fd1498Szrj 	      vect_finish_stmt_generation (stmt, vstmt, &si);
420738fd1498Szrj 	      SLP_TREE_VEC_STMTS (node).quick_push (vstmt);
420838fd1498Szrj 	    }
420938fd1498Szrj 	  v0.release ();
421038fd1498Szrj 	  v1.release ();
421138fd1498Szrj 	  return false;
421238fd1498Szrj 	}
421338fd1498Szrj     }
421438fd1498Szrj   is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
421538fd1498Szrj 
421638fd1498Szrj   /* Restore stmt def-types.  */
421738fd1498Szrj   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
421838fd1498Szrj     if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
421938fd1498Szrj       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
422038fd1498Szrj 	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;
422138fd1498Szrj 
422238fd1498Szrj   return is_store;
422338fd1498Szrj }
422438fd1498Szrj 
422538fd1498Szrj /* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
422638fd1498Szrj    For loop vectorization this is done in vectorizable_call, but for SLP
422738fd1498Szrj    it needs to be deferred until end of vect_schedule_slp, because multiple
422838fd1498Szrj    SLP instances may refer to the same scalar stmt.  */
422938fd1498Szrj 
423038fd1498Szrj static void
vect_remove_slp_scalar_calls(slp_tree node)423138fd1498Szrj vect_remove_slp_scalar_calls (slp_tree node)
423238fd1498Szrj {
423338fd1498Szrj   gimple *stmt, *new_stmt;
423438fd1498Szrj   gimple_stmt_iterator gsi;
423538fd1498Szrj   int i;
423638fd1498Szrj   slp_tree child;
423738fd1498Szrj   tree lhs;
423838fd1498Szrj   stmt_vec_info stmt_info;
423938fd1498Szrj 
424038fd1498Szrj   if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
424138fd1498Szrj     return;
424238fd1498Szrj 
424338fd1498Szrj   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
424438fd1498Szrj     vect_remove_slp_scalar_calls (child);
424538fd1498Szrj 
424638fd1498Szrj   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
424738fd1498Szrj     {
424838fd1498Szrj       if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
424938fd1498Szrj 	continue;
425038fd1498Szrj       stmt_info = vinfo_for_stmt (stmt);
425138fd1498Szrj       if (stmt_info == NULL
425238fd1498Szrj 	  || is_pattern_stmt_p (stmt_info)
425338fd1498Szrj 	  || !PURE_SLP_STMT (stmt_info))
425438fd1498Szrj 	continue;
425538fd1498Szrj       lhs = gimple_call_lhs (stmt);
425638fd1498Szrj       new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
425738fd1498Szrj       set_vinfo_for_stmt (new_stmt, stmt_info);
425838fd1498Szrj       set_vinfo_for_stmt (stmt, NULL);
425938fd1498Szrj       STMT_VINFO_STMT (stmt_info) = new_stmt;
426038fd1498Szrj       gsi = gsi_for_stmt (stmt);
426138fd1498Szrj       gsi_replace (&gsi, new_stmt, false);
426238fd1498Szrj       SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
426338fd1498Szrj     }
426438fd1498Szrj }
426538fd1498Szrj 
426638fd1498Szrj /* Generate vector code for all SLP instances in the loop/basic block.  */
426738fd1498Szrj 
426838fd1498Szrj bool
vect_schedule_slp(vec_info * vinfo)426938fd1498Szrj vect_schedule_slp (vec_info *vinfo)
427038fd1498Szrj {
427138fd1498Szrj   vec<slp_instance> slp_instances;
427238fd1498Szrj   slp_instance instance;
427338fd1498Szrj   unsigned int i;
427438fd1498Szrj   bool is_store = false;
427538fd1498Szrj 
427638fd1498Szrj 
427738fd1498Szrj   scalar_stmts_to_slp_tree_map_t *bst_map
427838fd1498Szrj     = new scalar_stmts_to_slp_tree_map_t ();
427938fd1498Szrj   slp_instances = vinfo->slp_instances;
428038fd1498Szrj   FOR_EACH_VEC_ELT (slp_instances, i, instance)
428138fd1498Szrj     {
428238fd1498Szrj       /* Schedule the tree of INSTANCE.  */
428338fd1498Szrj       is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
428438fd1498Szrj                                              instance, bst_map);
428538fd1498Szrj       if (dump_enabled_p ())
428638fd1498Szrj 	dump_printf_loc (MSG_NOTE, vect_location,
428738fd1498Szrj                          "vectorizing stmts using SLP.\n");
428838fd1498Szrj     }
428938fd1498Szrj   delete bst_map;
429038fd1498Szrj 
429138fd1498Szrj   FOR_EACH_VEC_ELT (slp_instances, i, instance)
429238fd1498Szrj     {
429338fd1498Szrj       slp_tree root = SLP_INSTANCE_TREE (instance);
429438fd1498Szrj       gimple *store;
429538fd1498Szrj       unsigned int j;
429638fd1498Szrj       gimple_stmt_iterator gsi;
429738fd1498Szrj 
429838fd1498Szrj       /* Remove scalar call stmts.  Do not do this for basic-block
429938fd1498Szrj 	 vectorization as not all uses may be vectorized.
430038fd1498Szrj 	 ???  Why should this be necessary?  DCE should be able to
430138fd1498Szrj 	 remove the stmts itself.
430238fd1498Szrj 	 ???  For BB vectorization we can as well remove scalar
430338fd1498Szrj 	 stmts starting from the SLP tree root if they have no
430438fd1498Szrj 	 uses.  */
430538fd1498Szrj       if (is_a <loop_vec_info> (vinfo))
430638fd1498Szrj 	vect_remove_slp_scalar_calls (root);
430738fd1498Szrj 
430838fd1498Szrj       for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store)
430938fd1498Szrj                   && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
431038fd1498Szrj         {
431138fd1498Szrj           if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
431238fd1498Szrj             break;
431338fd1498Szrj 
431438fd1498Szrj          if (is_pattern_stmt_p (vinfo_for_stmt (store)))
431538fd1498Szrj            store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
431638fd1498Szrj           /* Free the attached stmt_vec_info and remove the stmt.  */
431738fd1498Szrj           gsi = gsi_for_stmt (store);
431838fd1498Szrj 	  unlink_stmt_vdef (store);
431938fd1498Szrj           gsi_remove (&gsi, true);
432038fd1498Szrj 	  release_defs (store);
432138fd1498Szrj           free_stmt_vec_info (store);
432238fd1498Szrj         }
432338fd1498Szrj     }
432438fd1498Szrj 
432538fd1498Szrj   return is_store;
432638fd1498Szrj }
4327