138fd1498Szrj /* SLP - Basic Block Vectorization
238fd1498Szrj Copyright (C) 2007-2018 Free Software Foundation, Inc.
338fd1498Szrj Contributed by Dorit Naishlos <dorit@il.ibm.com>
438fd1498Szrj and Ira Rosen <irar@il.ibm.com>
538fd1498Szrj
638fd1498Szrj This file is part of GCC.
738fd1498Szrj
838fd1498Szrj GCC is free software; you can redistribute it and/or modify it under
938fd1498Szrj the terms of the GNU General Public License as published by the Free
1038fd1498Szrj Software Foundation; either version 3, or (at your option) any later
1138fd1498Szrj version.
1238fd1498Szrj
1338fd1498Szrj GCC is distributed in the hope that it will be useful, but WITHOUT ANY
1438fd1498Szrj WARRANTY; without even the implied warranty of MERCHANTABILITY or
1538fd1498Szrj FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
1638fd1498Szrj for more details.
1738fd1498Szrj
1838fd1498Szrj You should have received a copy of the GNU General Public License
1938fd1498Szrj along with GCC; see the file COPYING3. If not see
2038fd1498Szrj <http://www.gnu.org/licenses/>. */
2138fd1498Szrj
2238fd1498Szrj #include "config.h"
2338fd1498Szrj #include "system.h"
2438fd1498Szrj #include "coretypes.h"
2538fd1498Szrj #include "backend.h"
2638fd1498Szrj #include "target.h"
2738fd1498Szrj #include "rtl.h"
2838fd1498Szrj #include "tree.h"
2938fd1498Szrj #include "gimple.h"
3038fd1498Szrj #include "tree-pass.h"
3138fd1498Szrj #include "ssa.h"
3238fd1498Szrj #include "optabs-tree.h"
3338fd1498Szrj #include "insn-config.h"
3438fd1498Szrj #include "recog.h" /* FIXME: for insn_data */
3538fd1498Szrj #include "params.h"
3638fd1498Szrj #include "fold-const.h"
3738fd1498Szrj #include "stor-layout.h"
3838fd1498Szrj #include "gimple-iterator.h"
3938fd1498Szrj #include "cfgloop.h"
4038fd1498Szrj #include "tree-vectorizer.h"
4138fd1498Szrj #include "langhooks.h"
4238fd1498Szrj #include "gimple-walk.h"
4338fd1498Szrj #include "dbgcnt.h"
4438fd1498Szrj #include "tree-vector-builder.h"
4538fd1498Szrj #include "vec-perm-indices.h"
4638fd1498Szrj #include "gimple-fold.h"
4738fd1498Szrj #include "internal-fn.h"
4838fd1498Szrj
4938fd1498Szrj
5038fd1498Szrj /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
5138fd1498Szrj
5238fd1498Szrj static void
vect_free_slp_tree(slp_tree node)5338fd1498Szrj vect_free_slp_tree (slp_tree node)
5438fd1498Szrj {
5538fd1498Szrj int i;
5638fd1498Szrj slp_tree child;
5738fd1498Szrj
5838fd1498Szrj FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
5938fd1498Szrj vect_free_slp_tree (child);
6038fd1498Szrj
6138fd1498Szrj gimple *stmt;
6238fd1498Szrj FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
6338fd1498Szrj /* After transform some stmts are removed and thus their vinfo is gone. */
6438fd1498Szrj if (vinfo_for_stmt (stmt))
6538fd1498Szrj {
6638fd1498Szrj gcc_assert (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) > 0);
6738fd1498Szrj STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))--;
6838fd1498Szrj }
6938fd1498Szrj
7038fd1498Szrj SLP_TREE_CHILDREN (node).release ();
7138fd1498Szrj SLP_TREE_SCALAR_STMTS (node).release ();
7238fd1498Szrj SLP_TREE_VEC_STMTS (node).release ();
7338fd1498Szrj SLP_TREE_LOAD_PERMUTATION (node).release ();
7438fd1498Szrj
7538fd1498Szrj free (node);
7638fd1498Szrj }
7738fd1498Szrj
7838fd1498Szrj
7938fd1498Szrj /* Free the memory allocated for the SLP instance. */
8038fd1498Szrj
8138fd1498Szrj void
vect_free_slp_instance(slp_instance instance)8238fd1498Szrj vect_free_slp_instance (slp_instance instance)
8338fd1498Szrj {
8438fd1498Szrj vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
8538fd1498Szrj SLP_INSTANCE_LOADS (instance).release ();
8638fd1498Szrj free (instance);
8738fd1498Szrj }
8838fd1498Szrj
8938fd1498Szrj
9038fd1498Szrj /* Create an SLP node for SCALAR_STMTS. */
9138fd1498Szrj
9238fd1498Szrj static slp_tree
vect_create_new_slp_node(vec<gimple * > scalar_stmts)9338fd1498Szrj vect_create_new_slp_node (vec<gimple *> scalar_stmts)
9438fd1498Szrj {
9538fd1498Szrj slp_tree node;
9638fd1498Szrj gimple *stmt = scalar_stmts[0];
9738fd1498Szrj unsigned int nops;
9838fd1498Szrj
9938fd1498Szrj if (is_gimple_call (stmt))
10038fd1498Szrj nops = gimple_call_num_args (stmt);
10138fd1498Szrj else if (is_gimple_assign (stmt))
10238fd1498Szrj {
10338fd1498Szrj nops = gimple_num_ops (stmt) - 1;
10438fd1498Szrj if (gimple_assign_rhs_code (stmt) == COND_EXPR)
10538fd1498Szrj nops++;
10638fd1498Szrj }
10738fd1498Szrj else if (gimple_code (stmt) == GIMPLE_PHI)
10838fd1498Szrj nops = 0;
10938fd1498Szrj else
11038fd1498Szrj return NULL;
11138fd1498Szrj
11238fd1498Szrj node = XNEW (struct _slp_tree);
11338fd1498Szrj SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
11438fd1498Szrj SLP_TREE_VEC_STMTS (node).create (0);
11538fd1498Szrj SLP_TREE_CHILDREN (node).create (nops);
11638fd1498Szrj SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
11738fd1498Szrj SLP_TREE_TWO_OPERATORS (node) = false;
11838fd1498Szrj SLP_TREE_DEF_TYPE (node) = vect_internal_def;
11938fd1498Szrj
12038fd1498Szrj unsigned i;
12138fd1498Szrj FOR_EACH_VEC_ELT (scalar_stmts, i, stmt)
12238fd1498Szrj STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))++;
12338fd1498Szrj
12438fd1498Szrj return node;
12538fd1498Szrj }
12638fd1498Szrj
12738fd1498Szrj
/* This structure is used in creation of an SLP tree.  Each instance
   corresponds to the same operand in a group of scalar stmts in an SLP
   node.  */
typedef struct _slp_oprnd_info
{
  /* Def-stmts for the operands.  */
  vec<gimple *> def_stmts;
  /* Information about the first statement, its vector def-type, type, the
     operand itself in case it's constant, and an indication if it's a pattern
     stmt.  */
  /* Scalar type of the first stmt's operand; later stmts must match it
     (see vect_get_and_check_slp_defs).  */
  tree first_op_type;
  /* Vector def-type of the first stmt's operand.  */
  enum vect_def_type first_dt;
  /* Whether the def of the first stmt's operand came from a pattern stmt.  */
  bool first_pattern;
  /* Likewise for the second stmt's operand.  */
  bool second_pattern;
} *slp_oprnd_info;
14338fd1498Szrj
14438fd1498Szrj
14538fd1498Szrj /* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
14638fd1498Szrj operand. */
14738fd1498Szrj static vec<slp_oprnd_info>
vect_create_oprnd_info(int nops,int group_size)14838fd1498Szrj vect_create_oprnd_info (int nops, int group_size)
14938fd1498Szrj {
15038fd1498Szrj int i;
15138fd1498Szrj slp_oprnd_info oprnd_info;
15238fd1498Szrj vec<slp_oprnd_info> oprnds_info;
15338fd1498Szrj
15438fd1498Szrj oprnds_info.create (nops);
15538fd1498Szrj for (i = 0; i < nops; i++)
15638fd1498Szrj {
15738fd1498Szrj oprnd_info = XNEW (struct _slp_oprnd_info);
15838fd1498Szrj oprnd_info->def_stmts.create (group_size);
15938fd1498Szrj oprnd_info->first_dt = vect_uninitialized_def;
16038fd1498Szrj oprnd_info->first_op_type = NULL_TREE;
16138fd1498Szrj oprnd_info->first_pattern = false;
16238fd1498Szrj oprnd_info->second_pattern = false;
16338fd1498Szrj oprnds_info.quick_push (oprnd_info);
16438fd1498Szrj }
16538fd1498Szrj
16638fd1498Szrj return oprnds_info;
16738fd1498Szrj }
16838fd1498Szrj
16938fd1498Szrj
17038fd1498Szrj /* Free operands info. */
17138fd1498Szrj
17238fd1498Szrj static void
vect_free_oprnd_info(vec<slp_oprnd_info> & oprnds_info)17338fd1498Szrj vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
17438fd1498Szrj {
17538fd1498Szrj int i;
17638fd1498Szrj slp_oprnd_info oprnd_info;
17738fd1498Szrj
17838fd1498Szrj FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
17938fd1498Szrj {
18038fd1498Szrj oprnd_info->def_stmts.release ();
18138fd1498Szrj XDELETE (oprnd_info);
18238fd1498Szrj }
18338fd1498Szrj
18438fd1498Szrj oprnds_info.release ();
18538fd1498Szrj }
18638fd1498Szrj
18738fd1498Szrj
18838fd1498Szrj /* Find the place of the data-ref in STMT in the interleaving chain that starts
18938fd1498Szrj from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
19038fd1498Szrj
19138fd1498Szrj int
vect_get_place_in_interleaving_chain(gimple * stmt,gimple * first_stmt)19238fd1498Szrj vect_get_place_in_interleaving_chain (gimple *stmt, gimple *first_stmt)
19338fd1498Szrj {
19438fd1498Szrj gimple *next_stmt = first_stmt;
19538fd1498Szrj int result = 0;
19638fd1498Szrj
19738fd1498Szrj if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
19838fd1498Szrj return -1;
19938fd1498Szrj
20038fd1498Szrj do
20138fd1498Szrj {
20238fd1498Szrj if (next_stmt == stmt)
20338fd1498Szrj return result;
20438fd1498Szrj next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
20538fd1498Szrj if (next_stmt)
20638fd1498Szrj result += GROUP_GAP (vinfo_for_stmt (next_stmt));
20738fd1498Szrj }
20838fd1498Szrj while (next_stmt);
20938fd1498Szrj
21038fd1498Szrj return -1;
21138fd1498Szrj }
21238fd1498Szrj
/* Check whether it is possible to load COUNT elements of type ELT_MODE
   using the method implemented by duplicate_and_interleave.  Return true
   if so, returning the number of intermediate vectors in *NVECTORS_OUT
   (if nonnull) and the type of each intermediate vector in *VECTOR_TYPE_OUT
   (if nonnull).  */

bool
can_duplicate_and_interleave_p (unsigned int count, machine_mode elt_mode,
				unsigned int *nvectors_out,
				tree *vector_type_out,
				tree *permutes)
{
  poly_int64 elt_bytes = count * GET_MODE_SIZE (elt_mode);
  poly_int64 nelts;
  unsigned int nvectors = 1;
  /* Try element sizes of COUNT * elt, then half that, halving each
     iteration (and doubling NVECTORS to compensate) until we either find
     a workable integer vector type or can no longer split evenly.  */
  for (;;)
    {
      scalar_int_mode int_mode;
      poly_int64 elt_bits = elt_bytes * BITS_PER_UNIT;
      /* The candidate must evenly divide the vector size and have an
	 integer mode of exactly ELT_BITS bits.  */
      if (multiple_p (current_vector_size, elt_bytes, &nelts)
	  && int_mode_for_size (elt_bits, 0).exists (&int_mode))
	{
	  tree int_type = build_nonstandard_integer_type
	    (GET_MODE_BITSIZE (int_mode), 1);
	  tree vector_type = build_vector_type (int_type, nelts);
	  if (VECTOR_MODE_P (TYPE_MODE (vector_type)))
	    {
	      /* Build the two permute selectors used by
		 duplicate_and_interleave; each is encoded with 2 patterns
		 of 3 elements, pairing elements of the first input with
		 the corresponding elements of the second (offset NELTS).  */
	      vec_perm_builder sel1 (nelts, 2, 3);
	      vec_perm_builder sel2 (nelts, 2, 3);
	      poly_int64 half_nelts = exact_div (nelts, 2);
	      for (unsigned int i = 0; i < 3; ++i)
		{
		  sel1.quick_push (i);
		  sel1.quick_push (i + nelts);
		  sel2.quick_push (half_nelts + i);
		  sel2.quick_push (half_nelts + i + nelts);
		}
	      vec_perm_indices indices1 (sel1, 2, nelts);
	      vec_perm_indices indices2 (sel2, 2, nelts);
	      /* Only usable if the target can do both constant permutes.  */
	      if (can_vec_perm_const_p (TYPE_MODE (vector_type), indices1)
		  && can_vec_perm_const_p (TYPE_MODE (vector_type), indices2))
		{
		  if (nvectors_out)
		    *nvectors_out = nvectors;
		  if (vector_type_out)
		    *vector_type_out = vector_type;
		  if (permutes)
		    {
		      permutes[0] = vect_gen_perm_mask_checked (vector_type,
								indices1);
		      permutes[1] = vect_gen_perm_mask_checked (vector_type,
								indices2);
		    }
		  return true;
		}
	    }
	}
      /* Halve the element size for the next attempt; give up once the
	 size can no longer be split in two.  */
      if (!multiple_p (elt_bytes, 2, &elt_bytes))
	return false;
      nvectors *= 2;
    }
}
27538fd1498Szrj
/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
   they are of a valid type and that they match the defs of the first stmt of
   the SLP group (stored in OPRNDS_INFO).  This function tries to match stmts
   by swapping operands of STMTS[STMT_NUM] when possible.  Non-zero *SWAP
   indicates swap is required for cond_expr stmts.  Specifically, *SWAP
   is 1 if STMT is cond and operands of comparison need to be swapped;
   *SWAP is 2 if STMT is cond and code of comparison needs to be inverted.
   If there is any operand swap in this function, *SWAP is set to non-zero
   value.
   If there was a fatal error return -1; if the error could be corrected by
   swapping operands of father node of this one, return 1; if everything is
   ok return 0.  */
static int
vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap,
			     vec<gimple *> stmts, unsigned stmt_num,
			     vec<slp_oprnd_info> *oprnds_info)
{
  gimple *stmt = stmts[stmt_num];
  tree oprnd;
  unsigned int i, number_of_oprnds;
  gimple *def_stmt;
  enum vect_def_type dt = vect_uninitialized_def;
  bool pattern = false;
  slp_oprnd_info oprnd_info;
  int first_op_idx = 1;
  bool commutative = false;
  bool first_op_cond = false;
  /* The first two stmts of the group get special treatment when recording
     pattern state in OPRNDS_INFO.  */
  bool first = stmt_num == 0;
  bool second = stmt_num == 1;

  /* Work out how many operands STMT has and where they start.  */
  if (is_gimple_call (stmt))
    {
      number_of_oprnds = gimple_call_num_args (stmt);
      /* Call arguments start at gimple op index 3.  */
      first_op_idx = 3;
    }
  else if (is_gimple_assign (stmt))
    {
      enum tree_code code = gimple_assign_rhs_code (stmt);
      number_of_oprnds = gimple_num_ops (stmt) - 1;
      /* Swap can only be done for cond_expr if asked to, otherwise we
	 could result in different comparison code to the first stmt.  */
      if (gimple_assign_rhs_code (stmt) == COND_EXPR
	  && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt)))
	{
	  first_op_cond = true;
	  /* The comparison's two operands are handled individually.  */
	  number_of_oprnds++;
	}
      else
	commutative = commutative_tree_code (code);
    }
  else
    return -1;

  bool swapped = (*swap != 0);
  gcc_assert (!swapped || first_op_cond);
  for (i = 0; i < number_of_oprnds; i++)
    {
again:
      if (first_op_cond)
	{
	  /* Map indicating how operands of cond_expr should be swapped.  */
	  int maps[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
	  int *map = maps[*swap];

	  /* The first two "operands" live inside the comparison tree;
	     the rest are direct gimple operands.  */
	  if (i < 2)
	    oprnd = TREE_OPERAND (gimple_op (stmt, first_op_idx), map[i]);
	  else
	    oprnd = gimple_op (stmt, map[i]);
	}
      else
	/* For a swapped commutative op, read operand 1 first, then 0.  */
	oprnd = gimple_op (stmt, first_op_idx + (swapped ? !i : i));

      oprnd_info = (*oprnds_info)[i];

      if (!vect_is_simple_use (oprnd, vinfo, &def_stmt, &dt))
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: can't analyze def for ");
	      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
	      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	    }

	  return -1;
	}

      /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
	 from the pattern.  Check that all the stmts of the node are in the
	 pattern.  */
      if (def_stmt && gimple_bb (def_stmt)
	  && vect_stmt_in_region_p (vinfo, def_stmt)
	  && vinfo_for_stmt (def_stmt)
	  && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
	  && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
	  && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
	{
	  pattern = true;
	  if (!first && !oprnd_info->first_pattern
	      /* Allow different pattern state for the defs of the
		 first stmt in reduction chains.  */
	      && (oprnd_info->first_dt != vect_reduction_def
		  || (!second && !oprnd_info->second_pattern)))
	    {
	      /* Pattern-state mismatch: for the first operand of a
		 commutative op, retry once with operands swapped.  */
	      if (i == 0
		  && !swapped
		  && commutative)
		{
		  swapped = true;
		  goto again;
		}

	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: some of the stmts"
				   " are in a pattern, and others are not ");
		  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
		  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
		}

	      return 1;
	    }

	  /* Use the pattern stmt (and its def type) instead of the
	     original def stmt.  */
	  def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
	  dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));

	  if (dt == vect_unknown_def_type)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Unsupported pattern.\n");
	      return -1;
	    }

	  switch (gimple_code (def_stmt))
	    {
	    case GIMPLE_PHI:
	    case GIMPLE_ASSIGN:
	      break;

	    default:
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported defining stmt:\n");
	      return -1;
	    }
	}

      if (second)
	oprnd_info->second_pattern = pattern;

      if (first)
	{
	  /* First stmt of the group: just record what later stmts must
	     match.  */
	  oprnd_info->first_dt = dt;
	  oprnd_info->first_pattern = pattern;
	  oprnd_info->first_op_type = TREE_TYPE (oprnd);
	}
      else
	{
	  /* Not first stmt of the group, check that the def-stmt/s match
	     the def-stmt/s of the first stmt.  Allow different definition
	     types for reduction chains: the first stmt must be a
	     vect_reduction_def (a phi node), and the rest
	     vect_internal_def.  */
	  tree type = TREE_TYPE (oprnd);
	  if ((oprnd_info->first_dt != dt
	       && !(oprnd_info->first_dt == vect_reduction_def
		    && dt == vect_internal_def)
	       && !((oprnd_info->first_dt == vect_external_def
		     || oprnd_info->first_dt == vect_constant_def)
		    && (dt == vect_external_def
			|| dt == vect_constant_def)))
	      || !types_compatible_p (oprnd_info->first_op_type, type))
	    {
	      /* Try swapping operands if we got a mismatch.  */
	      if (i == 0
		  && !swapped
		  && commutative)
		{
		  swapped = true;
		  goto again;
		}

	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: different types\n");

	      return 1;
	    }
	  /* With a variable-length vector size, constant/external defs
	     must be buildable via duplicate_and_interleave.  */
	  if ((dt == vect_constant_def
	       || dt == vect_external_def)
	      && !current_vector_size.is_constant ()
	      && (TREE_CODE (type) == BOOLEAN_TYPE
		  || !can_duplicate_and_interleave_p (stmts.length (),
						      TYPE_MODE (type))))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: invalid type of def "
				   "for variable-length SLP ");
		  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
		  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
		}
	      return -1;
	    }
	}

      /* Check the types of the definitions.  */
      switch (dt)
	{
	case vect_constant_def:
	case vect_external_def:
	  break;

	case vect_reduction_def:
	case vect_induction_def:
	case vect_internal_def:
	  oprnd_info->def_stmts.quick_push (def_stmt);
	  break;

	default:
	  /* FORNOW: Not supported.  */
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: illegal type of def ");
	      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
	      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	    }

	  return -1;
	}
    }

  /* Swap operands.  */
  if (swapped)
    {
      /* If there are already uses of this stmt in a SLP instance then
	 we've committed to the operand order and can't swap it.  */
      if (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) != 0)
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: cannot swap operands of "
			       "shared stmt ");
	      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	    }
	  return -1;
	}

      if (first_op_cond)
	{
	  tree cond = gimple_assign_rhs1 (stmt);
	  enum tree_code code = TREE_CODE (cond);

	  /* Swap.  */
	  if (*swap == 1)
	    {
	      swap_ssa_operands (stmt, &TREE_OPERAND (cond, 0),
				 &TREE_OPERAND (cond, 1));
	      TREE_SET_CODE (cond, swap_tree_comparison (code));
	    }
	  /* Invert.  */
	  else
	    {
	      swap_ssa_operands (stmt, gimple_assign_rhs2_ptr (stmt),
				 gimple_assign_rhs3_ptr (stmt));
	      bool honor_nans = HONOR_NANS (TREE_OPERAND (cond, 0));
	      code = invert_tree_comparison (TREE_CODE (cond), honor_nans);
	      gcc_assert (code != ERROR_MARK);
	      TREE_SET_CODE (cond, code);
	    }
	}
      else
	swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
			   gimple_assign_rhs2_ptr (stmt));
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "swapped operands to match def types in ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	}
    }

  /* Report back whether any swap happened.  */
  *swap = swapped;
  return 0;
}
56638fd1498Szrj
56738fd1498Szrj /* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
56838fd1498Szrj caller's attempt to find the vector type in STMT with the narrowest
56938fd1498Szrj element type. Return true if VECTYPE is nonnull and if it is valid
57038fd1498Szrj for VINFO. When returning true, update MAX_NUNITS to reflect the
57138fd1498Szrj number of units in VECTYPE. VINFO, GORUP_SIZE and MAX_NUNITS are
57238fd1498Szrj as for vect_build_slp_tree. */
57338fd1498Szrj
57438fd1498Szrj static bool
vect_record_max_nunits(vec_info * vinfo,gimple * stmt,unsigned int group_size,tree vectype,poly_uint64 * max_nunits)57538fd1498Szrj vect_record_max_nunits (vec_info *vinfo, gimple *stmt, unsigned int group_size,
57638fd1498Szrj tree vectype, poly_uint64 *max_nunits)
57738fd1498Szrj {
57838fd1498Szrj if (!vectype)
57938fd1498Szrj {
58038fd1498Szrj if (dump_enabled_p ())
58138fd1498Szrj {
58238fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
58338fd1498Szrj "Build SLP failed: unsupported data-type in ");
58438fd1498Szrj dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
58538fd1498Szrj dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
58638fd1498Szrj }
58738fd1498Szrj /* Fatal mismatch. */
58838fd1498Szrj return false;
58938fd1498Szrj }
59038fd1498Szrj
59138fd1498Szrj /* If populating the vector type requires unrolling then fail
59238fd1498Szrj before adjusting *max_nunits for basic-block vectorization. */
59338fd1498Szrj poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
59438fd1498Szrj unsigned HOST_WIDE_INT const_nunits;
59538fd1498Szrj if (is_a <bb_vec_info> (vinfo)
59638fd1498Szrj && (!nunits.is_constant (&const_nunits)
59738fd1498Szrj || const_nunits > group_size))
59838fd1498Szrj {
59938fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
60038fd1498Szrj "Build SLP failed: unrolling required "
60138fd1498Szrj "in basic block SLP\n");
60238fd1498Szrj /* Fatal mismatch. */
60338fd1498Szrj return false;
60438fd1498Szrj }
60538fd1498Szrj
60638fd1498Szrj /* In case of multiple types we need to detect the smallest type. */
60738fd1498Szrj vect_update_max_nunits (max_nunits, vectype);
60838fd1498Szrj return true;
60938fd1498Szrj }
61038fd1498Szrj
61138fd1498Szrj /* Verify if the scalar stmts STMTS are isomorphic, require data
61238fd1498Szrj permutation or are of unsupported types of operation. Return
61338fd1498Szrj true if they are, otherwise return false and indicate in *MATCHES
61438fd1498Szrj which stmts are not isomorphic to the first one. If MATCHES[0]
61538fd1498Szrj is false then this indicates the comparison could not be
61638fd1498Szrj carried out or the stmts will never be vectorized by SLP.
61738fd1498Szrj
61838fd1498Szrj Note COND_EXPR is possibly ismorphic to another one after swapping its
61938fd1498Szrj operands. Set SWAP[i] to 1 if stmt I is COND_EXPR and isomorphic to
62038fd1498Szrj the first stmt by swapping the two operands of comparison; set SWAP[i]
62138fd1498Szrj to 2 if stmt I is isormorphic to the first stmt by inverting the code
62238fd1498Szrj of comparison. Take A1 >= B1 ? X1 : Y1 as an exmple, it can be swapped
62338fd1498Szrj to (B1 <= A1 ? X1 : Y1); or be inverted to (A1 < B1) ? Y1 : X1. */
62438fd1498Szrj
62538fd1498Szrj static bool
vect_build_slp_tree_1(vec_info * vinfo,unsigned char * swap,vec<gimple * > stmts,unsigned int group_size,unsigned nops,poly_uint64 * max_nunits,bool * matches,bool * two_operators)62638fd1498Szrj vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
62738fd1498Szrj vec<gimple *> stmts, unsigned int group_size,
62838fd1498Szrj unsigned nops, poly_uint64 *max_nunits,
62938fd1498Szrj bool *matches, bool *two_operators)
63038fd1498Szrj {
63138fd1498Szrj unsigned int i;
63238fd1498Szrj gimple *first_stmt = stmts[0], *stmt = stmts[0];
63338fd1498Szrj enum tree_code first_stmt_code = ERROR_MARK;
63438fd1498Szrj enum tree_code alt_stmt_code = ERROR_MARK;
63538fd1498Szrj enum tree_code rhs_code = ERROR_MARK;
63638fd1498Szrj enum tree_code first_cond_code = ERROR_MARK;
63738fd1498Szrj tree lhs;
63838fd1498Szrj bool need_same_oprnds = false;
63938fd1498Szrj tree vectype = NULL_TREE, scalar_type, first_op1 = NULL_TREE;
64038fd1498Szrj optab optab;
64138fd1498Szrj int icode;
64238fd1498Szrj machine_mode optab_op2_mode;
64338fd1498Szrj machine_mode vec_mode;
64438fd1498Szrj HOST_WIDE_INT dummy;
64538fd1498Szrj gimple *first_load = NULL, *prev_first_load = NULL;
64638fd1498Szrj
64738fd1498Szrj /* For every stmt in NODE find its def stmt/s. */
64838fd1498Szrj FOR_EACH_VEC_ELT (stmts, i, stmt)
64938fd1498Szrj {
65038fd1498Szrj swap[i] = 0;
65138fd1498Szrj matches[i] = false;
65238fd1498Szrj
65338fd1498Szrj if (dump_enabled_p ())
65438fd1498Szrj {
65538fd1498Szrj dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
65638fd1498Szrj dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
65738fd1498Szrj }
65838fd1498Szrj
65938fd1498Szrj /* Fail to vectorize statements marked as unvectorizable. */
66038fd1498Szrj if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
66138fd1498Szrj {
66238fd1498Szrj if (dump_enabled_p ())
66338fd1498Szrj {
66438fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
66538fd1498Szrj "Build SLP failed: unvectorizable statement ");
66638fd1498Szrj dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
66738fd1498Szrj }
66838fd1498Szrj /* Fatal mismatch. */
66938fd1498Szrj matches[0] = false;
67038fd1498Szrj return false;
67138fd1498Szrj }
67238fd1498Szrj
67338fd1498Szrj lhs = gimple_get_lhs (stmt);
67438fd1498Szrj if (lhs == NULL_TREE)
67538fd1498Szrj {
67638fd1498Szrj if (dump_enabled_p ())
67738fd1498Szrj {
67838fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
67938fd1498Szrj "Build SLP failed: not GIMPLE_ASSIGN nor "
68038fd1498Szrj "GIMPLE_CALL ");
68138fd1498Szrj dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
68238fd1498Szrj }
68338fd1498Szrj /* Fatal mismatch. */
68438fd1498Szrj matches[0] = false;
68538fd1498Szrj return false;
68638fd1498Szrj }
68738fd1498Szrj
68838fd1498Szrj scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
68938fd1498Szrj vectype = get_vectype_for_scalar_type (scalar_type);
69038fd1498Szrj if (!vect_record_max_nunits (vinfo, stmt, group_size, vectype,
69138fd1498Szrj max_nunits))
69238fd1498Szrj {
69338fd1498Szrj /* Fatal mismatch. */
69438fd1498Szrj matches[0] = false;
69538fd1498Szrj return false;
69638fd1498Szrj }
69738fd1498Szrj
69838fd1498Szrj if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
69938fd1498Szrj {
70038fd1498Szrj rhs_code = CALL_EXPR;
70138fd1498Szrj if (gimple_call_internal_p (call_stmt)
70238fd1498Szrj || gimple_call_tail_p (call_stmt)
70338fd1498Szrj || gimple_call_noreturn_p (call_stmt)
70438fd1498Szrj || !gimple_call_nothrow_p (call_stmt)
70538fd1498Szrj || gimple_call_chain (call_stmt))
70638fd1498Szrj {
70738fd1498Szrj if (dump_enabled_p ())
70838fd1498Szrj {
70938fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
71038fd1498Szrj "Build SLP failed: unsupported call type ");
71138fd1498Szrj dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
71238fd1498Szrj call_stmt, 0);
71338fd1498Szrj }
71438fd1498Szrj /* Fatal mismatch. */
71538fd1498Szrj matches[0] = false;
71638fd1498Szrj return false;
71738fd1498Szrj }
71838fd1498Szrj }
71938fd1498Szrj else
72038fd1498Szrj rhs_code = gimple_assign_rhs_code (stmt);
72138fd1498Szrj
72238fd1498Szrj /* Check the operation. */
72338fd1498Szrj if (i == 0)
72438fd1498Szrj {
72538fd1498Szrj first_stmt_code = rhs_code;
72638fd1498Szrj
72738fd1498Szrj /* Shift arguments should be equal in all the packed stmts for a
72838fd1498Szrj vector shift with scalar shift operand. */
72938fd1498Szrj if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
73038fd1498Szrj || rhs_code == LROTATE_EXPR
73138fd1498Szrj || rhs_code == RROTATE_EXPR)
73238fd1498Szrj {
73338fd1498Szrj vec_mode = TYPE_MODE (vectype);
73438fd1498Szrj
73538fd1498Szrj /* First see if we have a vector/vector shift. */
73638fd1498Szrj optab = optab_for_tree_code (rhs_code, vectype,
73738fd1498Szrj optab_vector);
73838fd1498Szrj
73938fd1498Szrj if (!optab
74038fd1498Szrj || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
74138fd1498Szrj {
74238fd1498Szrj /* No vector/vector shift, try for a vector/scalar shift. */
74338fd1498Szrj optab = optab_for_tree_code (rhs_code, vectype,
74438fd1498Szrj optab_scalar);
74538fd1498Szrj
74638fd1498Szrj if (!optab)
74738fd1498Szrj {
74838fd1498Szrj if (dump_enabled_p ())
74938fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
75038fd1498Szrj "Build SLP failed: no optab.\n");
75138fd1498Szrj /* Fatal mismatch. */
75238fd1498Szrj matches[0] = false;
75338fd1498Szrj return false;
75438fd1498Szrj }
75538fd1498Szrj icode = (int) optab_handler (optab, vec_mode);
75638fd1498Szrj if (icode == CODE_FOR_nothing)
75738fd1498Szrj {
75838fd1498Szrj if (dump_enabled_p ())
75938fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
76038fd1498Szrj "Build SLP failed: "
76138fd1498Szrj "op not supported by target.\n");
76238fd1498Szrj /* Fatal mismatch. */
76338fd1498Szrj matches[0] = false;
76438fd1498Szrj return false;
76538fd1498Szrj }
76638fd1498Szrj optab_op2_mode = insn_data[icode].operand[2].mode;
76738fd1498Szrj if (!VECTOR_MODE_P (optab_op2_mode))
76838fd1498Szrj {
76938fd1498Szrj need_same_oprnds = true;
77038fd1498Szrj first_op1 = gimple_assign_rhs2 (stmt);
77138fd1498Szrj }
77238fd1498Szrj }
77338fd1498Szrj }
77438fd1498Szrj else if (rhs_code == WIDEN_LSHIFT_EXPR)
77538fd1498Szrj {
77638fd1498Szrj need_same_oprnds = true;
77738fd1498Szrj first_op1 = gimple_assign_rhs2 (stmt);
77838fd1498Szrj }
77938fd1498Szrj }
78038fd1498Szrj else
78138fd1498Szrj {
78238fd1498Szrj if (first_stmt_code != rhs_code
78338fd1498Szrj && alt_stmt_code == ERROR_MARK)
78438fd1498Szrj alt_stmt_code = rhs_code;
78538fd1498Szrj if (first_stmt_code != rhs_code
78638fd1498Szrj && (first_stmt_code != IMAGPART_EXPR
78738fd1498Szrj || rhs_code != REALPART_EXPR)
78838fd1498Szrj && (first_stmt_code != REALPART_EXPR
78938fd1498Szrj || rhs_code != IMAGPART_EXPR)
79038fd1498Szrj /* Handle mismatches in plus/minus by computing both
79138fd1498Szrj and merging the results. */
79238fd1498Szrj && !((first_stmt_code == PLUS_EXPR
79338fd1498Szrj || first_stmt_code == MINUS_EXPR)
79438fd1498Szrj && (alt_stmt_code == PLUS_EXPR
79538fd1498Szrj || alt_stmt_code == MINUS_EXPR)
79638fd1498Szrj && rhs_code == alt_stmt_code)
79738fd1498Szrj && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
79838fd1498Szrj && (first_stmt_code == ARRAY_REF
79938fd1498Szrj || first_stmt_code == BIT_FIELD_REF
80038fd1498Szrj || first_stmt_code == INDIRECT_REF
80138fd1498Szrj || first_stmt_code == COMPONENT_REF
80238fd1498Szrj || first_stmt_code == MEM_REF)))
80338fd1498Szrj {
80438fd1498Szrj if (dump_enabled_p ())
80538fd1498Szrj {
80638fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
80738fd1498Szrj "Build SLP failed: different operation "
80838fd1498Szrj "in stmt ");
80938fd1498Szrj dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
81038fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
81138fd1498Szrj "original stmt ");
81238fd1498Szrj dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
81338fd1498Szrj first_stmt, 0);
81438fd1498Szrj }
81538fd1498Szrj /* Mismatch. */
81638fd1498Szrj continue;
81738fd1498Szrj }
81838fd1498Szrj
81938fd1498Szrj if (need_same_oprnds
82038fd1498Szrj && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
82138fd1498Szrj {
82238fd1498Szrj if (dump_enabled_p ())
82338fd1498Szrj {
82438fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
82538fd1498Szrj "Build SLP failed: different shift "
82638fd1498Szrj "arguments in ");
82738fd1498Szrj dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
82838fd1498Szrj }
82938fd1498Szrj /* Mismatch. */
83038fd1498Szrj continue;
83138fd1498Szrj }
83238fd1498Szrj
83338fd1498Szrj if (rhs_code == CALL_EXPR)
83438fd1498Szrj {
83538fd1498Szrj gimple *first_stmt = stmts[0];
83638fd1498Szrj if (gimple_call_num_args (stmt) != nops
83738fd1498Szrj || !operand_equal_p (gimple_call_fn (first_stmt),
83838fd1498Szrj gimple_call_fn (stmt), 0)
83938fd1498Szrj || gimple_call_fntype (first_stmt)
84038fd1498Szrj != gimple_call_fntype (stmt))
84138fd1498Szrj {
84238fd1498Szrj if (dump_enabled_p ())
84338fd1498Szrj {
84438fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
84538fd1498Szrj "Build SLP failed: different calls in ");
84638fd1498Szrj dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
84738fd1498Szrj stmt, 0);
84838fd1498Szrj }
84938fd1498Szrj /* Mismatch. */
85038fd1498Szrj continue;
85138fd1498Szrj }
85238fd1498Szrj }
85338fd1498Szrj }
85438fd1498Szrj
85538fd1498Szrj /* Grouped store or load. */
85638fd1498Szrj if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
85738fd1498Szrj {
85838fd1498Szrj if (REFERENCE_CLASS_P (lhs))
85938fd1498Szrj {
86038fd1498Szrj /* Store. */
86138fd1498Szrj ;
86238fd1498Szrj }
86338fd1498Szrj else
86438fd1498Szrj {
86538fd1498Szrj /* Load. */
86638fd1498Szrj first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
86738fd1498Szrj if (prev_first_load)
86838fd1498Szrj {
86938fd1498Szrj /* Check that there are no loads from different interleaving
87038fd1498Szrj chains in the same node. */
87138fd1498Szrj if (prev_first_load != first_load)
87238fd1498Szrj {
87338fd1498Szrj if (dump_enabled_p ())
87438fd1498Szrj {
87538fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION,
87638fd1498Szrj vect_location,
87738fd1498Szrj "Build SLP failed: different "
87838fd1498Szrj "interleaving chains in one node ");
87938fd1498Szrj dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
88038fd1498Szrj stmt, 0);
88138fd1498Szrj }
88238fd1498Szrj /* Mismatch. */
88338fd1498Szrj continue;
88438fd1498Szrj }
88538fd1498Szrj }
88638fd1498Szrj else
88738fd1498Szrj prev_first_load = first_load;
88838fd1498Szrj }
88938fd1498Szrj } /* Grouped access. */
89038fd1498Szrj else
89138fd1498Szrj {
89238fd1498Szrj if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
89338fd1498Szrj {
89438fd1498Szrj /* Not grouped load. */
89538fd1498Szrj if (dump_enabled_p ())
89638fd1498Szrj {
89738fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
89838fd1498Szrj "Build SLP failed: not grouped load ");
89938fd1498Szrj dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
90038fd1498Szrj }
90138fd1498Szrj
90238fd1498Szrj /* FORNOW: Not grouped loads are not supported. */
90338fd1498Szrj /* Fatal mismatch. */
90438fd1498Szrj matches[0] = false;
90538fd1498Szrj return false;
90638fd1498Szrj }
90738fd1498Szrj
90838fd1498Szrj /* Not memory operation. */
90938fd1498Szrj if (TREE_CODE_CLASS (rhs_code) != tcc_binary
91038fd1498Szrj && TREE_CODE_CLASS (rhs_code) != tcc_unary
91138fd1498Szrj && TREE_CODE_CLASS (rhs_code) != tcc_expression
91238fd1498Szrj && TREE_CODE_CLASS (rhs_code) != tcc_comparison
91338fd1498Szrj && rhs_code != CALL_EXPR)
91438fd1498Szrj {
91538fd1498Szrj if (dump_enabled_p ())
91638fd1498Szrj {
91738fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
91838fd1498Szrj "Build SLP failed: operation");
91938fd1498Szrj dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
92038fd1498Szrj dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
92138fd1498Szrj }
92238fd1498Szrj /* Fatal mismatch. */
92338fd1498Szrj matches[0] = false;
92438fd1498Szrj return false;
92538fd1498Szrj }
92638fd1498Szrj
92738fd1498Szrj if (rhs_code == COND_EXPR)
92838fd1498Szrj {
92938fd1498Szrj tree cond_expr = gimple_assign_rhs1 (stmt);
93038fd1498Szrj enum tree_code cond_code = TREE_CODE (cond_expr);
93138fd1498Szrj enum tree_code swap_code = ERROR_MARK;
93238fd1498Szrj enum tree_code invert_code = ERROR_MARK;
93338fd1498Szrj
93438fd1498Szrj if (i == 0)
93538fd1498Szrj first_cond_code = TREE_CODE (cond_expr);
93638fd1498Szrj else if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
93738fd1498Szrj {
93838fd1498Szrj bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
93938fd1498Szrj swap_code = swap_tree_comparison (cond_code);
94038fd1498Szrj invert_code = invert_tree_comparison (cond_code, honor_nans);
94138fd1498Szrj }
94238fd1498Szrj
94338fd1498Szrj if (first_cond_code == cond_code)
94438fd1498Szrj ;
94538fd1498Szrj /* Isomorphic can be achieved by swapping. */
94638fd1498Szrj else if (first_cond_code == swap_code)
94738fd1498Szrj swap[i] = 1;
94838fd1498Szrj /* Isomorphic can be achieved by inverting. */
94938fd1498Szrj else if (first_cond_code == invert_code)
95038fd1498Szrj swap[i] = 2;
95138fd1498Szrj else
95238fd1498Szrj {
95338fd1498Szrj if (dump_enabled_p ())
95438fd1498Szrj {
95538fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
95638fd1498Szrj "Build SLP failed: different"
95738fd1498Szrj " operation");
95838fd1498Szrj dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
95938fd1498Szrj stmt, 0);
96038fd1498Szrj }
96138fd1498Szrj /* Mismatch. */
96238fd1498Szrj continue;
96338fd1498Szrj }
96438fd1498Szrj }
96538fd1498Szrj }
96638fd1498Szrj
96738fd1498Szrj matches[i] = true;
96838fd1498Szrj }
96938fd1498Szrj
97038fd1498Szrj for (i = 0; i < group_size; ++i)
97138fd1498Szrj if (!matches[i])
97238fd1498Szrj return false;
97338fd1498Szrj
97438fd1498Szrj /* If we allowed a two-operation SLP node verify the target can cope
97538fd1498Szrj with the permute we are going to use. */
97638fd1498Szrj poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
97738fd1498Szrj if (alt_stmt_code != ERROR_MARK
97838fd1498Szrj && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
97938fd1498Szrj {
98038fd1498Szrj unsigned HOST_WIDE_INT count;
98138fd1498Szrj if (!nunits.is_constant (&count))
98238fd1498Szrj {
98338fd1498Szrj if (dump_enabled_p ())
98438fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
98538fd1498Szrj "Build SLP failed: different operations "
98638fd1498Szrj "not allowed with variable-length SLP.\n");
98738fd1498Szrj return false;
98838fd1498Szrj }
98938fd1498Szrj vec_perm_builder sel (count, count, 1);
99038fd1498Szrj for (i = 0; i < count; ++i)
99138fd1498Szrj {
99238fd1498Szrj unsigned int elt = i;
99338fd1498Szrj if (gimple_assign_rhs_code (stmts[i % group_size]) == alt_stmt_code)
99438fd1498Szrj elt += count;
99538fd1498Szrj sel.quick_push (elt);
99638fd1498Szrj }
99738fd1498Szrj vec_perm_indices indices (sel, 2, count);
99838fd1498Szrj if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
99938fd1498Szrj {
100038fd1498Szrj for (i = 0; i < group_size; ++i)
100138fd1498Szrj if (gimple_assign_rhs_code (stmts[i]) == alt_stmt_code)
100238fd1498Szrj {
100338fd1498Szrj matches[i] = false;
100438fd1498Szrj if (dump_enabled_p ())
100538fd1498Szrj {
100638fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
100738fd1498Szrj "Build SLP failed: different operation "
100838fd1498Szrj "in stmt ");
100938fd1498Szrj dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
101038fd1498Szrj stmts[i], 0);
101138fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
101238fd1498Szrj "original stmt ");
101338fd1498Szrj dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
101438fd1498Szrj first_stmt, 0);
101538fd1498Szrj }
101638fd1498Szrj }
101738fd1498Szrj return false;
101838fd1498Szrj }
101938fd1498Szrj *two_operators = true;
102038fd1498Szrj }
102138fd1498Szrj
102238fd1498Szrj return true;
102338fd1498Szrj }
102438fd1498Szrj
/* Traits for the hash_set to record failed SLP builds for a stmt set.
   Note we never remove apart from at destruction time so we do not
   need a special value for deleted that differs from empty.  */
struct bst_traits
{
  typedef vec <gimple *> value_type;
  typedef vec <gimple *> compare_type;
  static inline hashval_t hash (value_type);
  static inline bool equal (value_type existing, value_type candidate);
  /* A slot is empty when its vec was never created (or was released);
     since entries are only removed at destruction time, "deleted" can
     share the same representation.  */
  static inline bool is_empty (value_type x) { return !x.exists (); }
  static inline bool is_deleted (value_type x) { return !x.exists (); }
  static inline void mark_empty (value_type &x) { x.release (); }
  static inline void mark_deleted (value_type &x) { x.release (); }
  /* The set owns the stored vec's memory; release it on removal.  */
  static inline void remove (value_type &x) { x.release (); }
};
104038fd1498Szrj inline hashval_t
hash(value_type x)104138fd1498Szrj bst_traits::hash (value_type x)
104238fd1498Szrj {
104338fd1498Szrj inchash::hash h;
104438fd1498Szrj for (unsigned i = 0; i < x.length (); ++i)
104538fd1498Szrj h.add_int (gimple_uid (x[i]));
104638fd1498Szrj return h.end ();
104738fd1498Szrj }
104838fd1498Szrj inline bool
equal(value_type existing,value_type candidate)104938fd1498Szrj bst_traits::equal (value_type existing, value_type candidate)
105038fd1498Szrj {
105138fd1498Szrj if (existing.length () != candidate.length ())
105238fd1498Szrj return false;
105338fd1498Szrj for (unsigned i = 0; i < existing.length (); ++i)
105438fd1498Szrj if (existing[i] != candidate[i])
105538fd1498Szrj return false;
105638fd1498Szrj return true;
105738fd1498Szrj }
105838fd1498Szrj
/* Set of scalar-stmt vectors for which SLP build already failed;
   consulted and populated by vect_build_slp_tree below to avoid
   re-running the (potentially exponential) build on the same set.  */
typedef hash_set <vec <gimple *>, bst_traits> scalar_stmts_set_t;
static scalar_stmts_set_t *bst_fail;
106138fd1498Szrj
/* Forward declaration of the recursive worker; the definition follows
   the memoizing wrapper vect_build_slp_tree.  */
static slp_tree
vect_build_slp_tree_2 (vec_info *vinfo,
		       vec<gimple *> stmts, unsigned int group_size,
		       poly_uint64 *max_nunits,
		       vec<slp_tree> *loads,
		       bool *matches, unsigned *npermutes, unsigned *tree_size,
		       unsigned max_tree_size);
106938fd1498Szrj
107038fd1498Szrj static slp_tree
vect_build_slp_tree(vec_info * vinfo,vec<gimple * > stmts,unsigned int group_size,poly_uint64 * max_nunits,vec<slp_tree> * loads,bool * matches,unsigned * npermutes,unsigned * tree_size,unsigned max_tree_size)107138fd1498Szrj vect_build_slp_tree (vec_info *vinfo,
107238fd1498Szrj vec<gimple *> stmts, unsigned int group_size,
107338fd1498Szrj poly_uint64 *max_nunits, vec<slp_tree> *loads,
107438fd1498Szrj bool *matches, unsigned *npermutes, unsigned *tree_size,
107538fd1498Szrj unsigned max_tree_size)
107638fd1498Szrj {
107738fd1498Szrj if (bst_fail->contains (stmts))
107838fd1498Szrj return NULL;
107938fd1498Szrj slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size, max_nunits,
108038fd1498Szrj loads, matches, npermutes, tree_size,
108138fd1498Szrj max_tree_size);
108238fd1498Szrj /* When SLP build fails for stmts record this, otherwise SLP build
108338fd1498Szrj can be exponential in time when we allow to construct parts from
108438fd1498Szrj scalars, see PR81723. */
108538fd1498Szrj if (! res)
108638fd1498Szrj {
108738fd1498Szrj vec <gimple *> x;
108838fd1498Szrj x.create (stmts.length ());
108938fd1498Szrj x.splice (stmts);
109038fd1498Szrj bst_fail->add (x);
109138fd1498Szrj }
109238fd1498Szrj return res;
109338fd1498Szrj }
109438fd1498Szrj
/* Recursively build an SLP tree for the group of scalar stmts STMTS.
   Return NULL if the def-stmts are not isomorphic, require data
   permutation, or use unsupported types of operation, recording
   per-stmt success/failure in MATCHES.  Otherwise return the built
   slp_tree node.  */
110138fd1498Szrj
static slp_tree
vect_build_slp_tree_2 (vec_info *vinfo,
		       vec<gimple *> stmts, unsigned int group_size,
		       poly_uint64 *max_nunits,
		       vec<slp_tree> *loads,
		       bool *matches, unsigned *npermutes, unsigned *tree_size,
		       unsigned max_tree_size)
{
  unsigned nops, i, this_tree_size = 0;
  poly_uint64 this_max_nunits = *max_nunits;
  gimple *stmt;
  slp_tree node;

  /* Presume a fatal mismatch until the group is fully analyzed.  */
  matches[0] = false;

  /* Derive the number of operands to match from the first stmt.  */
  stmt = stmts[0];
  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      /* The embedded comparison of a COND_EXPR counts as an extra
	 operand.  */
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
	nops++;
    }
  else if (gimple_code (stmt) == GIMPLE_PHI)
    nops = 0;
  else
    return NULL;

  /* If the SLP node is a PHI (induction or reduction), terminate
     the recursion.  */
  if (gimple_code (stmt) == GIMPLE_PHI)
    {
      tree scalar_type = TREE_TYPE (PHI_RESULT (stmt));
      tree vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vect_record_max_nunits (vinfo, stmt, group_size, vectype,
				   max_nunits))
	return NULL;

      vect_def_type def_type = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt));
      /* Induction from different IVs is not supported.  */
      if (def_type == vect_induction_def)
	{
	  FOR_EACH_VEC_ELT (stmts, i, stmt)
	    if (stmt != stmts[0])
	      return NULL;
	}
      else
	{
	  /* Else def types have to match.  */
	  FOR_EACH_VEC_ELT (stmts, i, stmt)
	    {
	      /* But for reduction chains only check on the first stmt.  */
	      if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
		  && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt)
		continue;
	      if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != def_type)
		return NULL;
	    }
	}
      node = vect_create_new_slp_node (stmts);
      return node;
    }


  /* Check the stmts are isomorphic; this also fills MATCHES and SWAP
     and detects the two-operator (mixed plus/minus) case.  */
  bool two_operators = false;
  unsigned char *swap = XALLOCAVEC (unsigned char, group_size);
  if (!vect_build_slp_tree_1 (vinfo, swap,
			      stmts, group_size, nops,
			      &this_max_nunits, matches, &two_operators))
    return NULL;

  /* If the SLP node is a load, terminate the recursion.  */
  if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
      && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
    {
      *max_nunits = this_max_nunits;
      node = vect_create_new_slp_node (stmts);
      loads->safe_push (node);
      return node;
    }

  /* Get at the operands, verifying they are compatible.  */
  vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
  slp_oprnd_info oprnd_info;
  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      int res = vect_get_and_check_slp_defs (vinfo, &swap[i],
					     stmts, i, &oprnds_info);
      if (res != 0)
	/* res == -1 signals a fatal mismatch on the first stmt.  */
	matches[(res == -1) ? 0 : i] = false;
      if (!matches[0])
	break;
    }
  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      {
	vect_free_oprnd_info (oprnds_info);
	return NULL;
      }

  auto_vec<slp_tree, 4> children;
  auto_vec<slp_tree> this_loads;

  stmt = stmts[0];

  /* Account the size already consumed by ancestors.  */
  if (tree_size)
    max_tree_size -= *tree_size;

  /* Create SLP_TREE nodes for the definition node/s.  */
  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      slp_tree child;
      /* Remember state for rollback when a child is later discarded.  */
      unsigned old_nloads = this_loads.length ();
      unsigned old_tree_size = this_tree_size;
      unsigned int j;

      if (oprnd_info->first_dt != vect_internal_def
	  && oprnd_info->first_dt != vect_reduction_def
	  && oprnd_info->first_dt != vect_induction_def)
	continue;

      if (++this_tree_size > max_tree_size)
	{
	  FOR_EACH_VEC_ELT (children, j, child)
	    vect_free_slp_tree (child);
	  vect_free_oprnd_info (oprnds_info);
	  return NULL;
	}

      if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
					group_size, &this_max_nunits,
					&this_loads, matches, npermutes,
					&this_tree_size,
					max_tree_size)) != NULL)
	{
	  /* If we have all children of child built up from scalars then just
	     throw that away and build it up this node from scalars.  */
	  if (!SLP_TREE_CHILDREN (child).is_empty ()
	      /* ??? Rejecting patterns this way doesn't work.  We'd have to
		 do extra work to cancel the pattern so the uses see the
		 scalar version.  */
	      && !is_pattern_stmt_p
		    (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])))
	    {
	      slp_tree grandchild;

	      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
		if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
		  break;
	      if (!grandchild)
		{
		  /* Roll back.  */
		  this_loads.truncate (old_nloads);
		  this_tree_size = old_tree_size;
		  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
		    vect_free_slp_tree (grandchild);
		  SLP_TREE_CHILDREN (child).truncate (0);

		  dump_printf_loc (MSG_NOTE, vect_location,
				   "Building parent vector operands from "
				   "scalars instead\n");
		  oprnd_info->def_stmts = vNULL;
		  SLP_TREE_DEF_TYPE (child) = vect_external_def;
		  children.safe_push (child);
		  continue;
		}
	    }

	  oprnd_info->def_stmts = vNULL;
	  children.safe_push (child);
	  continue;
	}

      /* If the SLP build failed fatally and we analyze a basic-block
	 simply treat nodes we fail to build as externally defined
	 (and thus build vectors from the scalar defs).
	 The cost model will reject outright expensive cases.
	 ??? This doesn't treat cases where permutation ultimately
	 fails (or we don't try permutation below).  Ideally we'd
	 even compute a permutation that will end up with the maximum
	 SLP tree size...  */
      if (is_a <bb_vec_info> (vinfo)
	  && !matches[0]
	  /* ??? Rejecting patterns this way doesn't work.  We'd have to
	     do extra work to cancel the pattern so the uses see the
	     scalar version.  */
	  && !is_pattern_stmt_p (vinfo_for_stmt (stmt)))
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "Building vector operands from scalars\n");
	  child = vect_create_new_slp_node (oprnd_info->def_stmts);
	  SLP_TREE_DEF_TYPE (child) = vect_external_def;
	  children.safe_push (child);
	  oprnd_info->def_stmts = vNULL;
	  continue;
	}

      /* If the SLP build for operand zero failed and operand zero
	 and one can be commutated try that for the scalar stmts
	 that failed the match.  */
      if (i == 0
	  /* A first scalar stmt mismatch signals a fatal mismatch.  */
	  && matches[0]
	  /* ??? For COND_EXPRs we can swap the comparison operands
	     as well as the arms under some constraints.  */
	  && nops == 2
	  && oprnds_info[1]->first_dt == vect_internal_def
	  && is_gimple_assign (stmt)
	  /* Do so only if the number of not successful permutes was no more
	     than a cut-off as re-trying the recursive match on
	     possibly each level of the tree would expose exponential
	     behavior.  */
	  && *npermutes < 4)
	{
	  /* See whether we can swap the matching or the non-matching
	     stmt operands.  */
	  bool swap_not_matching = true;
	  do
	    {
	      for (j = 0; j < group_size; ++j)
		{
		  if (matches[j] != !swap_not_matching)
		    continue;
		  gimple *stmt = stmts[j];
		  /* Verify if we can swap operands of this stmt.  */
		  if (!is_gimple_assign (stmt)
		      || !commutative_tree_code (gimple_assign_rhs_code (stmt)))
		    {
		      /* Fall back once to swapping the other partition;
			 if that was already tried, give up.  */
		      if (!swap_not_matching)
			goto fail;
		      swap_not_matching = false;
		      break;
		    }
		  /* Verify if we can safely swap or if we committed to a
		     specific operand order already.
		     ??? Instead of modifying GIMPLE stmts here we could
		     record whether we want to swap operands in the SLP
		     node and temporarily do that when processing it
		     (or wrap operand accessors in a helper).  */
		  else if (swap[j] != 0
			   || STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)))
		    {
		      if (!swap_not_matching)
			{
			  if (dump_enabled_p ())
			    {
			      dump_printf_loc (MSG_MISSED_OPTIMIZATION,
					       vect_location,
					       "Build SLP failed: cannot swap "
					       "operands of shared stmt ");
			      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,
						TDF_SLIM, stmts[j], 0);
			    }
			  goto fail;
			}
		      swap_not_matching = false;
		      break;
		    }
		}
	    }
	  while (j != group_size);

	  /* Swap mismatched definition stmts.  */
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "Re-trying with swapped operands of stmts ");
	  for (j = 0; j < group_size; ++j)
	    if (matches[j] == !swap_not_matching)
	      {
		std::swap (oprnds_info[0]->def_stmts[j],
			   oprnds_info[1]->def_stmts[j]);
		dump_printf (MSG_NOTE, "%d ", j);
	      }
	  dump_printf (MSG_NOTE, "\n");
	  /* And try again with scratch 'matches' ... */
	  bool *tem = XALLOCAVEC (bool, group_size);
	  if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
					    group_size, &this_max_nunits,
					    &this_loads, tem, npermutes,
					    &this_tree_size,
					    max_tree_size)) != NULL)
	    {
	      /* ... so if successful we can apply the operand swapping
		 to the GIMPLE IL.  This is necessary because for example
		 vect_get_slp_defs uses operand indexes and thus expects
		 canonical operand order.  This is also necessary even
		 if we end up building the operand from scalars as
		 we'll continue to process swapped operand two.  */
	      for (j = 0; j < group_size; ++j)
		{
		  gimple *stmt = stmts[j];
		  gimple_set_plf (stmt, GF_PLF_1, false);
		}
	      for (j = 0; j < group_size; ++j)
		{
		  gimple *stmt = stmts[j];
		  if (matches[j] == !swap_not_matching)
		    {
		      /* Avoid swapping operands twice.  */
		      if (gimple_plf (stmt, GF_PLF_1))
			continue;
		      swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
					 gimple_assign_rhs2_ptr (stmt));
		      gimple_set_plf (stmt, GF_PLF_1, true);
		    }
		}
	      /* Verify we swap all duplicates or none.  */
	      if (flag_checking)
		for (j = 0; j < group_size; ++j)
		  {
		    gimple *stmt = stmts[j];
		    gcc_assert (gimple_plf (stmt, GF_PLF_1)
				== (matches[j] == !swap_not_matching));
		  }

	      /* If we have all children of child built up from scalars then
		 just throw that away and build it up this node from scalars.  */
	      if (!SLP_TREE_CHILDREN (child).is_empty ()
		  /* ??? Rejecting patterns this way doesn't work.  We'd have
		     to do extra work to cancel the pattern so the uses see the
		     scalar version.  */
		  && !is_pattern_stmt_p
			(vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])))
		{
		  unsigned int j;
		  slp_tree grandchild;

		  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
		    if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
		      break;
		  if (!grandchild)
		    {
		      /* Roll back.  */
		      this_loads.truncate (old_nloads);
		      this_tree_size = old_tree_size;
		      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
			vect_free_slp_tree (grandchild);
		      SLP_TREE_CHILDREN (child).truncate (0);

		      dump_printf_loc (MSG_NOTE, vect_location,
				       "Building parent vector operands from "
				       "scalars instead\n");
		      oprnd_info->def_stmts = vNULL;
		      SLP_TREE_DEF_TYPE (child) = vect_external_def;
		      children.safe_push (child);
		      continue;
		    }
		}

	      oprnd_info->def_stmts = vNULL;
	      children.safe_push (child);
	      continue;
	    }

	  ++*npermutes;
	}

fail:
      /* Building this operand failed; free everything built so far.  */
      gcc_assert (child == NULL);
      FOR_EACH_VEC_ELT (children, j, child)
	vect_free_slp_tree (child);
      vect_free_oprnd_info (oprnds_info);
      return NULL;
    }

  vect_free_oprnd_info (oprnds_info);

  /* Success: propagate accumulated size/nunits/loads to the caller.  */
  if (tree_size)
    *tree_size += this_tree_size;
  *max_nunits = this_max_nunits;
  loads->safe_splice (this_loads);

  node = vect_create_new_slp_node (stmts);
  SLP_TREE_TWO_OPERATORS (node) = two_operators;
  SLP_TREE_CHILDREN (node).splice (children);
  return node;
}
147938fd1498Szrj
148038fd1498Szrj /* Dump a slp tree NODE using flags specified in DUMP_KIND. */
148138fd1498Szrj
148238fd1498Szrj static void
vect_print_slp_tree(dump_flags_t dump_kind,location_t loc,slp_tree node)148338fd1498Szrj vect_print_slp_tree (dump_flags_t dump_kind, location_t loc, slp_tree node)
148438fd1498Szrj {
148538fd1498Szrj int i;
148638fd1498Szrj gimple *stmt;
148738fd1498Szrj slp_tree child;
148838fd1498Szrj
148938fd1498Szrj dump_printf_loc (dump_kind, loc, "node%s\n",
149038fd1498Szrj SLP_TREE_DEF_TYPE (node) != vect_internal_def
149138fd1498Szrj ? " (external)" : "");
149238fd1498Szrj FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
149338fd1498Szrj {
149438fd1498Szrj dump_printf_loc (dump_kind, loc, "\tstmt %d ", i);
149538fd1498Szrj dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
149638fd1498Szrj }
149738fd1498Szrj FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
149838fd1498Szrj vect_print_slp_tree (dump_kind, loc, child);
149938fd1498Szrj }
150038fd1498Szrj
150138fd1498Szrj
150238fd1498Szrj /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
150338fd1498Szrj If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
150438fd1498Szrj J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
150538fd1498Szrj stmts in NODE are to be marked. */
150638fd1498Szrj
150738fd1498Szrj static void
vect_mark_slp_stmts(slp_tree node,enum slp_vect_type mark,int j)150838fd1498Szrj vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
150938fd1498Szrj {
151038fd1498Szrj int i;
151138fd1498Szrj gimple *stmt;
151238fd1498Szrj slp_tree child;
151338fd1498Szrj
151438fd1498Szrj if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
151538fd1498Szrj return;
151638fd1498Szrj
151738fd1498Szrj FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
151838fd1498Szrj if (j < 0 || i == j)
151938fd1498Szrj STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
152038fd1498Szrj
152138fd1498Szrj FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
152238fd1498Szrj vect_mark_slp_stmts (child, mark, j);
152338fd1498Szrj }
152438fd1498Szrj
152538fd1498Szrj
152638fd1498Szrj /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
152738fd1498Szrj
152838fd1498Szrj static void
vect_mark_slp_stmts_relevant(slp_tree node)152938fd1498Szrj vect_mark_slp_stmts_relevant (slp_tree node)
153038fd1498Szrj {
153138fd1498Szrj int i;
153238fd1498Szrj gimple *stmt;
153338fd1498Szrj stmt_vec_info stmt_info;
153438fd1498Szrj slp_tree child;
153538fd1498Szrj
153638fd1498Szrj if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
153738fd1498Szrj return;
153838fd1498Szrj
153938fd1498Szrj FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
154038fd1498Szrj {
154138fd1498Szrj stmt_info = vinfo_for_stmt (stmt);
154238fd1498Szrj gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
154338fd1498Szrj || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
154438fd1498Szrj STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
154538fd1498Szrj }
154638fd1498Szrj
154738fd1498Szrj FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
154838fd1498Szrj vect_mark_slp_stmts_relevant (child);
154938fd1498Szrj }
155038fd1498Szrj
155138fd1498Szrj
155238fd1498Szrj /* Rearrange the statements of NODE according to PERMUTATION. */
155338fd1498Szrj
155438fd1498Szrj static void
vect_slp_rearrange_stmts(slp_tree node,unsigned int group_size,vec<unsigned> permutation)155538fd1498Szrj vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
155638fd1498Szrj vec<unsigned> permutation)
155738fd1498Szrj {
155838fd1498Szrj gimple *stmt;
155938fd1498Szrj vec<gimple *> tmp_stmts;
156038fd1498Szrj unsigned int i;
156138fd1498Szrj slp_tree child;
156238fd1498Szrj
156338fd1498Szrj FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
156438fd1498Szrj vect_slp_rearrange_stmts (child, group_size, permutation);
156538fd1498Szrj
156638fd1498Szrj gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
156738fd1498Szrj tmp_stmts.create (group_size);
156838fd1498Szrj tmp_stmts.quick_grow_cleared (group_size);
156938fd1498Szrj
157038fd1498Szrj FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
157138fd1498Szrj tmp_stmts[permutation[i]] = stmt;
157238fd1498Szrj
157338fd1498Szrj SLP_TREE_SCALAR_STMTS (node).release ();
157438fd1498Szrj SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
157538fd1498Szrj }
157638fd1498Szrj
157738fd1498Szrj
/* Attempt to reorder stmts in a reduction chain so that we don't
   require any load permutation.  Return true if that was possible,
   otherwise return false.

   SLP_INSTN is the instance to rewrite.  All load nodes must share one
   permutation, and that permutation must be a bijection on
   [0, group_size); on success the whole tree is permuted and the per-node
   load permutations are dropped (or replaced by the identity where gaps
   still require them).  */

static bool
vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
{
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j;
  unsigned int lidx;
  slp_tree node, load;

  /* Compare all the permutation sequences to the first one.  We know
     that at least one load is permuted.  */
  node = SLP_INSTANCE_LOADS (slp_instn)[0];
  if (!node->load_permutation.exists ())
    return false;
  for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
    {
      if (!load->load_permutation.exists ())
	return false;
      FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
	if (lidx != node->load_permutation[j])
	  return false;
    }

  /* Check that the loads in the first sequence are different and there
     are no gaps between them.  */
  auto_sbitmap load_index (group_size);
  bitmap_clear (load_index);
  FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
    {
      if (lidx >= group_size)
	return false;
      /* Duplicate index: the permutation is not a bijection.  */
      if (bitmap_bit_p (load_index, lidx))
	return false;

      bitmap_set_bit (load_index, lidx);
    }
  /* Every index in [0, group_size) must be covered.  */
  for (i = 0; i < group_size; i++)
    if (!bitmap_bit_p (load_index, i))
      return false;

  /* This permutation is valid for reduction.  Since the order of the
     statements in the nodes is not important unless they are memory
     accesses, we can rearrange the statements in all the nodes
     according to the order of the loads.  */
  vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
			    node->load_permutation);

  /* We are done, no actual permutations need to be generated.  */
  poly_uint64 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn);
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    {
      gimple *first_stmt = SLP_TREE_SCALAR_STMTS (node)[0];
      first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt));
      /* But we have to keep those permutations that are required because
         of handling of gaps.  */
      if (known_eq (unrolling_factor, 1U)
	  || (group_size == GROUP_SIZE (vinfo_for_stmt (first_stmt))
	      && GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0))
	SLP_TREE_LOAD_PERMUTATION (node).release ();
      else
	/* Gaps remain: keep a permutation but make it the identity.  */
	for (j = 0; j < SLP_TREE_LOAD_PERMUTATION (node).length (); ++j)
	  SLP_TREE_LOAD_PERMUTATION (node)[j] = j;
    }

  return true;
}
164738fd1498Szrj
/* Check if the required load permutations in the SLP instance
   SLP_INSTN are supported.  Returns false if some permutation cannot
   be generated (or, for BB vectorization, would access elements beyond
   the group).  As a side effect may rearrange the stmts of a reduction
   chain to eliminate permutations entirely.  */

static bool
vect_supported_load_permutation_p (slp_instance slp_instn)
{
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j, k, next;
  slp_tree node;
  gimple *stmt, *load, *next_load;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
	if (node->load_permutation.exists ())
	  FOR_EACH_VEC_ELT (node->load_permutation, j, next)
	    dump_printf (MSG_NOTE, "%d ", next);
	else
	  /* No permutation recorded: print the identity.  */
	  for (k = 0; k < group_size; ++k)
	    dump_printf (MSG_NOTE, "%d ", k);
      dump_printf (MSG_NOTE, "\n");
    }

  /* In case of reduction every load permutation is allowed, since the order
     of the reduction statements is not important (as opposed to the case of
     grouped stores).  The only condition we need to check is that all the
     load nodes are of the same size and have the same permutation (and then
     rearrange all the nodes of the SLP instance according to this
     permutation).  */

  /* Check that all the load nodes are of the same size.  */
  /* ??? Can't we assert this? */
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
      return false;

  node = SLP_INSTANCE_TREE (slp_instn);
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];

  /* Reduction (there are no data-refs in the root).
     In reduction chain the order of the loads is not important.  */
  if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
      && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    vect_attempt_slp_rearrange_stmts (slp_instn);

  /* In basic block vectorization we allow any subchain of an interleaving
     chain.
     FORNOW: not supported in loop SLP because of realignment compications.  */
  if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)))
    {
      /* Check whether the loads in an instance form a subchain and thus
         no permutation is necessary.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
	{
	  if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
	    continue;
	  bool subchain_p = true;
	  next_load = NULL;
	  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load)
	    {
	      /* A subchain means each stmt is the interleaving-chain
		 successor of the previous one, with no gap.  */
	      if (j != 0
		  && (next_load != load
		      || GROUP_GAP (vinfo_for_stmt (load)) != 1))
		{
		  subchain_p = false;
		  break;
		}
	      next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
	    }
	  if (subchain_p)
	    SLP_TREE_LOAD_PERMUTATION (node).release ();
	  else
	    {
	      stmt_vec_info group_info
		= vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
	      group_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (group_info));
	      unsigned HOST_WIDE_INT nunits;
	      unsigned k, maxk = 0;
	      /* Find the highest group element the permutation touches.  */
	      FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), j, k)
		if (k > maxk)
		  maxk = k;
	      /* In BB vectorization we may not actually use a loaded vector
		 accessing elements in excess of GROUP_SIZE.  */
	      tree vectype = STMT_VINFO_VECTYPE (group_info);
	      if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
		  || maxk >= (GROUP_SIZE (group_info) & ~(nunits - 1)))
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "BB vectorization with gaps at the end of "
				   "a load is not supported\n");
		  return false;
		}

	      /* Verify the permutation can be generated.  */
	      vec<tree> tem;
	      unsigned n_perms;
	      if (!vect_transform_slp_perm_load (node, tem, NULL,
						 1, slp_instn, true, &n_perms))
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION,
				   vect_location,
				   "unsupported load permutation\n");
		  return false;
		}
	    }
	}
      return true;
    }

  /* For loop vectorization verify we can generate the permutation.  Be
     conservative about the vectorization factor, there are permutations
     that will use three vector inputs only starting from a specific factor
     and the vectorization factor is not yet final.
     ??? The SLP instance unrolling factor might not be the maximum one.  */
  unsigned n_perms;
  poly_uint64 test_vf
    = force_common_multiple (SLP_INSTANCE_UNROLLING_FACTOR (slp_instn),
			     LOOP_VINFO_VECT_FACTOR
			     (STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt))));
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (node->load_permutation.exists ()
	&& !vect_transform_slp_perm_load (node, vNULL, NULL, test_vf,
					  slp_instn, true, &n_perms))
      return false;

  return true;
}
177638fd1498Szrj
177738fd1498Szrj
177838fd1498Szrj /* Find the last store in SLP INSTANCE. */
177938fd1498Szrj
178038fd1498Szrj gimple *
vect_find_last_scalar_stmt_in_slp(slp_tree node)178138fd1498Szrj vect_find_last_scalar_stmt_in_slp (slp_tree node)
178238fd1498Szrj {
178338fd1498Szrj gimple *last = NULL, *stmt;
178438fd1498Szrj
178538fd1498Szrj for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt); i++)
178638fd1498Szrj {
178738fd1498Szrj stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
178838fd1498Szrj if (is_pattern_stmt_p (stmt_vinfo))
178938fd1498Szrj last = get_later_stmt (STMT_VINFO_RELATED_STMT (stmt_vinfo), last);
179038fd1498Szrj else
179138fd1498Szrj last = get_later_stmt (stmt, last);
179238fd1498Szrj }
179338fd1498Szrj
179438fd1498Szrj return last;
179538fd1498Szrj }
179638fd1498Szrj
/* Compute the cost for the SLP node NODE in the SLP instance INSTANCE.

   Costs are appended to PROLOGUE_COST_VEC and BODY_COST_VEC rather than
   recorded directly, so the caller can discard them if SLP fails.
   NCOPIES_FOR_COST is the number of vector stmts assumed per scalar stmt.
   VISITED tracks scalar-stmt sets already costed so shared subtrees are
   only counted once.  */

static void
vect_analyze_slp_cost_1 (slp_instance instance, slp_tree node,
			 stmt_vector_for_cost *prologue_cost_vec,
			 stmt_vector_for_cost *body_cost_vec,
			 unsigned ncopies_for_cost,
			 scalar_stmts_set_t* visited)
{
  unsigned i, j;
  slp_tree child;
  gimple *stmt;
  stmt_vec_info stmt_info;
  tree lhs;

  /* If we already costed the exact same set of scalar stmts we're done.
     We share the generated vector stmts for those.  */
  if (visited->contains (SLP_TREE_SCALAR_STMTS (node)))
    return;

  visited->add (SLP_TREE_SCALAR_STMTS (node).copy ());

  /* Recurse down the SLP tree.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
      vect_analyze_slp_cost_1 (instance, child, prologue_cost_vec,
			       body_cost_vec, ncopies_for_cost, visited);

  /* Look at the first scalar stmt to determine the cost.  */
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  stmt_info = vinfo_for_stmt (stmt);
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      vect_memory_access_type memory_access_type
	= (STMT_VINFO_STRIDED_P (stmt_info)
	   ? VMAT_STRIDED_SLP
	   : VMAT_CONTIGUOUS);
      if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)))
	vect_model_store_cost (stmt_info, ncopies_for_cost,
			       memory_access_type, VLS_STORE,
			       node, prologue_cost_vec, body_cost_vec);
      else
	{
	  gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
	  if (SLP_TREE_LOAD_PERMUTATION (node).exists ())
	    {
	      /* If the load is permuted then the alignment is determined by
		 the first group element not by the first scalar stmt DR.  */
	      stmt = GROUP_FIRST_ELEMENT (stmt_info);
	      stmt_info = vinfo_for_stmt (stmt);
	      /* Record the cost for the permutation.  */
	      unsigned n_perms;
	      vect_transform_slp_perm_load (node, vNULL, NULL,
					    ncopies_for_cost, instance, true,
					    &n_perms);
	      record_stmt_cost (body_cost_vec, n_perms, vec_perm,
				stmt_info, 0, vect_body);
	      unsigned assumed_nunits
		= vect_nunits_for_cost (STMT_VINFO_VECTYPE (stmt_info));
	      /* And adjust the number of loads performed.  This handles
	         redundancies as well as loads that are later dead.  */
	      /* Mark which group elements the permutation actually uses.  */
	      auto_sbitmap perm (GROUP_SIZE (stmt_info));
	      bitmap_clear (perm);
	      for (i = 0; i < SLP_TREE_LOAD_PERMUTATION (node).length (); ++i)
		bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (node)[i]);
	      /* Count one vector load per nunits-sized chunk of the group
		 that contains at least one used element.  */
	      ncopies_for_cost = 0;
	      bool load_seen = false;
	      for (i = 0; i < GROUP_SIZE (stmt_info); ++i)
		{
		  if (i % assumed_nunits == 0)
		    {
		      if (load_seen)
			ncopies_for_cost++;
		      load_seen = false;
		    }
		  if (bitmap_bit_p (perm, i))
		    load_seen = true;
		}
	      if (load_seen)
		ncopies_for_cost++;
	      gcc_assert (ncopies_for_cost
			  <= (GROUP_SIZE (stmt_info) - GROUP_GAP (stmt_info)
			      + assumed_nunits - 1) / assumed_nunits);
	      poly_uint64 uf = SLP_INSTANCE_UNROLLING_FACTOR (instance);
	      ncopies_for_cost *= estimated_poly_value (uf);
	    }
	  /* Record the cost for the vector loads.  */
	  vect_model_load_cost (stmt_info, ncopies_for_cost,
				memory_access_type, node, prologue_cost_vec,
				body_cost_vec);
	  /* NOTE: returning here skips the constant/external operand
	     costing below; loads have no such operands to account for.  */
	  return;
	}
    }
  else if (STMT_VINFO_TYPE (stmt_info) == induc_vec_info_type)
    {
      /* ncopies_for_cost is the number of IVs we generate.  */
      record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
			stmt_info, 0, vect_body);

      /* Prologue cost for the initial values and step vector.  */
      record_stmt_cost (prologue_cost_vec, ncopies_for_cost,
			CONSTANT_CLASS_P
			  (STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED
			     (stmt_info))
			? vector_load : vec_construct,
			stmt_info, 0, vect_prologue);
      record_stmt_cost (prologue_cost_vec, 1,
			CONSTANT_CLASS_P
			  (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info))
			? vector_load : vec_construct,
			stmt_info, 0, vect_prologue);

      /* ??? No easy way to get at the actual number of vector stmts
         to be geneated and thus the derived IVs.  */
    }
  else
    {
      record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
			stmt_info, 0, vect_body);
      if (SLP_TREE_TWO_OPERATORS (node))
	{
	  /* Two-operator nodes emit both operations plus a blend.  */
	  record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
			    stmt_info, 0, vect_body);
	  record_stmt_cost (body_cost_vec, ncopies_for_cost, vec_perm,
			    stmt_info, 0, vect_body);
	}
    }

  /* Push SLP node def-type to stmts.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);

  /* Scan operands and account for prologue cost of constants/externals.
     ??? This over-estimates cost for multiple uses and should be
     re-engineered.  */
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  lhs = gimple_get_lhs (stmt);
  for (i = 0; i < gimple_num_ops (stmt); ++i)
    {
      tree op = gimple_op (stmt, i);
      gimple *def_stmt;
      enum vect_def_type dt;
      if (!op || op == lhs)
	continue;
      if (vect_is_simple_use (op, stmt_info->vinfo, &def_stmt, &dt)
	  && (dt == vect_constant_def || dt == vect_external_def))
	{
	  /* Without looking at the actual initializer a vector of
	     constants can be implemented as load from the constant pool.
	     When all elements are the same we can use a splat.  */
	  tree vectype = get_vectype_for_scalar_type (TREE_TYPE (op));
	  unsigned group_size = SLP_TREE_SCALAR_STMTS (node).length ();
	  unsigned num_vects_to_check;
	  unsigned HOST_WIDE_INT const_nunits;
	  unsigned nelt_limit;
	  if (TYPE_VECTOR_SUBPARTS (vectype).is_constant (&const_nunits)
	      && ! multiple_p (const_nunits, group_size))
	    {
	      num_vects_to_check = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
	      nelt_limit = const_nunits;
	    }
	  else
	    {
	      /* If either the vector has variable length or the vectors
	         are composed of repeated whole groups we only need to
		 cost construction once.  All vectors will be the same.  */
	      num_vects_to_check = 1;
	      nelt_limit = group_size;
	    }
	  tree elt = NULL_TREE;
	  unsigned nelt = 0;
	  /* Walk the operands lane by lane; ELT stays non-NULL only while
	     all lanes of the current vector carry the same operand.  */
	  for (unsigned j = 0; j < num_vects_to_check * nelt_limit; ++j)
	    {
	      unsigned si = j % group_size;
	      if (nelt == 0)
		elt = gimple_op (SLP_TREE_SCALAR_STMTS (node)[si], i);
	      /* ??? We're just tracking whether all operands of a single
		 vector initializer are the same, ideally we'd check if
		 we emitted the same one already.  */
	      else if (elt != gimple_op (SLP_TREE_SCALAR_STMTS (node)[si], i))
		elt = NULL_TREE;
	      nelt++;
	      if (nelt == nelt_limit)
		{
		  /* ??? We need to pass down stmt_info for a vector type
		     even if it points to the wrong stmt.  */
		  record_stmt_cost (prologue_cost_vec, 1,
				    dt == vect_external_def
				    ? (elt ? scalar_to_vec : vec_construct)
				    : vector_load,
				    stmt_info, 0, vect_prologue);
		  nelt = 0;
		}
	    }
	}
    }

  /* Restore stmt def-types.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;
}
200238fd1498Szrj
/* Compute the cost for the SLP instance INSTANCE.

   DATA is the target cost-model data passed through to add_stmt_cost;
   VISITED carries scalar-stmt sets already costed across instances so
   shared subtrees are not double counted.  Costs are first gathered into
   local vectors and only committed to the target model afterwards.  */

static void
vect_analyze_slp_cost (slp_instance instance, void *data, scalar_stmts_set_t *visited)
{
  stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
  unsigned ncopies_for_cost;
  stmt_info_for_cost *si;
  unsigned i;

  /* Calculate the number of vector stmts to create based on the unrolling
     factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
     GROUP_SIZE / NUNITS otherwise.  */
  unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
  slp_tree node = SLP_INSTANCE_TREE (instance);
  stmt_vec_info stmt_info = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
  /* Get the estimated vectorization factor, which is always one for
     basic-block vectorization.  */
  unsigned int assumed_vf;
  if (STMT_VINFO_LOOP_VINFO (stmt_info))
    assumed_vf = vect_vf_for_cost (STMT_VINFO_LOOP_VINFO (stmt_info));
  else
    assumed_vf = 1;
  /* For reductions look at a reduction operand in case the reduction
     operation is widening like DOT_PROD or SAD.  */
  tree vectype_for_cost = STMT_VINFO_VECTYPE (stmt_info);
  if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
      switch (gimple_assign_rhs_code (stmt))
	{
	case DOT_PROD_EXPR:
	case SAD_EXPR:
	  /* The input vector type is wider than the output type, so base
	     the copy count on the (narrower-element) operand type.  */
	  vectype_for_cost = get_vectype_for_scalar_type
	    (TREE_TYPE (gimple_assign_rhs1 (stmt)));
	  break;
	default:;
	}
    }
  unsigned int assumed_nunits = vect_nunits_for_cost (vectype_for_cost);
  ncopies_for_cost = (least_common_multiple (assumed_nunits,
					     group_size * assumed_vf)
		      / assumed_nunits);

  prologue_cost_vec.create (10);
  body_cost_vec.create (10);
  vect_analyze_slp_cost_1 (instance, SLP_INSTANCE_TREE (instance),
			   &prologue_cost_vec, &body_cost_vec,
			   ncopies_for_cost, visited);

  /* Record the prologue costs, which were delayed until we were
     sure that SLP was successful.  */
  FOR_EACH_VEC_ELT (prologue_cost_vec, i, si)
    {
      struct _stmt_vec_info *stmt_info
	= si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
      (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
			    si->misalign, vect_prologue);
    }

  /* Record the instance's instructions in the target cost model.  */
  FOR_EACH_VEC_ELT (body_cost_vec, i, si)
    {
      struct _stmt_vec_info *stmt_info
	= si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
      (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
			    si->misalign, vect_body);
    }

  prologue_cost_vec.release ();
  body_cost_vec.release ();
}
207538fd1498Szrj
207638fd1498Szrj /* Splits a group of stores, currently beginning at FIRST_STMT, into two groups:
207738fd1498Szrj one (still beginning at FIRST_STMT) of size GROUP1_SIZE (also containing
207838fd1498Szrj the first GROUP1_SIZE stmts, since stores are consecutive), the second
207938fd1498Szrj containing the remainder.
208038fd1498Szrj Return the first stmt in the second group. */
208138fd1498Szrj
208238fd1498Szrj static gimple *
vect_split_slp_store_group(gimple * first_stmt,unsigned group1_size)208338fd1498Szrj vect_split_slp_store_group (gimple *first_stmt, unsigned group1_size)
208438fd1498Szrj {
208538fd1498Szrj stmt_vec_info first_vinfo = vinfo_for_stmt (first_stmt);
208638fd1498Szrj gcc_assert (GROUP_FIRST_ELEMENT (first_vinfo) == first_stmt);
208738fd1498Szrj gcc_assert (group1_size > 0);
208838fd1498Szrj int group2_size = GROUP_SIZE (first_vinfo) - group1_size;
208938fd1498Szrj gcc_assert (group2_size > 0);
209038fd1498Szrj GROUP_SIZE (first_vinfo) = group1_size;
209138fd1498Szrj
209238fd1498Szrj gimple *stmt = first_stmt;
209338fd1498Szrj for (unsigned i = group1_size; i > 1; i--)
209438fd1498Szrj {
209538fd1498Szrj stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
209638fd1498Szrj gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt)) == 1);
209738fd1498Szrj }
209838fd1498Szrj /* STMT is now the last element of the first group. */
209938fd1498Szrj gimple *group2 = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
210038fd1498Szrj GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)) = 0;
210138fd1498Szrj
210238fd1498Szrj GROUP_SIZE (vinfo_for_stmt (group2)) = group2_size;
210338fd1498Szrj for (stmt = group2; stmt; stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)))
210438fd1498Szrj {
210538fd1498Szrj GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = group2;
210638fd1498Szrj gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt)) == 1);
210738fd1498Szrj }
210838fd1498Szrj
210938fd1498Szrj /* For the second group, the GROUP_GAP is that before the original group,
211038fd1498Szrj plus skipping over the first vector. */
211138fd1498Szrj GROUP_GAP (vinfo_for_stmt (group2)) =
211238fd1498Szrj GROUP_GAP (first_vinfo) + group1_size;
211338fd1498Szrj
211438fd1498Szrj /* GROUP_GAP of the first group now has to skip over the second group too. */
211538fd1498Szrj GROUP_GAP (first_vinfo) += group2_size;
211638fd1498Szrj
211738fd1498Szrj if (dump_enabled_p ())
211838fd1498Szrj dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n",
211938fd1498Szrj group1_size, group2_size);
212038fd1498Szrj
212138fd1498Szrj return group2;
212238fd1498Szrj }
212338fd1498Szrj
212438fd1498Szrj /* Calculate the unrolling factor for an SLP instance with GROUP_SIZE
212538fd1498Szrj statements and a vector of NUNITS elements. */
212638fd1498Szrj
212738fd1498Szrj static poly_uint64
calculate_unrolling_factor(poly_uint64 nunits,unsigned int group_size)212838fd1498Szrj calculate_unrolling_factor (poly_uint64 nunits, unsigned int group_size)
212938fd1498Szrj {
  /* The least common multiple of NUNITS and GROUP_SIZE is the smallest
     number of scalar elements that fills whole vectors; dividing by
     GROUP_SIZE yields how many group copies (iterations) are needed.
     The division is exact by construction of common_multiple.  */
213038fd1498Szrj return exact_div (common_multiple (nunits, group_size), group_size);
213138fd1498Szrj }
213238fd1498Szrj
213338fd1498Szrj /* Analyze an SLP instance starting from a group of grouped stores. Call
213438fd1498Szrj vect_build_slp_tree to build a tree of packed stmts if possible.
213538fd1498Szrj Return FALSE if it's impossible to SLP any stmt in the loop. */
213638fd1498Szrj
213738fd1498Szrj static bool
vect_analyze_slp_instance(vec_info * vinfo,gimple * stmt,unsigned max_tree_size)213838fd1498Szrj vect_analyze_slp_instance (vec_info *vinfo,
213938fd1498Szrj gimple *stmt, unsigned max_tree_size)
214038fd1498Szrj {
214138fd1498Szrj slp_instance new_instance;
214238fd1498Szrj slp_tree node;
214338fd1498Szrj unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
214438fd1498Szrj tree vectype, scalar_type = NULL_TREE;
214538fd1498Szrj gimple *next;
214638fd1498Szrj unsigned int i;
214738fd1498Szrj vec<slp_tree> loads;
214838fd1498Szrj struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
214938fd1498Szrj vec<gimple *> scalar_stmts;
215038fd1498Szrj
  /* Determine the vector type and group size.  STMT either heads an
     interleaving group (grouped access or reduction chain, in which case
     GROUP_FIRST_ELEMENT is set) or represents the set of loop
     reductions.  */
215138fd1498Szrj if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
215238fd1498Szrj {
215338fd1498Szrj if (dr)
215438fd1498Szrj {
          /* Grouped load/store: derive the vector type from the
             referenced scalar type.  */
215538fd1498Szrj scalar_type = TREE_TYPE (DR_REF (dr));
215638fd1498Szrj vectype = get_vectype_for_scalar_type (scalar_type);
215738fd1498Szrj }
215838fd1498Szrj else
215938fd1498Szrj {
          /* Reduction chain (only exists in loop vectorization): the
             vector type was already computed during analysis.  */
216038fd1498Szrj gcc_assert (is_a <loop_vec_info> (vinfo));
216138fd1498Szrj vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
216238fd1498Szrj }
216338fd1498Szrj
216438fd1498Szrj group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
216538fd1498Szrj }
216638fd1498Szrj else
216738fd1498Szrj {
      /* A group of independent reductions; the group size is the number
         of reductions recorded for the loop.  */
216838fd1498Szrj gcc_assert (is_a <loop_vec_info> (vinfo));
216938fd1498Szrj vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
217038fd1498Szrj group_size = as_a <loop_vec_info> (vinfo)->reductions.length ();
217138fd1498Szrj }
217238fd1498Szrj
217338fd1498Szrj if (!vectype)
217438fd1498Szrj {
217538fd1498Szrj if (dump_enabled_p ())
217638fd1498Szrj {
217738fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
217838fd1498Szrj "Build SLP failed: unsupported data-type ");
217938fd1498Szrj dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
218038fd1498Szrj dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
218138fd1498Szrj }
218238fd1498Szrj
218338fd1498Szrj return false;
218438fd1498Szrj }
218538fd1498Szrj poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
218638fd1498Szrj
218738fd1498Szrj /* Create a node (a root of the SLP tree) for the packed grouped stores. */
218838fd1498Szrj scalar_stmts.create (group_size);
218938fd1498Szrj next = stmt;
219038fd1498Szrj if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
219138fd1498Szrj {
219238fd1498Szrj /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
219338fd1498Szrj while (next)
219438fd1498Szrj {
          /* If a stmt was replaced by a pattern stmt, analyze the
             pattern stmt instead of the original.  */
219538fd1498Szrj if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
219638fd1498Szrj && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
219738fd1498Szrj scalar_stmts.safe_push (
219838fd1498Szrj STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
219938fd1498Szrj else
220038fd1498Szrj scalar_stmts.safe_push (next);
220138fd1498Szrj next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
220238fd1498Szrj }
220338fd1498Szrj /* Mark the first element of the reduction chain as reduction to properly
220438fd1498Szrj transform the node. In the reduction analysis phase only the last
220538fd1498Szrj element of the chain is marked as reduction. */
220638fd1498Szrj if (!STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
220738fd1498Szrj STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_reduction_def;
220838fd1498Szrj }
220938fd1498Szrj else
221038fd1498Szrj {
221138fd1498Szrj /* Collect reduction statements. */
221238fd1498Szrj vec<gimple *> reductions = as_a <loop_vec_info> (vinfo)->reductions;
221338fd1498Szrj for (i = 0; reductions.iterate (i, &next); i++)
221438fd1498Szrj scalar_stmts.safe_push (next);
221538fd1498Szrj }
221638fd1498Szrj
221738fd1498Szrj loads.create (group_size);
221838fd1498Szrj
221938fd1498Szrj /* Build the tree for the SLP instance. */
222038fd1498Szrj bool *matches = XALLOCAVEC (bool, group_size);
222138fd1498Szrj unsigned npermutes = 0;
  /* bst_fail records scalar-stmt sets for which tree building already
     failed during this build, to avoid re-analyzing them.  */
222238fd1498Szrj bst_fail = new scalar_stmts_set_t ();
222338fd1498Szrj poly_uint64 max_nunits = nunits;
222438fd1498Szrj node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
222538fd1498Szrj &max_nunits, &loads, matches, &npermutes,
222638fd1498Szrj NULL, max_tree_size);
222738fd1498Szrj delete bst_fail;
222838fd1498Szrj if (node != NULL)
222938fd1498Szrj {
223038fd1498Szrj /* Calculate the unrolling factor based on the smallest type. */
223138fd1498Szrj poly_uint64 unrolling_factor
223238fd1498Szrj = calculate_unrolling_factor (max_nunits, group_size);
223338fd1498Szrj
      /* A basic block cannot be unrolled, so an SLP instance there must
         use whole vectors (unrolling factor of one).  */
223438fd1498Szrj if (maybe_ne (unrolling_factor, 1U)
223538fd1498Szrj && is_a <bb_vec_info> (vinfo))
223638fd1498Szrj {
223738fd1498Szrj unsigned HOST_WIDE_INT const_max_nunits;
223838fd1498Szrj if (!max_nunits.is_constant (&const_max_nunits)
223938fd1498Szrj || const_max_nunits > group_size)
224038fd1498Szrj {
224138fd1498Szrj if (dump_enabled_p ())
224238fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
224338fd1498Szrj "Build SLP failed: store group "
224438fd1498Szrj "size not a multiple of the vector size "
224538fd1498Szrj "in basic block SLP\n");
224638fd1498Szrj vect_free_slp_tree (node);
224738fd1498Szrj loads.release ();
224838fd1498Szrj return false;
224938fd1498Szrj }
225038fd1498Szrj /* Fatal mismatch. */
          /* Mark the last whole-vector boundary as mismatched so the
             group-splitting code below retries with a smaller group.  */
225138fd1498Szrj matches[group_size / const_max_nunits * const_max_nunits] = false;
225238fd1498Szrj vect_free_slp_tree (node);
225338fd1498Szrj loads.release ();
225438fd1498Szrj }
225538fd1498Szrj else
225638fd1498Szrj {
225738fd1498Szrj /* Create a new SLP instance. */
225838fd1498Szrj new_instance = XNEW (struct _slp_instance);
225938fd1498Szrj SLP_INSTANCE_TREE (new_instance) = node;
226038fd1498Szrj SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
226138fd1498Szrj SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
226238fd1498Szrj SLP_INSTANCE_LOADS (new_instance) = loads;
226338fd1498Szrj
226438fd1498Szrj /* Compute the load permutation. */
226538fd1498Szrj slp_tree load_node;
226638fd1498Szrj bool loads_permuted = false;
226738fd1498Szrj FOR_EACH_VEC_ELT (loads, i, load_node)
226838fd1498Szrj {
226938fd1498Szrj vec<unsigned> load_permutation;
227038fd1498Szrj int j;
227138fd1498Szrj gimple *load, *first_stmt;
227238fd1498Szrj bool this_load_permuted = false;
227338fd1498Szrj load_permutation.create (group_size);
227438fd1498Szrj first_stmt = GROUP_FIRST_ELEMENT
227538fd1498Szrj (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
            /* Record, for each load in the node, its position in the
               interleaving chain headed by FIRST_STMT.  */
227638fd1498Szrj FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
227738fd1498Szrj {
227838fd1498Szrj int load_place = vect_get_place_in_interleaving_chain
227938fd1498Szrj (load, first_stmt);
228038fd1498Szrj gcc_assert (load_place != -1);
228138fd1498Szrj if (load_place != j)
228238fd1498Szrj this_load_permuted = true;
228338fd1498Szrj load_permutation.safe_push (load_place);
228438fd1498Szrj }
228538fd1498Szrj if (!this_load_permuted
228638fd1498Szrj /* The load requires permutation when unrolling exposes
228738fd1498Szrj a gap either because the group is larger than the SLP
228838fd1498Szrj group-size or because there is a gap between the groups. */
228938fd1498Szrj && (known_eq (unrolling_factor, 1U)
229038fd1498Szrj || (group_size == GROUP_SIZE (vinfo_for_stmt (first_stmt))
229138fd1498Szrj && GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0)))
229238fd1498Szrj {
229338fd1498Szrj load_permutation.release ();
229438fd1498Szrj continue;
229538fd1498Szrj }
229638fd1498Szrj SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
229738fd1498Szrj loads_permuted = true;
229838fd1498Szrj }
229938fd1498Szrj
230038fd1498Szrj if (loads_permuted)
230138fd1498Szrj {
230238fd1498Szrj if (!vect_supported_load_permutation_p (new_instance))
230338fd1498Szrj {
230438fd1498Szrj if (dump_enabled_p ())
230538fd1498Szrj {
230638fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
230738fd1498Szrj "Build SLP failed: unsupported load "
230838fd1498Szrj "permutation ");
230938fd1498Szrj dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,
231038fd1498Szrj TDF_SLIM, stmt, 0);
231138fd1498Szrj }
231238fd1498Szrj vect_free_slp_instance (new_instance);
231338fd1498Szrj return false;
231438fd1498Szrj }
231538fd1498Szrj }
231638fd1498Szrj
231738fd1498Szrj /* If the loads and stores can be handled with load/store-lanes
231838fd1498Szrj instructions do not generate this SLP instance. */
231938fd1498Szrj if (is_a <loop_vec_info> (vinfo)
232038fd1498Szrj && loads_permuted
232138fd1498Szrj && dr && vect_store_lanes_supported (vectype, group_size, false))
232238fd1498Szrj {
232338fd1498Szrj slp_tree load_node;
232438fd1498Szrj FOR_EACH_VEC_ELT (loads, i, load_node)
232538fd1498Szrj {
232638fd1498Szrj gimple *first_stmt = GROUP_FIRST_ELEMENT
232738fd1498Szrj (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
232838fd1498Szrj stmt_vec_info stmt_vinfo = vinfo_for_stmt (first_stmt);
232938fd1498Szrj /* Use SLP for strided accesses (or if we
233038fd1498Szrj can't load-lanes). */
233138fd1498Szrj if (STMT_VINFO_STRIDED_P (stmt_vinfo)
233238fd1498Szrj || ! vect_load_lanes_supported
233338fd1498Szrj (STMT_VINFO_VECTYPE (stmt_vinfo),
233438fd1498Szrj GROUP_SIZE (stmt_vinfo), false))
233538fd1498Szrj break;
233638fd1498Szrj }
          /* The loop above ran to completion iff every load group can
             use load-lanes; in that case prefer lanes over SLP.  */
233738fd1498Szrj if (i == loads.length ())
233838fd1498Szrj {
233938fd1498Szrj if (dump_enabled_p ())
234038fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
234138fd1498Szrj "Built SLP cancelled: can use "
234238fd1498Szrj "load/store-lanes\n");
234338fd1498Szrj vect_free_slp_instance (new_instance);
234438fd1498Szrj return false;
234538fd1498Szrj }
234638fd1498Szrj }
234738fd1498Szrj
234838fd1498Szrj vinfo->slp_instances.safe_push (new_instance);
234938fd1498Szrj
235038fd1498Szrj if (dump_enabled_p ())
235138fd1498Szrj {
235238fd1498Szrj dump_printf_loc (MSG_NOTE, vect_location,
235338fd1498Szrj "Final SLP tree for instance:\n");
235438fd1498Szrj vect_print_slp_tree (MSG_NOTE, vect_location, node);
235538fd1498Szrj }
235638fd1498Szrj
235738fd1498Szrj return true;
235838fd1498Szrj }
235938fd1498Szrj }
236038fd1498Szrj else
236138fd1498Szrj {
236238fd1498Szrj /* Failed to SLP. */
236338fd1498Szrj /* Free the allocated memory. */
236438fd1498Szrj scalar_stmts.release ();
236538fd1498Szrj loads.release ();
236638fd1498Szrj }
236738fd1498Szrj
236838fd1498Szrj /* For basic block SLP, try to break the group up into multiples of the
236938fd1498Szrj vector size. */
237038fd1498Szrj unsigned HOST_WIDE_INT const_nunits;
237138fd1498Szrj if (is_a <bb_vec_info> (vinfo)
237238fd1498Szrj && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
237338fd1498Szrj && STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
237438fd1498Szrj && nunits.is_constant (&const_nunits))
237538fd1498Szrj {
237638fd1498Szrj /* We consider breaking the group only on VF boundaries from the existing
237738fd1498Szrj start. */
      /* Find the first scalar stmt that failed to match.  */
237838fd1498Szrj for (i = 0; i < group_size; i++)
237938fd1498Szrj if (!matches[i]) break;
238038fd1498Szrj
238138fd1498Szrj if (i >= const_nunits && i < group_size)
238238fd1498Szrj {
238338fd1498Szrj /* Split into two groups at the first vector boundary before i. */
          /* The mask below relies on the vector element count being a
             power of two.  */
238438fd1498Szrj gcc_assert ((const_nunits & (const_nunits - 1)) == 0);
238538fd1498Szrj unsigned group1_size = i & ~(const_nunits - 1);
238638fd1498Szrj
          /* Recurse on the leading sub-group that matched so far.  */
238738fd1498Szrj gimple *rest = vect_split_slp_store_group (stmt, group1_size);
238838fd1498Szrj bool res = vect_analyze_slp_instance (vinfo, stmt, max_tree_size);
238938fd1498Szrj /* If the first non-match was in the middle of a vector,
239038fd1498Szrj skip the rest of that vector. */
239138fd1498Szrj if (group1_size < i)
239238fd1498Szrj {
239338fd1498Szrj i = group1_size + const_nunits;
239438fd1498Szrj if (i < group_size)
239538fd1498Szrj rest = vect_split_slp_store_group (rest, const_nunits);
239638fd1498Szrj }
239738fd1498Szrj if (i < group_size)
239838fd1498Szrj res |= vect_analyze_slp_instance (vinfo, rest, max_tree_size);
239938fd1498Szrj return res;
240038fd1498Szrj }
240138fd1498Szrj /* Even though the first vector did not all match, we might be able to SLP
240238fd1498Szrj (some) of the remainder. FORNOW ignore this possibility. */
240338fd1498Szrj }
240438fd1498Szrj
240538fd1498Szrj return false;
240638fd1498Szrj }
240738fd1498Szrj
240838fd1498Szrj
240938fd1498Szrj /* Check whether there are stmts in the loop that can be vectorized using SLP.
241038fd1498Szrj Build SLP trees of packed scalar stmts if SLP is possible. */
241138fd1498Szrj
241238fd1498Szrj bool
vect_analyze_slp(vec_info * vinfo,unsigned max_tree_size)241338fd1498Szrj vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
241438fd1498Szrj {
241538fd1498Szrj unsigned int i;
241638fd1498Szrj gimple *first_element;
241738fd1498Szrj
241838fd1498Szrj if (dump_enabled_p ())
241938fd1498Szrj dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_slp ===\n");
242038fd1498Szrj
242138fd1498Szrj /* Find SLP sequences starting from groups of grouped stores. */
242238fd1498Szrj FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element)
242338fd1498Szrj vect_analyze_slp_instance (vinfo, first_element, max_tree_size)__;
242438fd1498Szrj
242538fd1498Szrj if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
242638fd1498Szrj {
242738fd1498Szrj if (loop_vinfo->reduction_chains.length () > 0)
242838fd1498Szrj {
242938fd1498Szrj /* Find SLP sequences starting from reduction chains. */
243038fd1498Szrj FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
243138fd1498Szrj if (! vect_analyze_slp_instance (vinfo, first_element,
243238fd1498Szrj max_tree_size))
243338fd1498Szrj {
243438fd1498Szrj /* Dissolve reduction chain group. */
            /* Clear the GROUP_* links of every chain member so the
               stmts are treated as individual (non-grouped) stmts by
               later analysis.  */
243538fd1498Szrj gimple *next, *stmt = first_element;
243638fd1498Szrj while (stmt)
243738fd1498Szrj {
243838fd1498Szrj stmt_vec_info vinfo = vinfo_for_stmt (stmt);
243938fd1498Szrj next = GROUP_NEXT_ELEMENT (vinfo);
244038fd1498Szrj GROUP_FIRST_ELEMENT (vinfo) = NULL;
244138fd1498Szrj GROUP_NEXT_ELEMENT (vinfo) = NULL;
244238fd1498Szrj stmt = next;
244338fd1498Szrj }
            /* Undo the vect_reduction_def marking done when the SLP
               instance was attempted.  */
244438fd1498Szrj STMT_VINFO_DEF_TYPE (vinfo_for_stmt (first_element))
244538fd1498Szrj = vect_internal_def;
244638fd1498Szrj }
244738fd1498Szrj }
244838fd1498Szrj
244938fd1498Szrj /* Find SLP sequences starting from groups of reductions. */
245038fd1498Szrj if (loop_vinfo->reductions.length () > 1)
245138fd1498Szrj vect_analyze_slp_instance (vinfo, loop_vinfo->reductions[0],
245238fd1498Szrj max_tree_size);
245338fd1498Szrj }
245438fd1498Szrj
245538fd1498Szrj return true;
245638fd1498Szrj }
245738fd1498Szrj
245838fd1498Szrj
245938fd1498Szrj /* For each possible SLP instance decide whether to SLP it and calculate overall
246038fd1498Szrj unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
246138fd1498Szrj least one instance. */
246238fd1498Szrj
246338fd1498Szrj bool
vect_make_slp_decision(loop_vec_info loop_vinfo)246438fd1498Szrj vect_make_slp_decision (loop_vec_info loop_vinfo)
246538fd1498Szrj {
246638fd1498Szrj unsigned int i;
246738fd1498Szrj poly_uint64 unrolling_factor = 1;
246838fd1498Szrj vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
246938fd1498Szrj slp_instance instance;
247038fd1498Szrj int decided_to_slp = 0;
247138fd1498Szrj
247238fd1498Szrj if (dump_enabled_p ())
247338fd1498Szrj dump_printf_loc (MSG_NOTE, vect_location, "=== vect_make_slp_decision ==="
247438fd1498Szrj "\n");
247538fd1498Szrj
247638fd1498Szrj FOR_EACH_VEC_ELT (slp_instances, i, instance)
247738fd1498Szrj {
247838fd1498Szrj /* FORNOW: SLP if you can. */
247938fd1498Szrj /* All unroll factors have the form current_vector_size * X for some
248038fd1498Szrj rational X, so they must have a common multiple. */
248138fd1498Szrj unrolling_factor
248238fd1498Szrj = force_common_multiple (unrolling_factor,
248338fd1498Szrj SLP_INSTANCE_UNROLLING_FACTOR (instance));
248438fd1498Szrj
248538fd1498Szrj /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
248638fd1498Szrj call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
248738fd1498Szrj loop-based vectorization. Such stmts will be marked as HYBRID. */
248838fd1498Szrj vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
248938fd1498Szrj decided_to_slp++;
249038fd1498Szrj }
249138fd1498Szrj
249238fd1498Szrj LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
249338fd1498Szrj
249438fd1498Szrj if (decided_to_slp && dump_enabled_p ())
249538fd1498Szrj {
249638fd1498Szrj dump_printf_loc (MSG_NOTE, vect_location,
249738fd1498Szrj "Decided to SLP %d instances. Unrolling factor ",
249838fd1498Szrj decided_to_slp);
249938fd1498Szrj dump_dec (MSG_NOTE, unrolling_factor);
250038fd1498Szrj dump_printf (MSG_NOTE, "\n");
250138fd1498Szrj }
250238fd1498Szrj
  /* Return whether at least one instance was chosen for SLP.  */
250338fd1498Szrj return (decided_to_slp > 0);
250438fd1498Szrj }
250538fd1498Szrj
250638fd1498Szrj
250738fd1498Szrj /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
250838fd1498Szrj can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
250938fd1498Szrj
251038fd1498Szrj static void
vect_detect_hybrid_slp_stmts(slp_tree node,unsigned i,slp_vect_type stype)251138fd1498Szrj vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype)
251238fd1498Szrj {
  /* Examine the I-th scalar stmt of NODE.  STYPE carries the hybrid
     state discovered so far along this root-to-node path.  */
251338fd1498Szrj gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[i];
251438fd1498Szrj imm_use_iterator imm_iter;
251538fd1498Szrj gimple *use_stmt;
251638fd1498Szrj stmt_vec_info use_vinfo, stmt_vinfo = vinfo_for_stmt (stmt);
251738fd1498Szrj slp_tree child;
251838fd1498Szrj loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
251938fd1498Szrj struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
252038fd1498Szrj int j;
252138fd1498Szrj
252238fd1498Szrj /* Propagate hybrid down the SLP tree. */
252338fd1498Szrj if (stype == hybrid)
252438fd1498Szrj ;
252538fd1498Szrj else if (HYBRID_SLP_STMT (stmt_vinfo))
252638fd1498Szrj stype = hybrid;
252738fd1498Szrj else
252838fd1498Szrj {
252938fd1498Szrj /* Check if a pure SLP stmt has uses in non-SLP stmts. */
253038fd1498Szrj gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo));
253138fd1498Szrj /* If we get a pattern stmt here we have to use the LHS of the
253238fd1498Szrj original stmt for immediate uses. */
253338fd1498Szrj if (! STMT_VINFO_IN_PATTERN_P (stmt_vinfo)
253438fd1498Szrj && STMT_VINFO_RELATED_STMT (stmt_vinfo))
253538fd1498Szrj stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
253638fd1498Szrj tree def;
253738fd1498Szrj if (gimple_code (stmt) == GIMPLE_PHI)
253838fd1498Szrj def = gimple_phi_result (stmt);
253938fd1498Szrj else
254038fd1498Szrj def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF);
254138fd1498Szrj if (def)
254238fd1498Szrj FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, def)
254338fd1498Szrj {
          /* Uses outside the loop do not force hybrid SLP.  */
254438fd1498Szrj if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
254538fd1498Szrj continue;
254638fd1498Szrj use_vinfo = vinfo_for_stmt (use_stmt);
254738fd1498Szrj if (STMT_VINFO_IN_PATTERN_P (use_vinfo)
254838fd1498Szrj && STMT_VINFO_RELATED_STMT (use_vinfo))
254938fd1498Szrj use_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (use_vinfo));
          /* A relevant non-SLP use (except a reduction PHI) means the
             def must also be computed by loop-based vectorization.  */
255038fd1498Szrj if (!STMT_SLP_TYPE (use_vinfo)
255138fd1498Szrj && (STMT_VINFO_RELEVANT (use_vinfo)
255238fd1498Szrj || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
255338fd1498Szrj && !(gimple_code (use_stmt) == GIMPLE_PHI
255438fd1498Szrj && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
255538fd1498Szrj {
255638fd1498Szrj if (dump_enabled_p ())
255738fd1498Szrj {
255838fd1498Szrj dump_printf_loc (MSG_NOTE, vect_location, "use of SLP "
255938fd1498Szrj "def in non-SLP stmt: ");
256038fd1498Szrj dump_gimple_stmt (MSG_NOTE, TDF_SLIM, use_stmt, 0);
256138fd1498Szrj }
256238fd1498Szrj stype = hybrid;
256338fd1498Szrj }
256438fd1498Szrj }
256538fd1498Szrj }
256638fd1498Szrj
256738fd1498Szrj if (stype == hybrid
256838fd1498Szrj && !HYBRID_SLP_STMT (stmt_vinfo))
256938fd1498Szrj {
257038fd1498Szrj if (dump_enabled_p ())
257138fd1498Szrj {
257238fd1498Szrj dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
257338fd1498Szrj dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
257438fd1498Szrj }
257538fd1498Szrj STMT_SLP_TYPE (stmt_vinfo) = hybrid;
257638fd1498Szrj }
257738fd1498Szrj
  /* Recurse into internal-def children, passing down the (possibly
     upgraded) hybrid state.  */
257838fd1498Szrj FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
257938fd1498Szrj if (SLP_TREE_DEF_TYPE (child) != vect_external_def)
258038fd1498Szrj vect_detect_hybrid_slp_stmts (child, i, stype)__;
258138fd1498Szrj }
258238fd1498Szrj
258338fd1498Szrj /* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */
258438fd1498Szrj
258538fd1498Szrj static tree
vect_detect_hybrid_slp_1(tree * tp,int *,void * data)258638fd1498Szrj vect_detect_hybrid_slp_1 (tree *tp, int *, void *data)
258738fd1498Szrj {
  /* Operand (use) walker callback: WI->info holds the loop being
     vectorized.  Marks the defining stmt of a used SSA name hybrid
     when that def is currently pure-SLP.  */
258838fd1498Szrj walk_stmt_info *wi = (walk_stmt_info *)data;
258938fd1498Szrj struct loop *loopp = (struct loop *)wi->info;
259038fd1498Szrj
  /* Only uses matter here; skip definitions.  */
259138fd1498Szrj if (wi->is_lhs)
259238fd1498Szrj return NULL_TREE;
259338fd1498Szrj
259438fd1498Szrj if (TREE_CODE (*tp) == SSA_NAME
259538fd1498Szrj && !SSA_NAME_IS_DEFAULT_DEF (*tp))
259638fd1498Szrj {
259738fd1498Szrj gimple *def_stmt = SSA_NAME_DEF_STMT (*tp);
259838fd1498Szrj if (flow_bb_inside_loop_p (loopp, gimple_bb (def_stmt))
259938fd1498Szrj && PURE_SLP_STMT (vinfo_for_stmt (def_stmt)))
260038fd1498Szrj {
260138fd1498Szrj if (dump_enabled_p ())
260238fd1498Szrj {
260338fd1498Szrj dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
260438fd1498Szrj dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
260538fd1498Szrj }
260638fd1498Szrj STMT_SLP_TYPE (vinfo_for_stmt (def_stmt)) = hybrid;
260738fd1498Szrj }
260838fd1498Szrj }
260938fd1498Szrj
  /* Returning NULL_TREE continues the walk.  */
261038fd1498Szrj return NULL_TREE;
261138fd1498Szrj }
261238fd1498Szrj
261338fd1498Szrj static tree
vect_detect_hybrid_slp_2(gimple_stmt_iterator * gsi,bool * handled,walk_stmt_info *)261438fd1498Szrj vect_detect_hybrid_slp_2 (gimple_stmt_iterator *gsi, bool *handled,
261538fd1498Szrj walk_stmt_info *)
261638fd1498Szrj {
  /* Statement walker callback: decide whether the operands of the
     stmt at GSI should be walked by vect_detect_hybrid_slp_1.  */
261738fd1498Szrj stmt_vec_info use_vinfo = vinfo_for_stmt (gsi_stmt (*gsi));
261838fd1498Szrj /* If the stmt is in a SLP instance then this isn't a reason
261938fd1498Szrj to mark use definitions in other SLP instances as hybrid. */
262038fd1498Szrj if (! STMT_SLP_TYPE (use_vinfo)
262138fd1498Szrj && (STMT_VINFO_RELEVANT (use_vinfo)
262238fd1498Szrj || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
262338fd1498Szrj && ! (gimple_code (gsi_stmt (*gsi)) == GIMPLE_PHI
262438fd1498Szrj && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
    /* Relevant non-SLP stmt: leave *HANDLED false so the operand walk
       proceeds and marks SLP defs it uses as hybrid.  */
262538fd1498Szrj ;
262638fd1498Szrj else
    /* Setting *HANDLED suppresses the operand walk for this stmt.  */
262738fd1498Szrj *handled = true;
262838fd1498Szrj return NULL_TREE;
262938fd1498Szrj }
263038fd1498Szrj
263138fd1498Szrj /* Find stmts that must be both vectorized and SLPed. */
263238fd1498Szrj
263338fd1498Szrj void
vect_detect_hybrid_slp(loop_vec_info loop_vinfo)263438fd1498Szrj vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
263538fd1498Szrj {
263638fd1498Szrj unsigned int i;
263738fd1498Szrj vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
263838fd1498Szrj slp_instance instance;
263938fd1498Szrj
264038fd1498Szrj if (dump_enabled_p ())
264138fd1498Szrj dump_printf_loc (MSG_NOTE, vect_location, "=== vect_detect_hybrid_slp ==="
264238fd1498Szrj "\n");
264338fd1498Szrj
264438fd1498Szrj /* First walk all pattern stmt in the loop and mark defs of uses as
264538fd1498Szrj hybrid because immediate uses in them are not recorded. */
264638fd1498Szrj for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
264738fd1498Szrj {
264838fd1498Szrj basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
264938fd1498Szrj for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
265038fd1498Szrj gsi_next (&gsi))
265138fd1498Szrj {
265238fd1498Szrj gimple *stmt = gsi_stmt (gsi);
265338fd1498Szrj stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
265438fd1498Szrj if (STMT_VINFO_IN_PATTERN_P (stmt_info))
265538fd1498Szrj {
265638fd1498Szrj walk_stmt_info wi;
265738fd1498Szrj memset (&wi, 0, sizeof (wi));
265838fd1498Szrj wi.info = LOOP_VINFO_LOOP (loop_vinfo);
            /* Walk the main pattern stmt and its definition sequence,
               marking pure-SLP defs used by them as hybrid.  */
265938fd1498Szrj gimple_stmt_iterator gsi2
266038fd1498Szrj = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
266138fd1498Szrj walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2,
266238fd1498Szrj vect_detect_hybrid_slp_1, &wi);
266338fd1498Szrj walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
266438fd1498Szrj vect_detect_hybrid_slp_2,
266538fd1498Szrj vect_detect_hybrid_slp_1, &wi);
266638fd1498Szrj }
266738fd1498Szrj }
266838fd1498Szrj }
266938fd1498Szrj
267038fd1498Szrj /* Then walk the SLP instance trees marking stmts with uses in
267138fd1498Szrj non-SLP stmts as hybrid, also propagating hybrid down the
267238fd1498Szrj SLP tree, collecting the above info on-the-fly. */
267338fd1498Szrj FOR_EACH_VEC_ELT (slp_instances, i, instance)
267438fd1498Szrj {
      /* One pass per lane: check each scalar-stmt position of the
         instance's group separately.  */
267538fd1498Szrj for (unsigned i = 0; i < SLP_INSTANCE_GROUP_SIZE (instance); ++i)
267638fd1498Szrj vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance),
267738fd1498Szrj i, pure_slp);
267838fd1498Szrj }
267938fd1498Szrj }
268038fd1498Szrj
268138fd1498Szrj
268238fd1498Szrj /* Initialize a bb_vec_info struct for the statements between
268338fd1498Szrj REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive). */
268438fd1498Szrj
_bb_vec_info(gimple_stmt_iterator region_begin_in,gimple_stmt_iterator region_end_in)268538fd1498Szrj _bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in,
268638fd1498Szrj gimple_stmt_iterator region_end_in)
268738fd1498Szrj : vec_info (vec_info::bb, init_cost (NULL)),
268838fd1498Szrj bb (gsi_bb (region_begin_in)),
268938fd1498Szrj region_begin (region_begin_in),
269038fd1498Szrj region_end (region_end_in)
269138fd1498Szrj {
269238fd1498Szrj gimple_stmt_iterator gsi;
269338fd1498Szrj
  /* Create a stmt_vec_info for every stmt in the region and set its
     uid to 0, which marks the stmt as belonging to this region.  */
269438fd1498Szrj for (gsi = region_begin; gsi_stmt (gsi) != gsi_stmt (region_end);
269538fd1498Szrj gsi_next (&gsi))
269638fd1498Szrj {
269738fd1498Szrj gimple *stmt = gsi_stmt (gsi);
269838fd1498Szrj gimple_set_uid (stmt, 0);
269938fd1498Szrj set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, this));
270038fd1498Szrj }
270138fd1498Szrj
  /* Make this bb_vec_info reachable from the basic block.  */
270238fd1498Szrj bb->aux = this;
270338fd1498Szrj }
270438fd1498Szrj
270538fd1498Szrj
270638fd1498Szrj /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
270738fd1498Szrj stmts in the basic block. */
270838fd1498Szrj
~_bb_vec_info()270938fd1498Szrj _bb_vec_info::~_bb_vec_info ()
271038fd1498Szrj {
271138fd1498Szrj for (gimple_stmt_iterator si = region_begin;
271238fd1498Szrj gsi_stmt (si) != gsi_stmt (region_end); gsi_next (&si))
271338fd1498Szrj {
271438fd1498Szrj gimple *stmt = gsi_stmt (si);
271538fd1498Szrj stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
271638fd1498Szrj
271738fd1498Szrj if (stmt_info)
271838fd1498Szrj /* Free stmt_vec_info. */
271938fd1498Szrj free_stmt_vec_info (stmt);
272038fd1498Szrj
272138fd1498Szrj /* Reset region marker. */
      /* uid -1 undoes the uid 0 set by the constructor, so the stmt is
         no longer considered part of a vectorization region.  */
272238fd1498Szrj gimple_set_uid (stmt, -1);
272338fd1498Szrj }
272438fd1498Szrj
  /* Drop the back-pointer installed by the constructor.  */
272538fd1498Szrj bb->aux = NULL;
272638fd1498Szrj }
272738fd1498Szrj
272838fd1498Szrj
272938fd1498Szrj /* Analyze statements contained in SLP tree NODE after recursively analyzing
273038fd1498Szrj the subtree. NODE_INSTANCE contains NODE and VINFO contains INSTANCE.
273138fd1498Szrj
273238fd1498Szrj Return true if the operations are supported. */
273338fd1498Szrj
273438fd1498Szrj static bool
vect_slp_analyze_node_operations(vec_info * vinfo,slp_tree node,slp_instance node_instance)273538fd1498Szrj vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
273638fd1498Szrj slp_instance node_instance)
273738fd1498Szrj {
273838fd1498Szrj bool dummy;
273938fd1498Szrj int i, j;
274038fd1498Szrj gimple *stmt;
274138fd1498Szrj slp_tree child;
274238fd1498Szrj
274338fd1498Szrj if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
274438fd1498Szrj return true;
274538fd1498Szrj
274638fd1498Szrj FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
274738fd1498Szrj if (!vect_slp_analyze_node_operations (vinfo, child, node_instance))
274838fd1498Szrj return false;
274938fd1498Szrj
275038fd1498Szrj stmt = SLP_TREE_SCALAR_STMTS (node)[0];
275138fd1498Szrj stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
275238fd1498Szrj gcc_assert (stmt_info);
275338fd1498Szrj gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
275438fd1498Szrj
275538fd1498Szrj /* For BB vectorization vector types are assigned here.
275638fd1498Szrj Memory accesses already got their vector type assigned
275738fd1498Szrj in vect_analyze_data_refs. */
275838fd1498Szrj bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
275938fd1498Szrj if (bb_vinfo
276038fd1498Szrj && ! STMT_VINFO_DATA_REF (stmt_info))
276138fd1498Szrj {
276238fd1498Szrj gcc_assert (PURE_SLP_STMT (stmt_info));
276338fd1498Szrj
276438fd1498Szrj tree scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
276538fd1498Szrj if (dump_enabled_p ())
276638fd1498Szrj {
276738fd1498Szrj dump_printf_loc (MSG_NOTE, vect_location,
276838fd1498Szrj "get vectype for scalar type: ");
276938fd1498Szrj dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
277038fd1498Szrj dump_printf (MSG_NOTE, "\n");
277138fd1498Szrj }
277238fd1498Szrj
277338fd1498Szrj tree vectype = get_vectype_for_scalar_type (scalar_type);
277438fd1498Szrj if (!vectype)
277538fd1498Szrj {
277638fd1498Szrj if (dump_enabled_p ())
277738fd1498Szrj {
277838fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
277938fd1498Szrj "not SLPed: unsupported data-type ");
278038fd1498Szrj dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
278138fd1498Szrj scalar_type);
278238fd1498Szrj dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
278338fd1498Szrj }
278438fd1498Szrj return false;
278538fd1498Szrj }
278638fd1498Szrj
278738fd1498Szrj if (dump_enabled_p ())
278838fd1498Szrj {
278938fd1498Szrj dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
279038fd1498Szrj dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
279138fd1498Szrj dump_printf (MSG_NOTE, "\n");
279238fd1498Szrj }
279338fd1498Szrj
279438fd1498Szrj gimple *sstmt;
279538fd1498Szrj FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, sstmt)
279638fd1498Szrj STMT_VINFO_VECTYPE (vinfo_for_stmt (sstmt)) = vectype;
279738fd1498Szrj }
279838fd1498Szrj
279938fd1498Szrj /* Calculate the number of vector statements to be created for the
280038fd1498Szrj scalar stmts in this node. For SLP reductions it is equal to the
280138fd1498Szrj number of vector statements in the children (which has already been
280238fd1498Szrj calculated by the recursive call). Otherwise it is the number of
280338fd1498Szrj scalar elements in one scalar iteration (GROUP_SIZE) multiplied by
280438fd1498Szrj VF divided by the number of elements in a vector. */
280538fd1498Szrj if (GROUP_FIRST_ELEMENT (stmt_info)
280638fd1498Szrj && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
280738fd1498Szrj SLP_TREE_NUMBER_OF_VEC_STMTS (node)
280838fd1498Szrj = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[0]);
280938fd1498Szrj else
281038fd1498Szrj {
281138fd1498Szrj poly_uint64 vf;
281238fd1498Szrj if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
281338fd1498Szrj vf = loop_vinfo->vectorization_factor;
281438fd1498Szrj else
281538fd1498Szrj vf = 1;
281638fd1498Szrj unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (node_instance);
281738fd1498Szrj tree vectype = STMT_VINFO_VECTYPE (stmt_info);
281838fd1498Szrj SLP_TREE_NUMBER_OF_VEC_STMTS (node)
281938fd1498Szrj = vect_get_num_vectors (vf * group_size, vectype);
282038fd1498Szrj }
282138fd1498Szrj
2822*58e805e6Szrj /* ??? We have to catch the case late where two first scalar stmts appear
2823*58e805e6Szrj in multiple SLP children with different def type and fail. Remember
2824*58e805e6Szrj original def types first since SLP_TREE_DEF_TYPE doesn't necessarily
2825*58e805e6Szrj match it when that is vect_internal_def. */
2826*58e805e6Szrj auto_vec<vect_def_type, 4> dt;
2827*58e805e6Szrj dt.safe_grow (SLP_TREE_CHILDREN (node).length ());
2828*58e805e6Szrj FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2829*58e805e6Szrj dt[j]
2830*58e805e6Szrj = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0]));
2831*58e805e6Szrj
283238fd1498Szrj /* Push SLP node def-type to stmt operands. */
283338fd1498Szrj FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
283438fd1498Szrj if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
283538fd1498Szrj STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0]))
283638fd1498Szrj = SLP_TREE_DEF_TYPE (child);
2837*58e805e6Szrj
2838*58e805e6Szrj /* Check everything worked out. */
2839*58e805e6Szrj bool res = true;
284038fd1498Szrj FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
284138fd1498Szrj if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2842*58e805e6Szrj {
2843*58e805e6Szrj if (STMT_VINFO_DEF_TYPE
2844*58e805e6Szrj (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0]))
2845*58e805e6Szrj != SLP_TREE_DEF_TYPE (child))
2846*58e805e6Szrj res = false;
2847*58e805e6Szrj }
2848*58e805e6Szrj else if (STMT_VINFO_DEF_TYPE
2849*58e805e6Szrj (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])) != dt[j])
2850*58e805e6Szrj res = false;
2851*58e805e6Szrj if (!res && dump_enabled_p ())
2852*58e805e6Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2853*58e805e6Szrj "not vectorized: same operand with different "
2854*58e805e6Szrj "def type in stmt.\n");
2855*58e805e6Szrj if (res)
2856*58e805e6Szrj res = vect_analyze_stmt (stmt, &dummy, node, node_instance);
285738fd1498Szrj
2858*58e805e6Szrj /* Restore def-types. */
2859*58e805e6Szrj FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2860*58e805e6Szrj STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0]))
2861*58e805e6Szrj = dt[j];
2862*58e805e6Szrj
2863*58e805e6Szrj return res;
286438fd1498Szrj }
286538fd1498Szrj
286638fd1498Szrj
286738fd1498Szrj /* Analyze statements in SLP instances of VINFO. Return true if the
286838fd1498Szrj operations are supported. */
286938fd1498Szrj
287038fd1498Szrj bool
vect_slp_analyze_operations(vec_info * vinfo)287138fd1498Szrj vect_slp_analyze_operations (vec_info *vinfo)
287238fd1498Szrj {
287338fd1498Szrj slp_instance instance;
287438fd1498Szrj int i;
287538fd1498Szrj
287638fd1498Szrj if (dump_enabled_p ())
287738fd1498Szrj dump_printf_loc (MSG_NOTE, vect_location,
287838fd1498Szrj "=== vect_slp_analyze_operations ===\n");
287938fd1498Szrj
288038fd1498Szrj for (i = 0; vinfo->slp_instances.iterate (i, &instance); )
288138fd1498Szrj {
288238fd1498Szrj if (!vect_slp_analyze_node_operations (vinfo,
288338fd1498Szrj SLP_INSTANCE_TREE (instance),
288438fd1498Szrj instance))
288538fd1498Szrj {
288638fd1498Szrj dump_printf_loc (MSG_NOTE, vect_location,
288738fd1498Szrj "removing SLP instance operations starting from: ");
288838fd1498Szrj dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
288938fd1498Szrj SLP_TREE_SCALAR_STMTS
289038fd1498Szrj (SLP_INSTANCE_TREE (instance))[0], 0);
289138fd1498Szrj vect_free_slp_instance (instance);
289238fd1498Szrj vinfo->slp_instances.ordered_remove (i);
289338fd1498Szrj }
289438fd1498Szrj else
289538fd1498Szrj i++;
289638fd1498Szrj }
289738fd1498Szrj
289838fd1498Szrj if (dump_enabled_p ())
289938fd1498Szrj dump_printf_loc (MSG_NOTE, vect_location,
290038fd1498Szrj "=== vect_analyze_slp_cost ===\n");
290138fd1498Szrj
290238fd1498Szrj /* Compute the costs of the SLP instances. */
290338fd1498Szrj scalar_stmts_set_t *visited = new scalar_stmts_set_t ();
290438fd1498Szrj for (i = 0; vinfo->slp_instances.iterate (i, &instance); ++i)
290538fd1498Szrj vect_analyze_slp_cost (instance, vinfo->target_cost_data, visited);
290638fd1498Szrj delete visited;
290738fd1498Szrj
290838fd1498Szrj return !vinfo->slp_instances.is_empty ();
290938fd1498Szrj }
291038fd1498Szrj
291138fd1498Szrj
291238fd1498Szrj /* Compute the scalar cost of the SLP node NODE and its children
291338fd1498Szrj and return it. Do not account defs that are marked in LIFE and
291438fd1498Szrj update LIFE according to uses of NODE. */
291538fd1498Szrj
static unsigned
vect_bb_slp_scalar_cost (basic_block bb,
                         slp_tree node, vec<bool, va_heap> *life)
{
  unsigned scalar_cost = 0;
  unsigned i;
  gimple *stmt;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      unsigned stmt_cost;
      ssa_op_iter op_iter;
      def_operand_p def_p;
      stmt_vec_info stmt_info;

      /* Stmt already marked live (kept as a scalar stmt) -- not part of
         the scalar cost that vectorization would remove.  */
      if ((*life)[i])
        continue;

      /* If there is a non-vectorized use of the defs then the scalar
         stmt is kept live in which case we do not account it or any
         required defs in the SLP children in the scalar cost.  This
         way we make the vectorization more costly when compared to
         the scalar cost.  */
      FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
        {
          imm_use_iterator use_iter;
          gimple *use_stmt;
          /* A use outside the vectorized region, or by a stmt that is
             not pure-SLP, keeps this def (and hence the stmt) live.
             Debug uses do not count.  */
          FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
            if (!is_gimple_debug (use_stmt)
                && (! vect_stmt_in_region_p (vinfo_for_stmt (stmt)->vinfo,
                                             use_stmt)
                    || ! PURE_SLP_STMT (vinfo_for_stmt (use_stmt))))
              {
                (*life)[i] = true;
                BREAK_FROM_IMM_USE_STMT (use_iter);
              }
        }
      if ((*life)[i])
        continue;

      /* Count scalar stmts only once.  The visited flag is reset by the
         caller (vect_bb_vectorization_profitable_p) after costing.  */
      if (gimple_visited_p (stmt))
        continue;
      gimple_set_visited (stmt, true);

      /* Classify the stmt as load, store or plain computation for the
         target cost hook.  */
      stmt_info = vinfo_for_stmt (stmt);
      if (STMT_VINFO_DATA_REF (stmt_info))
        {
          if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
            stmt_cost = vect_get_stmt_cost (scalar_load);
          else
            stmt_cost = vect_get_stmt_cost (scalar_store);
        }
      else
        stmt_cost = vect_get_stmt_cost (scalar_stmt);

      scalar_cost += stmt_cost;
    }

  /* Recurse into internal-def children, accumulating their scalar cost.  */
  auto_vec<bool, 20> subtree_life;
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    {
      if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
        {
          /* Do not directly pass LIFE to the recursive call, copy it to
             confine changes in the callee to the current child/subtree.  */
          subtree_life.safe_splice (*life);
          scalar_cost += vect_bb_slp_scalar_cost (bb, child, &subtree_life);
          subtree_life.truncate (0);
        }
    }

  return scalar_cost;
}
299138fd1498Szrj
299238fd1498Szrj /* Check if vectorization of the basic block is profitable. */
299338fd1498Szrj
299438fd1498Szrj static bool
vect_bb_vectorization_profitable_p(bb_vec_info bb_vinfo)299538fd1498Szrj vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
299638fd1498Szrj {
299738fd1498Szrj vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
299838fd1498Szrj slp_instance instance;
299938fd1498Szrj int i;
300038fd1498Szrj unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
300138fd1498Szrj unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
300238fd1498Szrj
300338fd1498Szrj /* Calculate scalar cost. */
300438fd1498Szrj FOR_EACH_VEC_ELT (slp_instances, i, instance)
300538fd1498Szrj {
300638fd1498Szrj auto_vec<bool, 20> life;
300738fd1498Szrj life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
300838fd1498Szrj scalar_cost += vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
300938fd1498Szrj SLP_INSTANCE_TREE (instance),
301038fd1498Szrj &life);
301138fd1498Szrj }
301238fd1498Szrj
301338fd1498Szrj /* Unset visited flag. */
301438fd1498Szrj for (gimple_stmt_iterator gsi = bb_vinfo->region_begin;
301538fd1498Szrj gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
301638fd1498Szrj gimple_set_visited (gsi_stmt (gsi), false);
301738fd1498Szrj
301838fd1498Szrj /* Complete the target-specific cost calculation. */
301938fd1498Szrj finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
302038fd1498Szrj &vec_inside_cost, &vec_epilogue_cost);
302138fd1498Szrj
302238fd1498Szrj vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
302338fd1498Szrj
302438fd1498Szrj if (dump_enabled_p ())
302538fd1498Szrj {
302638fd1498Szrj dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
302738fd1498Szrj dump_printf (MSG_NOTE, " Vector inside of basic block cost: %d\n",
302838fd1498Szrj vec_inside_cost);
302938fd1498Szrj dump_printf (MSG_NOTE, " Vector prologue cost: %d\n", vec_prologue_cost);
303038fd1498Szrj dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n", vec_epilogue_cost);
303138fd1498Szrj dump_printf (MSG_NOTE, " Scalar cost of basic block: %d\n", scalar_cost);
303238fd1498Szrj }
303338fd1498Szrj
303438fd1498Szrj /* Vectorization is profitable if its cost is more than the cost of scalar
303538fd1498Szrj version. Note that we err on the vector side for equal cost because
303638fd1498Szrj the cost estimate is otherwise quite pessimistic (constant uses are
303738fd1498Szrj free on the scalar side but cost a load on the vector side for
303838fd1498Szrj example). */
303938fd1498Szrj if (vec_outside_cost + vec_inside_cost > scalar_cost)
304038fd1498Szrj return false;
304138fd1498Szrj
304238fd1498Szrj return true;
304338fd1498Szrj }
304438fd1498Szrj
304538fd1498Szrj /* Check if the basic block can be vectorized. Returns a bb_vec_info
304638fd1498Szrj if so and sets fatal to true if failure is independent of
304738fd1498Szrj current_vector_size. */
304838fd1498Szrj
304938fd1498Szrj static bb_vec_info
vect_slp_analyze_bb_1(gimple_stmt_iterator region_begin,gimple_stmt_iterator region_end,vec<data_reference_p> datarefs,int n_stmts,bool & fatal)305038fd1498Szrj vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin,
305138fd1498Szrj gimple_stmt_iterator region_end,
305238fd1498Szrj vec<data_reference_p> datarefs, int n_stmts,
305338fd1498Szrj bool &fatal)
305438fd1498Szrj {
305538fd1498Szrj bb_vec_info bb_vinfo;
305638fd1498Szrj slp_instance instance;
305738fd1498Szrj int i;
305838fd1498Szrj poly_uint64 min_vf = 2;
305938fd1498Szrj
306038fd1498Szrj /* The first group of checks is independent of the vector size. */
306138fd1498Szrj fatal = true;
306238fd1498Szrj
306338fd1498Szrj if (n_stmts > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
306438fd1498Szrj {
306538fd1498Szrj if (dump_enabled_p ())
306638fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
306738fd1498Szrj "not vectorized: too many instructions in "
306838fd1498Szrj "basic block.\n");
306938fd1498Szrj free_data_refs (datarefs);
307038fd1498Szrj return NULL;
307138fd1498Szrj }
307238fd1498Szrj
307338fd1498Szrj bb_vinfo = new _bb_vec_info (region_begin, region_end);
307438fd1498Szrj if (!bb_vinfo)
307538fd1498Szrj return NULL;
307638fd1498Szrj
307738fd1498Szrj BB_VINFO_DATAREFS (bb_vinfo) = datarefs;
307838fd1498Szrj
307938fd1498Szrj /* Analyze the data references. */
308038fd1498Szrj
308138fd1498Szrj if (!vect_analyze_data_refs (bb_vinfo, &min_vf))
308238fd1498Szrj {
308338fd1498Szrj if (dump_enabled_p ())
308438fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
308538fd1498Szrj "not vectorized: unhandled data-ref in basic "
308638fd1498Szrj "block.\n");
308738fd1498Szrj
308838fd1498Szrj delete bb_vinfo;
308938fd1498Szrj return NULL;
309038fd1498Szrj }
309138fd1498Szrj
309238fd1498Szrj if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
309338fd1498Szrj {
309438fd1498Szrj if (dump_enabled_p ())
309538fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
309638fd1498Szrj "not vectorized: not enough data-refs in "
309738fd1498Szrj "basic block.\n");
309838fd1498Szrj
309938fd1498Szrj delete bb_vinfo;
310038fd1498Szrj return NULL;
310138fd1498Szrj }
310238fd1498Szrj
310338fd1498Szrj if (!vect_analyze_data_ref_accesses (bb_vinfo))
310438fd1498Szrj {
310538fd1498Szrj if (dump_enabled_p ())
310638fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
310738fd1498Szrj "not vectorized: unhandled data access in "
310838fd1498Szrj "basic block.\n");
310938fd1498Szrj
311038fd1498Szrj delete bb_vinfo;
311138fd1498Szrj return NULL;
311238fd1498Szrj }
311338fd1498Szrj
311438fd1498Szrj /* If there are no grouped stores in the region there is no need
311538fd1498Szrj to continue with pattern recog as vect_analyze_slp will fail
311638fd1498Szrj anyway. */
311738fd1498Szrj if (bb_vinfo->grouped_stores.is_empty ())
311838fd1498Szrj {
311938fd1498Szrj if (dump_enabled_p ())
312038fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
312138fd1498Szrj "not vectorized: no grouped stores in "
312238fd1498Szrj "basic block.\n");
312338fd1498Szrj
312438fd1498Szrj delete bb_vinfo;
312538fd1498Szrj return NULL;
312638fd1498Szrj }
312738fd1498Szrj
312838fd1498Szrj /* While the rest of the analysis below depends on it in some way. */
312938fd1498Szrj fatal = false;
313038fd1498Szrj
313138fd1498Szrj vect_pattern_recog (bb_vinfo);
313238fd1498Szrj
313338fd1498Szrj /* Check the SLP opportunities in the basic block, analyze and build SLP
313438fd1498Szrj trees. */
313538fd1498Szrj if (!vect_analyze_slp (bb_vinfo, n_stmts))
313638fd1498Szrj {
313738fd1498Szrj if (dump_enabled_p ())
313838fd1498Szrj {
313938fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
314038fd1498Szrj "Failed to SLP the basic block.\n");
314138fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
314238fd1498Szrj "not vectorized: failed to find SLP opportunities "
314338fd1498Szrj "in basic block.\n");
314438fd1498Szrj }
314538fd1498Szrj
314638fd1498Szrj delete bb_vinfo;
314738fd1498Szrj return NULL;
314838fd1498Szrj }
314938fd1498Szrj
315038fd1498Szrj vect_record_base_alignments (bb_vinfo);
315138fd1498Szrj
315238fd1498Szrj /* Analyze and verify the alignment of data references and the
315338fd1498Szrj dependence in the SLP instances. */
315438fd1498Szrj for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); )
315538fd1498Szrj {
315638fd1498Szrj if (! vect_slp_analyze_and_verify_instance_alignment (instance)
315738fd1498Szrj || ! vect_slp_analyze_instance_dependence (instance))
315838fd1498Szrj {
315938fd1498Szrj dump_printf_loc (MSG_NOTE, vect_location,
316038fd1498Szrj "removing SLP instance operations starting from: ");
316138fd1498Szrj dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
316238fd1498Szrj SLP_TREE_SCALAR_STMTS
316338fd1498Szrj (SLP_INSTANCE_TREE (instance))[0], 0);
316438fd1498Szrj vect_free_slp_instance (instance);
316538fd1498Szrj BB_VINFO_SLP_INSTANCES (bb_vinfo).ordered_remove (i);
316638fd1498Szrj continue;
316738fd1498Szrj }
316838fd1498Szrj
316938fd1498Szrj /* Mark all the statements that we want to vectorize as pure SLP and
317038fd1498Szrj relevant. */
317138fd1498Szrj vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
317238fd1498Szrj vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
317338fd1498Szrj
317438fd1498Szrj i++;
317538fd1498Szrj }
317638fd1498Szrj if (! BB_VINFO_SLP_INSTANCES (bb_vinfo).length ())
317738fd1498Szrj {
317838fd1498Szrj delete bb_vinfo;
317938fd1498Szrj return NULL;
318038fd1498Szrj }
318138fd1498Szrj
318238fd1498Szrj if (!vect_slp_analyze_operations (bb_vinfo))
318338fd1498Szrj {
318438fd1498Szrj if (dump_enabled_p ())
318538fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
318638fd1498Szrj "not vectorized: bad operation in basic block.\n");
318738fd1498Szrj
318838fd1498Szrj delete bb_vinfo;
318938fd1498Szrj return NULL;
319038fd1498Szrj }
319138fd1498Szrj
319238fd1498Szrj /* Cost model: check if the vectorization is worthwhile. */
319338fd1498Szrj if (!unlimited_cost_model (NULL)
319438fd1498Szrj && !vect_bb_vectorization_profitable_p (bb_vinfo))
319538fd1498Szrj {
319638fd1498Szrj if (dump_enabled_p ())
319738fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
319838fd1498Szrj "not vectorized: vectorization is not "
319938fd1498Szrj "profitable.\n");
320038fd1498Szrj
320138fd1498Szrj delete bb_vinfo;
320238fd1498Szrj return NULL;
320338fd1498Szrj }
320438fd1498Szrj
320538fd1498Szrj if (dump_enabled_p ())
320638fd1498Szrj dump_printf_loc (MSG_NOTE, vect_location,
320738fd1498Szrj "Basic block will be vectorized using SLP\n");
320838fd1498Szrj
320938fd1498Szrj return bb_vinfo;
321038fd1498Szrj }
321138fd1498Szrj
321238fd1498Szrj
321338fd1498Szrj /* Main entry for the BB vectorizer. Analyze and transform BB, returns
321438fd1498Szrj true if anything in the basic-block was vectorized. */
321538fd1498Szrj
bool
vect_slp_bb (basic_block bb)
{
  bb_vec_info bb_vinfo;
  gimple_stmt_iterator gsi;
  bool any_vectorized = false;
  auto_vector_sizes vector_sizes;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "===vect_slp_analyze_bb===\n");

  /* Autodetect first vector size we try.  */
  current_vector_size = 0;
  targetm.vectorize.autovectorize_vector_sizes (&vector_sizes);
  unsigned int next_size = 0;

  gsi = gsi_start_bb (bb);

  poly_uint64 autodetected_vector_size = 0;
  /* Split BB into maximal regions of analyzable stmts (ending at the
     first stmt whose data refs cannot be found) and try to vectorize
     each region, retrying with smaller vector sizes on failure.  */
  while (1)
    {
      if (gsi_end_p (gsi))
        break;

      gimple_stmt_iterator region_begin = gsi;
      vec<data_reference_p> datarefs = vNULL;
      int insns = 0;

      /* Collect data references until an unhandled stmt ends the
         region; debug stmts are skipped and not counted.  */
      for (; !gsi_end_p (gsi); gsi_next (&gsi))
        {
          gimple *stmt = gsi_stmt (gsi);
          if (is_gimple_debug (stmt))
            continue;
          insns++;

          if (gimple_location (stmt) != UNKNOWN_LOCATION)
            vect_location = gimple_location (stmt);

          if (!find_data_references_in_stmt (NULL, stmt, &datarefs))
            break;
        }

      /* Skip leading unhandled stmts.  */
      if (gsi_stmt (region_begin) == gsi_stmt (gsi))
        {
          gsi_next (&gsi);
          continue;
        }

      gimple_stmt_iterator region_end = gsi;

      bool vectorized = false;
      bool fatal = false;
      /* Ownership of DATAREFS passes to vect_slp_analyze_bb_1.  */
      bb_vinfo = vect_slp_analyze_bb_1 (region_begin, region_end,
                                        datarefs, insns, fatal);
      if (bb_vinfo
          && dbg_cnt (vect_slp))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n");

          vect_schedule_slp (bb_vinfo);

          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "basic block part vectorized\n");

          vectorized = true;
        }
      delete bb_vinfo;

      any_vectorized |= vectorized;

      /* Remember the size picked by autodetection on the first try so
         we do not retry it redundantly from the size list.  */
      if (next_size == 0)
        autodetected_vector_size = current_vector_size;

      if (next_size < vector_sizes.length ()
          && known_eq (vector_sizes[next_size], autodetected_vector_size))
        next_size += 1;

      if (vectorized
          || next_size == vector_sizes.length ()
          || known_eq (current_vector_size, 0U)
          /* If vect_slp_analyze_bb_1 signaled that analysis for all
             vector sizes will fail do not bother iterating.  */
          || fatal)
        {
          if (gsi_end_p (region_end))
            break;

          /* Skip the unhandled stmt.  */
          gsi_next (&gsi);

          /* And reset vector sizes.  */
          current_vector_size = 0;
          next_size = 0;
        }
      else
        {
          /* Try the next biggest vector size.  */
          current_vector_size = vector_sizes[next_size++];
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location,
                               "***** Re-trying analysis with "
                               "vector size ");
              dump_dec (MSG_NOTE, current_vector_size);
              dump_printf (MSG_NOTE, "\n");
            }

          /* Start over.  */
          gsi = region_begin;
        }
    }

  return any_vectorized;
}
333338fd1498Szrj
333438fd1498Szrj
333538fd1498Szrj /* Return true if the vector type of the boolean constant that is
333638fd1498Szrj operand OPNUM in statement STMT is a boolean vector. */
333738fd1498Szrj
333838fd1498Szrj static bool
vect_mask_constant_operand_p(gimple * stmt,int opnum)333938fd1498Szrj vect_mask_constant_operand_p (gimple *stmt, int opnum)
334038fd1498Szrj {
334138fd1498Szrj stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
334238fd1498Szrj enum tree_code code = gimple_expr_code (stmt);
334338fd1498Szrj tree op, vectype;
334438fd1498Szrj gimple *def_stmt;
334538fd1498Szrj enum vect_def_type dt;
334638fd1498Szrj
334738fd1498Szrj /* For comparison and COND_EXPR type is chosen depending
334838fd1498Szrj on the other comparison operand. */
334938fd1498Szrj if (TREE_CODE_CLASS (code) == tcc_comparison)
335038fd1498Szrj {
335138fd1498Szrj if (opnum)
335238fd1498Szrj op = gimple_assign_rhs1 (stmt);
335338fd1498Szrj else
335438fd1498Szrj op = gimple_assign_rhs2 (stmt);
335538fd1498Szrj
335638fd1498Szrj if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &def_stmt,
335738fd1498Szrj &dt, &vectype))
335838fd1498Szrj gcc_unreachable ();
335938fd1498Szrj
336038fd1498Szrj return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
336138fd1498Szrj }
336238fd1498Szrj
336338fd1498Szrj if (code == COND_EXPR)
336438fd1498Szrj {
336538fd1498Szrj tree cond = gimple_assign_rhs1 (stmt);
336638fd1498Szrj
336738fd1498Szrj if (TREE_CODE (cond) == SSA_NAME)
336838fd1498Szrj op = cond;
336938fd1498Szrj else if (opnum)
337038fd1498Szrj op = TREE_OPERAND (cond, 1);
337138fd1498Szrj else
337238fd1498Szrj op = TREE_OPERAND (cond, 0);
337338fd1498Szrj
337438fd1498Szrj if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &def_stmt,
337538fd1498Szrj &dt, &vectype))
337638fd1498Szrj gcc_unreachable ();
337738fd1498Szrj
337838fd1498Szrj return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
337938fd1498Szrj }
338038fd1498Szrj
338138fd1498Szrj return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
338238fd1498Szrj }
338338fd1498Szrj
338438fd1498Szrj /* Build a variable-length vector in which the elements in ELTS are repeated
338538fd1498Szrj to fill NRESULTS vectors of type VECTOR_TYPE. Store the vectors in
338638fd1498Szrj RESULTS and add any new instructions to SEQ.
338738fd1498Szrj
338838fd1498Szrj The approach we use is:
338938fd1498Szrj
339038fd1498Szrj (1) Find a vector mode VM with integer elements of mode IM.
339138fd1498Szrj
339238fd1498Szrj (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
339338fd1498Szrj ELTS' has mode IM. This involves creating NELTS' VIEW_CONVERT_EXPRs
339438fd1498Szrj from small vectors to IM.
339538fd1498Szrj
339638fd1498Szrj (3) Duplicate each ELTS'[I] into a vector of mode VM.
339738fd1498Szrj
339838fd1498Szrj (4) Use a tree of interleaving VEC_PERM_EXPRs to create VMs with the
339938fd1498Szrj correct byte contents.
340038fd1498Szrj
340138fd1498Szrj (5) Use VIEW_CONVERT_EXPR to cast the final VMs to the required type.
340238fd1498Szrj
340338fd1498Szrj We try to find the largest IM for which this sequence works, in order
340438fd1498Szrj to cut down on the number of interleaves. */
340538fd1498Szrj
void
duplicate_and_interleave (gimple_seq *seq, tree vector_type, vec<tree> elts,
                          unsigned int nresults, vec<tree> &results)
{
  unsigned int nelts = elts.length ();
  tree element_type = TREE_TYPE (vector_type);

  /* (1) Find a vector mode VM with integer elements of mode IM.  */
  unsigned int nvectors = 1;
  tree new_vector_type;
  tree permutes[2];
  /* Callers must have checked can_duplicate_and_interleave_p first.  */
  if (!can_duplicate_and_interleave_p (nelts, TYPE_MODE (element_type),
                                       &nvectors, &new_vector_type,
                                       permutes))
    gcc_unreachable ();

  /* Get a vector type that holds ELTS[0:NELTS/NELTS'].  */
  unsigned int partial_nelts = nelts / nvectors;
  tree partial_vector_type = build_vector_type (element_type, partial_nelts);

  /* PIECES holds the intermediate vectors: slots [0, NVECTORS) and
     [NVECTORS, 2*NVECTORS) alternate roles as inputs and outputs of
     each interleave round (see the in_start/out_start swap below).  */
  tree_vector_builder partial_elts;
  auto_vec<tree, 32> pieces (nvectors * 2);
  pieces.quick_grow (nvectors * 2);
  for (unsigned int i = 0; i < nvectors; ++i)
    {
      /* (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
         ELTS' has mode IM.  */
      partial_elts.new_vector (partial_vector_type, partial_nelts, 1);
      for (unsigned int j = 0; j < partial_nelts; ++j)
        partial_elts.quick_push (elts[i * partial_nelts + j]);
      tree t = gimple_build_vector (seq, &partial_elts);
      t = gimple_build (seq, VIEW_CONVERT_EXPR,
                        TREE_TYPE (new_vector_type), t);

      /* (3) Duplicate each ELTS'[I] into a vector of mode VM.  */
      pieces[i] = gimple_build_vector_from_val (seq, new_vector_type, t);
    }

  /* (4) Use a tree of VEC_PERM_EXPRs to create a single VM with the
     correct byte contents.

     We need to repeat the following operation log2(nvectors) times:

      out[i * 2] = VEC_PERM_EXPR (in[i], in[i + hi_start], lo_permute);
      out[i * 2 + 1] = VEC_PERM_EXPR (in[i], in[i + hi_start], hi_permute);

     However, if each input repeats every N elements and the VF is
     a multiple of N * 2, the HI result is the same as the LO.  */
  unsigned int in_start = 0;
  unsigned int out_start = nvectors;
  unsigned int hi_start = nvectors / 2;
  /* A bound on the number of outputs needed to produce NRESULTS results
     in the final iteration.  */
  unsigned int noutputs_bound = nvectors * nresults;
  for (unsigned int in_repeat = 1; in_repeat < nvectors; in_repeat *= 2)
    {
      /* Each round halves the number of distinct outputs we still need.  */
      noutputs_bound /= 2;
      unsigned int limit = MIN (noutputs_bound, nvectors);
      for (unsigned int i = 0; i < limit; ++i)
        {
          /* HI output equals the LO output just built when the inputs
             repeat often enough (see comment above); reuse it.  */
          if ((i & 1) != 0
              && multiple_p (TYPE_VECTOR_SUBPARTS (new_vector_type),
                             2 * in_repeat))
            {
              pieces[out_start + i] = pieces[out_start + i - 1];
              continue;
            }

          tree output = make_ssa_name (new_vector_type);
          tree input1 = pieces[in_start + (i / 2)];
          tree input2 = pieces[in_start + (i / 2) + hi_start];
          gassign *stmt = gimple_build_assign (output, VEC_PERM_EXPR,
                                               input1, input2,
                                               permutes[i & 1]);
          gimple_seq_add_stmt (seq, stmt);
          pieces[out_start + i] = output;
        }
      /* Ping-pong: this round's outputs become the next round's inputs.  */
      std::swap (in_start, out_start);
    }

  /* (5) Use VIEW_CONVERT_EXPR to cast the final VM to the required type.
     After the final swap IN_START indexes the last round's outputs.
     If fewer than NRESULTS distinct vectors exist, repeat them.  */
  results.reserve (nresults);
  for (unsigned int i = 0; i < nresults; ++i)
    if (i < nvectors)
      results.quick_push (gimple_build (seq, VIEW_CONVERT_EXPR, vector_type,
                                        pieces[in_start + i]));
    else
      results.quick_push (results[i - nvectors]);
}
349538fd1498Szrj
349638fd1498Szrj
349738fd1498Szrj /* For constant and loop invariant defs of SLP_NODE this function returns
349838fd1498Szrj (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
349938fd1498Szrj OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
350038fd1498Szrj scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
350138fd1498Szrj REDUC_INDEX is the index of the reduction operand in the statements, unless
350238fd1498Szrj it is -1. */
350338fd1498Szrj
static void
vect_get_constant_vectors (tree op, slp_tree slp_node,
                           vec<tree> *vec_oprnds,
                           unsigned int op_num, unsigned int number_of_vectors)
{
  vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
  gimple *stmt = stmts[0];
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  unsigned HOST_WIDE_INT nunits;
  tree vec_cst;
  unsigned j, number_of_places_left_in_vector;
  tree vector_type;
  tree vop;
  int group_size = stmts.length ();
  unsigned int vec_num, i;
  unsigned number_of_copies = 1;
  vec<tree> voprnds;
  voprnds.create (number_of_vectors);
  bool constant_p, is_store;
  tree neutral_op = NULL;
  enum tree_code code = gimple_expr_code (stmt);
  gimple_seq ctor_seq = NULL;
  auto_vec<tree, 16> permute_results;

  /* Check if vector type is a boolean vector.  */
  if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
      && vect_mask_constant_operand_p (stmt, op_num))
    vector_type
      = build_same_sized_truth_vector_type (STMT_VINFO_VECTYPE (stmt_vinfo));
  else
    vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));

  /* For a store the operand being built is always the stored value.  */
  if (STMT_VINFO_DATA_REF (stmt_vinfo))
    {
      is_store = true;
      op = gimple_assign_rhs1 (stmt);
    }
  else
    is_store = false;

  gcc_assert (op);

  /* NUMBER_OF_COPIES is the number of times we need to use the same values in
     created vectors.  It is greater than 1 if unrolling is performed.

     For example, we have two scalar operands, s1 and s2 (e.g., group of
     strided accesses of size two), while NUNITS is four (i.e., four scalars
     of this type can be packed in a vector).  The output vector will contain
     two copies of each scalar operand: {s1, s2, s1, s2}.  (NUMBER_OF_COPIES
     will be 2).

     If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
     containing the operands.

     For example, NUNITS is four as before, and the group size is 8
     (s1, s2, ..., s8).  We will create two vectors {s1, s2, s3, s4} and
     {s5, s6, s7, s8}.  */

  /* When using duplicate_and_interleave, we just need one element for
     each scalar statement.  */
  if (!TYPE_VECTOR_SUBPARTS (vector_type).is_constant (&nunits))
    nunits = group_size;

  number_of_copies = nunits * number_of_vectors / group_size;

  number_of_places_left_in_vector = nunits;
  constant_p = true;
  tree_vector_builder elts (vector_type, nunits, 1);
  elts.quick_grow (nunits);
  bool place_after_defs = false;
  for (j = 0; j < number_of_copies; j++)
    {
      /* Walk the group backwards; ELTS is filled from the highest index
	 down, so the finished vectors hold the scalars in source order.  */
      for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
        {
          if (is_store)
            op = gimple_assign_rhs1 (stmt);
          else
	    {
	      /* Pick out operand OP_NUM of this scalar statement; the
		 access pattern depends on the statement kind.  */
	      switch (code)
		{
		  case COND_EXPR:
		    {
		      tree cond = gimple_assign_rhs1 (stmt);
		      if (TREE_CODE (cond) == SSA_NAME)
			op = gimple_op (stmt, op_num + 1);
		      else if (op_num == 0 || op_num == 1)
			op = TREE_OPERAND (cond, op_num);
		      else
			{
			  if (op_num == 2)
			    op = gimple_assign_rhs2 (stmt);
			  else
			    op = gimple_assign_rhs3 (stmt);
			}
		    }
		    break;

		  case CALL_EXPR:
		    op = gimple_call_arg (stmt, op_num);
		    break;

		  case LSHIFT_EXPR:
		  case RSHIFT_EXPR:
		  case LROTATE_EXPR:
		  case RROTATE_EXPR:
		    op = gimple_op (stmt, op_num + 1);
		    /* Unlike the other binary operators, shifts/rotates have
		       the shift count being int, instead of the same type as
		       the lhs, so make sure the scalar is the right type if
		       we are dealing with vectors of
		       long long/long/short/char.  */
		    if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
		      op = fold_convert (TREE_TYPE (vector_type), op);
		    break;

		  default:
		    op = gimple_op (stmt, op_num + 1);
		    break;
		}
	    }

          /* Create 'vect_ = {op0,op1,...,opn}'.  */
          number_of_places_left_in_vector--;
	  tree orig_op = op;
	  /* If the scalar type differs from the vector element type,
	     convert the element: fold constants directly, otherwise emit a
	     conversion statement into CTOR_SEQ.  */
	  if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
	    {
	      if (CONSTANT_CLASS_P (op))
		{
		  if (VECTOR_BOOLEAN_TYPE_P (vector_type))
		    {
		      /* Can't use VIEW_CONVERT_EXPR for booleans because
			 of possibly different sizes of scalar value and
			 vector element.  */
		      if (integer_zerop (op))
			op = build_int_cst (TREE_TYPE (vector_type), 0);
		      else if (integer_onep (op))
			op = build_all_ones_cst (TREE_TYPE (vector_type));
		      else
			gcc_unreachable ();
		    }
		  else
		    op = fold_unary (VIEW_CONVERT_EXPR,
				     TREE_TYPE (vector_type), op);
		  gcc_assert (op && CONSTANT_CLASS_P (op));
		}
	      else
		{
		  tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
		  gimple *init_stmt;
		  if (VECTOR_BOOLEAN_TYPE_P (vector_type))
		    {
		      tree true_val
			= build_all_ones_cst (TREE_TYPE (vector_type));
		      tree false_val
			= build_zero_cst (TREE_TYPE (vector_type));
		      gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op)));
		      init_stmt = gimple_build_assign (new_temp, COND_EXPR,
						       op, true_val,
						       false_val);
		    }
		  else
		    {
		      op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
				   op);
		      init_stmt
			= gimple_build_assign (new_temp, VIEW_CONVERT_EXPR,
					       op);
		    }
		  gimple_seq_add_stmt (&ctor_seq, init_stmt);
		  op = new_temp;
		}
	    }
          elts[number_of_places_left_in_vector] = op;
          if (!CONSTANT_CLASS_P (op))
            constant_p = false;
	  /* An SSA def local to the vectorized basic block forces the
	     constructor to be placed after the defs rather than at the
	     region start.  */
          if (TREE_CODE (orig_op) == SSA_NAME
              && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
              && STMT_VINFO_BB_VINFO (stmt_vinfo)
              && (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb
                  == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
            place_after_defs = true;

	  /* A full vector's worth of elements has been collected; emit it.  */
          if (number_of_places_left_in_vector == 0)
            {
	      if (constant_p
		  ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type), nunits)
		  : known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits))
		vec_cst = gimple_build_vector (&ctor_seq, &elts);
	      else
		{
		  if (vec_oprnds->is_empty ())
		    duplicate_and_interleave (&ctor_seq, vector_type, elts,
					      number_of_vectors,
					      permute_results);
		  vec_cst = permute_results[number_of_vectors - j - 1];
		}
	      tree init;
	      gimple_stmt_iterator gsi;
	      if (place_after_defs)
		{
		  gsi = gsi_for_stmt
			  (vect_find_last_scalar_stmt_in_slp (slp_node));
		  init = vect_init_vector (stmt, vec_cst, vector_type, &gsi);
		}
	      else
		init = vect_init_vector (stmt, vec_cst, vector_type, NULL);
	      /* Any conversion stmts collected above must dominate the
		 constructor; insert them just before INIT's definition.  */
	      if (ctor_seq != NULL)
		{
		  gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
		  gsi_insert_seq_before (&gsi, ctor_seq, GSI_SAME_STMT);
		  ctor_seq = NULL;
		}
	      voprnds.quick_push (init);
	      place_after_defs = false;
              number_of_places_left_in_vector = nunits;
	      constant_p = true;
	      elts.new_vector (vector_type, nunits, 1);
	      elts.quick_grow (nunits);
            }
        }
    }

  /* Since the vectors are created in the reverse order, we should invert
     them.  */
  vec_num = voprnds.length ();
  for (j = vec_num; j != 0; j--)
    {
      vop = voprnds[j - 1];
      vec_oprnds->quick_push (vop);
    }

  voprnds.release ();

  /* In case that VF is greater than the unrolling factor needed for the SLP
     group of stmts, NUMBER_OF_VECTORS to be created is greater than
     NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
     to replicate the vectors.  */
  while (number_of_vectors > vec_oprnds->length ())
    {
      tree neutral_vec = NULL;

      /* NOTE: NEUTRAL_OP is never assigned in this function, so in
	 practice only the else branch below runs, cycling through the
	 vectors created above.  */
      if (neutral_op)
        {
          if (!neutral_vec)
	    neutral_vec = build_vector_from_val (vector_type, neutral_op);

          vec_oprnds->quick_push (neutral_vec);
        }
      else
        {
          for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
            vec_oprnds->quick_push (vop);
        }
    }
}
375938fd1498Szrj
376038fd1498Szrj
376138fd1498Szrj /* Get vectorized definitions from SLP_NODE that contains corresponding
376238fd1498Szrj vectorized def-stmts. */
376338fd1498Szrj
376438fd1498Szrj static void
vect_get_slp_vect_defs(slp_tree slp_node,vec<tree> * vec_oprnds)376538fd1498Szrj vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
376638fd1498Szrj {
376738fd1498Szrj tree vec_oprnd;
376838fd1498Szrj gimple *vec_def_stmt;
376938fd1498Szrj unsigned int i;
377038fd1498Szrj
377138fd1498Szrj gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
377238fd1498Szrj
377338fd1498Szrj FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
377438fd1498Szrj {
377538fd1498Szrj gcc_assert (vec_def_stmt);
377638fd1498Szrj if (gimple_code (vec_def_stmt) == GIMPLE_PHI)
377738fd1498Szrj vec_oprnd = gimple_phi_result (vec_def_stmt);
377838fd1498Szrj else
377938fd1498Szrj vec_oprnd = gimple_get_lhs (vec_def_stmt);
378038fd1498Szrj vec_oprnds->quick_push (vec_oprnd);
378138fd1498Szrj }
378238fd1498Szrj }
378338fd1498Szrj
378438fd1498Szrj
378538fd1498Szrj /* Get vectorized definitions for SLP_NODE.
378638fd1498Szrj If the scalar definitions are loop invariants or constants, collect them and
378738fd1498Szrj call vect_get_constant_vectors() to create vector stmts.
378838fd1498Szrj Otherwise, the def-stmts must be already vectorized and the vectorized stmts
378938fd1498Szrj must be stored in the corresponding child of SLP_NODE, and we call
379038fd1498Szrj vect_get_slp_vect_defs () to retrieve them. */
379138fd1498Szrj
void
vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
		   vec<vec<tree> > *vec_oprnds)
{
  gimple *first_stmt;
  int number_of_vects = 0, i;
  unsigned int child_index = 0;
  HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
  slp_tree child = NULL;
  vec<tree> vec_defs;
  tree oprnd;
  bool vectorized_defs;

  first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
  FOR_EACH_VEC_ELT (ops, i, oprnd)
    {
      /* For each operand we check if it has vectorized definitions in a child
	 node or we need to create them (for invariants and constants).  We
	 check if the LHS of the first stmt of the next child matches OPRND.
	 If it does, we found the correct child.  Otherwise, we call
	 vect_get_constant_vectors (), and not advance CHILD_INDEX in order
	 to check this child node for the next operand.  */
      vectorized_defs = false;
      if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
        {
          child = SLP_TREE_CHILDREN (slp_node)[child_index];

	  /* We have to check both pattern and original def, if available.  */
	  if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
	    {
	      gimple *first_def = SLP_TREE_SCALAR_STMTS (child)[0];
	      gimple *related
		= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
	      tree first_def_op;

	      if (gimple_code (first_def) == GIMPLE_PHI)
		first_def_op = gimple_phi_result (first_def);
	      else
		first_def_op = gimple_get_lhs (first_def);
	      if (operand_equal_p (oprnd, first_def_op, 0)
		  || (related
		      && operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
		{
		  /* The number of vector defs is determined by the number of
		     vector statements in the node from which we get those
		     statements.  */
		  number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
		  vectorized_defs = true;
		  child_index++;
		}
	    }
	  else
	    /* Constant/external child: skip it; the defs are built below.  */
	    child_index++;
        }

      if (!vectorized_defs)
        {
	  /* For operands past the first, NUMBER_OF_VECTS keeps the value
	     computed for an earlier operand.  */
          if (i == 0)
            {
              number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
              /* Number of vector stmts was calculated according to LHS in
                 vect_schedule_slp_instance (), fix it by replacing LHS with
                 RHS, if necessary.  See vect_get_smallest_scalar_type () for
                 details.  */
              vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
                                             &rhs_size_unit);
              if (rhs_size_unit != lhs_size_unit)
                {
                  number_of_vects *= rhs_size_unit;
                  number_of_vects /= lhs_size_unit;
                }
            }
        }

      /* Allocate memory for vectorized defs.  */
      vec_defs = vNULL;
      vec_defs.create (number_of_vects);

      /* For reduction defs we call vect_get_constant_vectors (), since we are
         looking for initial loop invariant values.  */
      if (vectorized_defs)
        /* The defs are already vectorized.  */
	vect_get_slp_vect_defs (child, &vec_defs);
      else
	/* Build vectors from scalar defs.  */
	vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
				   number_of_vects);

      vec_oprnds->quick_push (vec_defs);
    }
}
388338fd1498Szrj
388438fd1498Szrj /* Generate vector permute statements from a list of loads in DR_CHAIN.
388538fd1498Szrj If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
388638fd1498Szrj permute statements for the SLP node NODE of the SLP instance
388738fd1498Szrj SLP_NODE_INSTANCE. */
388838fd1498Szrj
bool
vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
			      gimple_stmt_iterator *gsi, poly_uint64 vf,
			      slp_instance slp_node_instance, bool analyze_only,
			      unsigned *n_perms)
{
  gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree mask_element_type = NULL_TREE, mask_type;
  int vec_index = 0;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
  unsigned int mask_element;
  machine_mode mode;
  unsigned HOST_WIDE_INT nunits, const_vf;

  if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
    return false;

  /* Group sizes/offsets are recorded on the first element of the group.  */
  stmt_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info));

  mode = TYPE_MODE (vectype);

  /* At the moment, all permutations are represented using per-element
     indices, so we can't cope with variable vector lengths or
     vectorization factors.  */
  if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
      || !vf.is_constant (&const_vf))
    return false;

  /* The generic VEC_PERM_EXPR code always uses an integral type of the
     same size as the vector element being permuted.  */
  mask_element_type = lang_hooks.types.type_for_mode
    (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))).require (), 1);
  mask_type = get_vectype_for_scalar_type (mask_element_type);
  vec_perm_builder mask (nunits, nunits, 1);
  mask.quick_grow (nunits);
  vec_perm_indices indices;

  /* Initialize the vect stmts of NODE to properly insert the generated
     stmts later.  */
  if (! analyze_only)
    for (unsigned i = SLP_TREE_VEC_STMTS (node).length ();
	 i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
      SLP_TREE_VEC_STMTS (node).quick_push (NULL);

  /* Generate permutation masks for every NODE. Number of masks for each NODE
     is equal to GROUP_SIZE.
     E.g., we have a group of three nodes with three loads from the same
     location in each node, and the vector size is 4. I.e., we have a
     a0b0c0a1b1c1... sequence and we need to create the following vectors:
     for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
     for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
     ...

     The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
     The last mask is illegal since we assume two operands for permute
     operation, and the mask element values can't be outside that range.
     Hence, the last mask must be converted into {2,5,5,5}.
     For the first two permutations we need the first and the second input
     vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
     we need the second and the third vectors: {b1,c1,a2,b2} and
     {c2,a3,b3,c3}.  */

  int vect_stmts_counter = 0;
  unsigned int index = 0;
  int first_vec_index = -1;
  int second_vec_index = -1;
  bool noop_p = true;
  *n_perms = 0;

  for (unsigned int j = 0; j < const_vf; j++)
    {
      for (int k = 0; k < group_size; k++)
	{
	  /* Flat element index within the whole load chain.  */
	  unsigned int i = (SLP_TREE_LOAD_PERMUTATION (node)[k]
			    + j * STMT_VINFO_GROUP_SIZE (stmt_info));
	  vec_index = i / nunits;
	  mask_element = i % nunits;
	  if (vec_index == first_vec_index
	      || first_vec_index == -1)
	    {
	      first_vec_index = vec_index;
	    }
	  else if (vec_index == second_vec_index
		   || second_vec_index == -1)
	    {
	      second_vec_index = vec_index;
	      /* Elements of the second input vector are addressed with
		 indices in [nunits, 2*nunits).  */
	      mask_element += nunits;
	    }
	  else
	    {
	      /* A two-input VEC_PERM_EXPR cannot draw from three or more
		 source vectors; the permutation is unsupported.  */
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "permutation requires at "
				   "least three vectors ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				    stmt, 0);
		}
	      gcc_assert (analyze_only);
	      return false;
	    }

	  gcc_assert (mask_element < 2 * nunits);
	  if (mask_element != index)
	    noop_p = false;
	  mask[index++] = mask_element;

	  if (index == nunits && !noop_p)
	    {
	      /* A full, non-identity mask: verify the target supports it.  */
	      indices.new_vector (mask, 2, nunits);
	      if (!can_vec_perm_const_p (mode, indices))
		{
		  if (dump_enabled_p ())
		    {
		      dump_printf_loc (MSG_MISSED_OPTIMIZATION,
				       vect_location,
				       "unsupported vect permute { ");
		      for (i = 0; i < nunits; ++i)
			{
			  dump_dec (MSG_MISSED_OPTIMIZATION, mask[i]);
			  dump_printf (MSG_MISSED_OPTIMIZATION, " ");
			}
		      dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
		    }
		  gcc_assert (analyze_only);
		  return false;
		}

	      ++*n_perms;
	    }

	  if (index == nunits)
	    {
	      if (!analyze_only)
		{
		  tree mask_vec = NULL_TREE;

		  if (! noop_p)
		    mask_vec = vec_perm_indices_to_tree (mask_type, indices);

		  if (second_vec_index == -1)
		    second_vec_index = first_vec_index;

		  /* Generate the permute statement if necessary.  */
		  tree first_vec = dr_chain[first_vec_index];
		  tree second_vec = dr_chain[second_vec_index];
		  gimple *perm_stmt;
		  if (! noop_p)
		    {
		      tree perm_dest
			= vect_create_destination_var (gimple_assign_lhs (stmt),
						       vectype);
		      perm_dest = make_ssa_name (perm_dest);
		      perm_stmt = gimple_build_assign (perm_dest,
						       VEC_PERM_EXPR,
						       first_vec, second_vec,
						       mask_vec);
		      vect_finish_stmt_generation (stmt, perm_stmt, gsi);
		    }
		  else
		    /* If mask was NULL_TREE generate the requested
		       identity transform.  */
		    perm_stmt = SSA_NAME_DEF_STMT (first_vec);

		  /* Store the vector statement in NODE.  */
		  SLP_TREE_VEC_STMTS (node)[vect_stmts_counter++] = perm_stmt;
		}

	      /* Reset state for the next output vector.  */
	      index = 0;
	      first_vec_index = -1;
	      second_vec_index = -1;
	      noop_p = true;
	    }
	}
    }

  return true;
}
406938fd1498Szrj
/* Map from a sequence of scalar stmts to the SLP node that vectorizes
   them, used by vect_schedule_slp_instance to share the vectorized
   stmts of identical subtrees.  */
typedef hash_map <vec <gimple *>, slp_tree,
		  simple_hashmap_traits <bst_traits, slp_tree> >
  scalar_stmts_to_slp_tree_map_t;
407338fd1498Szrj
407438fd1498Szrj /* Vectorize SLP instance tree in postorder. */
407538fd1498Szrj
static bool
vect_schedule_slp_instance (slp_tree node, slp_instance instance,
			    scalar_stmts_to_slp_tree_map_t *bst_map)
{
  gimple *stmt;
  bool grouped_store, is_store;
  gimple_stmt_iterator si;
  stmt_vec_info stmt_info;
  unsigned int group_size;
  tree vectype;
  int i, j;
  slp_tree child;

  /* Constants/externals have no stmts to vectorize.  */
  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return false;

  /* See if we have already vectorized the same set of stmts and reuse their
     vectorized stmts.  */
  if (slp_tree *leader = bst_map->get (SLP_TREE_SCALAR_STMTS (node)))
    {
      SLP_TREE_VEC_STMTS (node).safe_splice (SLP_TREE_VEC_STMTS (*leader));
      SLP_TREE_NUMBER_OF_VEC_STMTS (node)
	= SLP_TREE_NUMBER_OF_VEC_STMTS (*leader);
      return false;
    }

  /* Register this node as the leader for its scalar-stmt set, then
     vectorize all children first (postorder).  */
  bst_map->put (SLP_TREE_SCALAR_STMTS (node).copy (), node);
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_schedule_slp_instance (child, instance, bst_map);

  /* Push SLP node def-type to stmts.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);

  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  stmt_info = vinfo_for_stmt (stmt);

  /* VECTYPE is the type of the destination.  */
  vectype = STMT_VINFO_VECTYPE (stmt_info);
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  group_size = SLP_INSTANCE_GROUP_SIZE (instance);

  if (!SLP_TREE_VEC_STMTS (node).exists ())
    SLP_TREE_VEC_STMTS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE,vect_location,
		       "------>vectorizing SLP node starting from: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
    }

  /* Vectorized stmts go before the last scalar stmt which is where
     all uses are ready.  */
  si = gsi_for_stmt (vect_find_last_scalar_stmt_in_slp (node));

  /* Mark the first element of the reduction chain as reduction to properly
     transform the node.  In the analysis phase only the last element of the
     chain is marked as reduction.  */
  if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_GROUPED_ACCESS (stmt_info)
      && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
    {
      STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
      STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
    }

  /* Handle two-operation SLP nodes by vectorizing the group with
     both operations and then performing a merge.  */
  if (SLP_TREE_TWO_OPERATORS (node))
    {
      enum tree_code code0 = gimple_assign_rhs_code (stmt);
      enum tree_code ocode = ERROR_MARK;
      gimple *ostmt;
      /* MASK records, per group member, which of the two operations it
	 uses (0 = CODE0, 1 = OCODE).  */
      vec_perm_builder mask (group_size, group_size, 1);
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt)
	if (gimple_assign_rhs_code (ostmt) != code0)
	  {
	    mask.quick_push (1);
	    ocode = gimple_assign_rhs_code (ostmt);
	  }
	else
	  mask.quick_push (0);
      if (ocode != ERROR_MARK)
	{
	  vec<gimple *> v0;
	  vec<gimple *> v1;
	  unsigned j;
	  tree tmask = NULL_TREE;
	  /* Vectorize the whole group once with CODE0 (results in V0) and
	     once with OCODE (results in V1), then blend per-lane.  */
	  vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
	  v0 = SLP_TREE_VEC_STMTS (node).copy ();
	  SLP_TREE_VEC_STMTS (node).truncate (0);
	  gimple_assign_set_rhs_code (stmt, ocode);
	  vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
	  gimple_assign_set_rhs_code (stmt, code0);
	  v1 = SLP_TREE_VEC_STMTS (node).copy ();
	  SLP_TREE_VEC_STMTS (node).truncate (0);
	  tree meltype = build_nonstandard_integer_type
	      (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype))), 1);
	  tree mvectype = get_same_sized_vectype (meltype, vectype);
	  unsigned k = 0, l;
	  for (j = 0; j < v0.length (); ++j)
	    {
	      /* Enforced by vect_build_slp_tree, which rejects variable-length
		 vectors for SLP_TREE_TWO_OPERATORS.  */
	      unsigned int const_nunits = nunits.to_constant ();
	      tree_vector_builder melts (mvectype, const_nunits, 1);
	      for (l = 0; l < const_nunits; ++l)
		{
		  if (k >= group_size)
		    k = 0;
		  tree t = build_int_cst (meltype,
					  mask[k++] * const_nunits + l);
		  melts.quick_push (t);
		}
	      tmask = melts.build ();

	      /* ??? Not all targets support a VEC_PERM_EXPR with a
		 constant mask that would translate to a vec_merge RTX
		 (with their vec_perm_const_ok).  We can either not
		 vectorize in that case or let veclower do its job.
		 Unfortunately that isn't too great and at least for
		 plus/minus we'd eventually like to match targets
		 vector addsub instructions.  */
	      gimple *vstmt;
	      vstmt = gimple_build_assign (make_ssa_name (vectype),
					   VEC_PERM_EXPR,
					   gimple_assign_lhs (v0[j]),
					   gimple_assign_lhs (v1[j]), tmask);
	      vect_finish_stmt_generation (stmt, vstmt, &si);
	      SLP_TREE_VEC_STMTS (node).quick_push (vstmt);
	    }
	  v0.release ();
	  v1.release ();
	  return false;
	}
    }
  is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);

  /* Restore stmt def-types.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;

  return is_store;
}
422438fd1498Szrj
422538fd1498Szrj /* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
422638fd1498Szrj For loop vectorization this is done in vectorizable_call, but for SLP
422738fd1498Szrj it needs to be deferred until end of vect_schedule_slp, because multiple
422838fd1498Szrj SLP instances may refer to the same scalar stmt. */
422938fd1498Szrj
423038fd1498Szrj static void
vect_remove_slp_scalar_calls(slp_tree node)423138fd1498Szrj vect_remove_slp_scalar_calls (slp_tree node)
423238fd1498Szrj {
423338fd1498Szrj gimple *stmt, *new_stmt;
423438fd1498Szrj gimple_stmt_iterator gsi;
423538fd1498Szrj int i;
423638fd1498Szrj slp_tree child;
423738fd1498Szrj tree lhs;
423838fd1498Szrj stmt_vec_info stmt_info;
423938fd1498Szrj
424038fd1498Szrj if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
424138fd1498Szrj return;
424238fd1498Szrj
424338fd1498Szrj FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
424438fd1498Szrj vect_remove_slp_scalar_calls (child);
424538fd1498Szrj
424638fd1498Szrj FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
424738fd1498Szrj {
424838fd1498Szrj if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
424938fd1498Szrj continue;
425038fd1498Szrj stmt_info = vinfo_for_stmt (stmt);
425138fd1498Szrj if (stmt_info == NULL
425238fd1498Szrj || is_pattern_stmt_p (stmt_info)
425338fd1498Szrj || !PURE_SLP_STMT (stmt_info))
425438fd1498Szrj continue;
425538fd1498Szrj lhs = gimple_call_lhs (stmt);
425638fd1498Szrj new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
425738fd1498Szrj set_vinfo_for_stmt (new_stmt, stmt_info);
425838fd1498Szrj set_vinfo_for_stmt (stmt, NULL);
425938fd1498Szrj STMT_VINFO_STMT (stmt_info) = new_stmt;
426038fd1498Szrj gsi = gsi_for_stmt (stmt);
426138fd1498Szrj gsi_replace (&gsi, new_stmt, false);
426238fd1498Szrj SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
426338fd1498Szrj }
426438fd1498Szrj }
426538fd1498Szrj
426638fd1498Szrj /* Generate vector code for all SLP instances in the loop/basic block. */
426738fd1498Szrj
426838fd1498Szrj bool
vect_schedule_slp(vec_info * vinfo)426938fd1498Szrj vect_schedule_slp (vec_info *vinfo)
427038fd1498Szrj {
427138fd1498Szrj vec<slp_instance> slp_instances;
427238fd1498Szrj slp_instance instance;
427338fd1498Szrj unsigned int i;
427438fd1498Szrj bool is_store = false;
427538fd1498Szrj
427638fd1498Szrj
427738fd1498Szrj scalar_stmts_to_slp_tree_map_t *bst_map
427838fd1498Szrj = new scalar_stmts_to_slp_tree_map_t ();
427938fd1498Szrj slp_instances = vinfo->slp_instances;
428038fd1498Szrj FOR_EACH_VEC_ELT (slp_instances, i, instance)
428138fd1498Szrj {
428238fd1498Szrj /* Schedule the tree of INSTANCE. */
428338fd1498Szrj is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
428438fd1498Szrj instance, bst_map);
428538fd1498Szrj if (dump_enabled_p ())
428638fd1498Szrj dump_printf_loc (MSG_NOTE, vect_location,
428738fd1498Szrj "vectorizing stmts using SLP.\n");
428838fd1498Szrj }
428938fd1498Szrj delete bst_map;
429038fd1498Szrj
429138fd1498Szrj FOR_EACH_VEC_ELT (slp_instances, i, instance)
429238fd1498Szrj {
429338fd1498Szrj slp_tree root = SLP_INSTANCE_TREE (instance);
429438fd1498Szrj gimple *store;
429538fd1498Szrj unsigned int j;
429638fd1498Szrj gimple_stmt_iterator gsi;
429738fd1498Szrj
429838fd1498Szrj /* Remove scalar call stmts. Do not do this for basic-block
429938fd1498Szrj vectorization as not all uses may be vectorized.
430038fd1498Szrj ??? Why should this be necessary? DCE should be able to
430138fd1498Szrj remove the stmts itself.
430238fd1498Szrj ??? For BB vectorization we can as well remove scalar
430338fd1498Szrj stmts starting from the SLP tree root if they have no
430438fd1498Szrj uses. */
430538fd1498Szrj if (is_a <loop_vec_info> (vinfo))
430638fd1498Szrj vect_remove_slp_scalar_calls (root);
430738fd1498Szrj
430838fd1498Szrj for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store)
430938fd1498Szrj && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
431038fd1498Szrj {
431138fd1498Szrj if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
431238fd1498Szrj break;
431338fd1498Szrj
431438fd1498Szrj if (is_pattern_stmt_p (vinfo_for_stmt (store)))
431538fd1498Szrj store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
431638fd1498Szrj /* Free the attached stmt_vec_info and remove the stmt. */
431738fd1498Szrj gsi = gsi_for_stmt (store);
431838fd1498Szrj unlink_stmt_vdef (store);
431938fd1498Szrj gsi_remove (&gsi, true);
432038fd1498Szrj release_defs (store);
432138fd1498Szrj free_stmt_vec_info (store);
432238fd1498Szrj }
432338fd1498Szrj }
432438fd1498Szrj
432538fd1498Szrj return is_store;
432638fd1498Szrj }
4327