138fd1498Szrj /* Statement Analysis and Transformation for Vectorization
238fd1498Szrj Copyright (C) 2003-2018 Free Software Foundation, Inc.
338fd1498Szrj Contributed by Dorit Naishlos <dorit@il.ibm.com>
438fd1498Szrj and Ira Rosen <irar@il.ibm.com>
538fd1498Szrj
638fd1498Szrj This file is part of GCC.
738fd1498Szrj
838fd1498Szrj GCC is free software; you can redistribute it and/or modify it under
938fd1498Szrj the terms of the GNU General Public License as published by the Free
1038fd1498Szrj Software Foundation; either version 3, or (at your option) any later
1138fd1498Szrj version.
1238fd1498Szrj
1338fd1498Szrj GCC is distributed in the hope that it will be useful, but WITHOUT ANY
1438fd1498Szrj WARRANTY; without even the implied warranty of MERCHANTABILITY or
1538fd1498Szrj FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
1638fd1498Szrj for more details.
1738fd1498Szrj
1838fd1498Szrj You should have received a copy of the GNU General Public License
1938fd1498Szrj along with GCC; see the file COPYING3. If not see
2038fd1498Szrj <http://www.gnu.org/licenses/>. */
2138fd1498Szrj
2238fd1498Szrj #include "config.h"
2338fd1498Szrj #include "system.h"
2438fd1498Szrj #include "coretypes.h"
2538fd1498Szrj #include "backend.h"
2638fd1498Szrj #include "target.h"
2738fd1498Szrj #include "rtl.h"
2838fd1498Szrj #include "tree.h"
2938fd1498Szrj #include "gimple.h"
3038fd1498Szrj #include "ssa.h"
3138fd1498Szrj #include "optabs-tree.h"
3238fd1498Szrj #include "insn-config.h"
3338fd1498Szrj #include "recog.h" /* FIXME: for insn_data */
3438fd1498Szrj #include "cgraph.h"
3538fd1498Szrj #include "dumpfile.h"
3638fd1498Szrj #include "alias.h"
3738fd1498Szrj #include "fold-const.h"
3838fd1498Szrj #include "stor-layout.h"
3938fd1498Szrj #include "tree-eh.h"
4038fd1498Szrj #include "gimplify.h"
4138fd1498Szrj #include "gimple-iterator.h"
4238fd1498Szrj #include "gimplify-me.h"
4338fd1498Szrj #include "tree-cfg.h"
4438fd1498Szrj #include "tree-ssa-loop-manip.h"
4538fd1498Szrj #include "cfgloop.h"
4638fd1498Szrj #include "tree-ssa-loop.h"
4738fd1498Szrj #include "tree-scalar-evolution.h"
4838fd1498Szrj #include "tree-vectorizer.h"
4938fd1498Szrj #include "builtins.h"
5038fd1498Szrj #include "internal-fn.h"
5138fd1498Szrj #include "tree-vector-builder.h"
5238fd1498Szrj #include "vec-perm-indices.h"
5338fd1498Szrj #include "tree-ssa-loop-niter.h"
5438fd1498Szrj #include "gimple-fold.h"
5538fd1498Szrj
5638fd1498Szrj /* For lang_hooks.types.type_for_mode. */
5738fd1498Szrj #include "langhooks.h"
5838fd1498Szrj
5938fd1498Szrj /* Return the vectorized type for the given statement. */
6038fd1498Szrj
6138fd1498Szrj tree
stmt_vectype(struct _stmt_vec_info * stmt_info)6238fd1498Szrj stmt_vectype (struct _stmt_vec_info *stmt_info)
6338fd1498Szrj {
6438fd1498Szrj return STMT_VINFO_VECTYPE (stmt_info);
6538fd1498Szrj }
6638fd1498Szrj
6738fd1498Szrj /* Return TRUE iff the given statement is in an inner loop relative to
6838fd1498Szrj the loop being vectorized. */
6938fd1498Szrj bool
stmt_in_inner_loop_p(struct _stmt_vec_info * stmt_info)7038fd1498Szrj stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
7138fd1498Szrj {
7238fd1498Szrj gimple *stmt = STMT_VINFO_STMT (stmt_info);
7338fd1498Szrj basic_block bb = gimple_bb (stmt);
7438fd1498Szrj loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7538fd1498Szrj struct loop* loop;
7638fd1498Szrj
7738fd1498Szrj if (!loop_vinfo)
7838fd1498Szrj return false;
7938fd1498Szrj
8038fd1498Szrj loop = LOOP_VINFO_LOOP (loop_vinfo);
8138fd1498Szrj
8238fd1498Szrj return (bb->loop_father == loop->inner);
8338fd1498Szrj }
8438fd1498Szrj
8538fd1498Szrj /* Record the cost of a statement, either by directly informing the
8638fd1498Szrj target model or by saving it in a vector for later processing.
8738fd1498Szrj Return a preliminary estimate of the statement's cost. */
8838fd1498Szrj
8938fd1498Szrj unsigned
record_stmt_cost(stmt_vector_for_cost * body_cost_vec,int count,enum vect_cost_for_stmt kind,stmt_vec_info stmt_info,int misalign,enum vect_cost_model_location where)9038fd1498Szrj record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
9138fd1498Szrj enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
9238fd1498Szrj int misalign, enum vect_cost_model_location where)
9338fd1498Szrj {
9438fd1498Szrj if ((kind == vector_load || kind == unaligned_load)
9538fd1498Szrj && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9638fd1498Szrj kind = vector_gather_load;
9738fd1498Szrj if ((kind == vector_store || kind == unaligned_store)
9838fd1498Szrj && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9938fd1498Szrj kind = vector_scatter_store;
10038fd1498Szrj if (body_cost_vec)
10138fd1498Szrj {
10238fd1498Szrj tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
10338fd1498Szrj stmt_info_for_cost si = { count, kind,
10438fd1498Szrj stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
10538fd1498Szrj misalign };
10638fd1498Szrj body_cost_vec->safe_push (si);
10738fd1498Szrj return (unsigned)
10838fd1498Szrj (builtin_vectorization_cost (kind, vectype, misalign) * count);
10938fd1498Szrj }
11038fd1498Szrj else
11138fd1498Szrj return add_stmt_cost (stmt_info->vinfo->target_cost_data,
11238fd1498Szrj count, kind, stmt_info, misalign, where);
11338fd1498Szrj }
11438fd1498Szrj
11538fd1498Szrj /* Return a variable of type ELEM_TYPE[NELEMS]. */
11638fd1498Szrj
11738fd1498Szrj static tree
create_vector_array(tree elem_type,unsigned HOST_WIDE_INT nelems)11838fd1498Szrj create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
11938fd1498Szrj {
12038fd1498Szrj return create_tmp_var (build_array_type_nelts (elem_type, nelems),
12138fd1498Szrj "vect_array");
12238fd1498Szrj }
12338fd1498Szrj
12438fd1498Szrj /* ARRAY is an array of vectors created by create_vector_array.
12538fd1498Szrj Return an SSA_NAME for the vector in index N. The reference
12638fd1498Szrj is part of the vectorization of STMT and the vector is associated
12738fd1498Szrj with scalar destination SCALAR_DEST. */
12838fd1498Szrj
12938fd1498Szrj static tree
read_vector_array(gimple * stmt,gimple_stmt_iterator * gsi,tree scalar_dest,tree array,unsigned HOST_WIDE_INT n)13038fd1498Szrj read_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
13138fd1498Szrj tree array, unsigned HOST_WIDE_INT n)
13238fd1498Szrj {
13338fd1498Szrj tree vect_type, vect, vect_name, array_ref;
13438fd1498Szrj gimple *new_stmt;
13538fd1498Szrj
13638fd1498Szrj gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
13738fd1498Szrj vect_type = TREE_TYPE (TREE_TYPE (array));
13838fd1498Szrj vect = vect_create_destination_var (scalar_dest, vect_type);
13938fd1498Szrj array_ref = build4 (ARRAY_REF, vect_type, array,
14038fd1498Szrj build_int_cst (size_type_node, n),
14138fd1498Szrj NULL_TREE, NULL_TREE);
14238fd1498Szrj
14338fd1498Szrj new_stmt = gimple_build_assign (vect, array_ref);
14438fd1498Szrj vect_name = make_ssa_name (vect, new_stmt);
14538fd1498Szrj gimple_assign_set_lhs (new_stmt, vect_name);
14638fd1498Szrj vect_finish_stmt_generation (stmt, new_stmt, gsi);
14738fd1498Szrj
14838fd1498Szrj return vect_name;
14938fd1498Szrj }
15038fd1498Szrj
15138fd1498Szrj /* ARRAY is an array of vectors created by create_vector_array.
15238fd1498Szrj Emit code to store SSA_NAME VECT in index N of the array.
15338fd1498Szrj The store is part of the vectorization of STMT. */
15438fd1498Szrj
15538fd1498Szrj static void
write_vector_array(gimple * stmt,gimple_stmt_iterator * gsi,tree vect,tree array,unsigned HOST_WIDE_INT n)15638fd1498Szrj write_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree vect,
15738fd1498Szrj tree array, unsigned HOST_WIDE_INT n)
15838fd1498Szrj {
15938fd1498Szrj tree array_ref;
16038fd1498Szrj gimple *new_stmt;
16138fd1498Szrj
16238fd1498Szrj array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
16338fd1498Szrj build_int_cst (size_type_node, n),
16438fd1498Szrj NULL_TREE, NULL_TREE);
16538fd1498Szrj
16638fd1498Szrj new_stmt = gimple_build_assign (array_ref, vect);
16738fd1498Szrj vect_finish_stmt_generation (stmt, new_stmt, gsi);
16838fd1498Szrj }
16938fd1498Szrj
17038fd1498Szrj /* PTR is a pointer to an array of type TYPE. Return a representation
17138fd1498Szrj of *PTR. The memory reference replaces those in FIRST_DR
17238fd1498Szrj (and its group). */
17338fd1498Szrj
17438fd1498Szrj static tree
create_array_ref(tree type,tree ptr,tree alias_ptr_type)17538fd1498Szrj create_array_ref (tree type, tree ptr, tree alias_ptr_type)
17638fd1498Szrj {
17738fd1498Szrj tree mem_ref;
17838fd1498Szrj
17938fd1498Szrj mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
18038fd1498Szrj /* Arrays have the same alignment as their type. */
18138fd1498Szrj set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
18238fd1498Szrj return mem_ref;
18338fd1498Szrj }
18438fd1498Szrj
/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.
   RELEVANT is the relevance level to record; LIVE_P marks the stmt as
   live (its value is used outside the loop).  If STMT is the original
   stmt of a recognized pattern, the pattern stmt is marked instead.
   The stmt is only pushed on WORKLIST if its recorded relevance or
   liveness actually changed, so each stmt is processed at most once
   per state change.  */

static void
vect_mark_relevant (vec<gimple *> *worklist, gimple *stmt,
		    enum vect_relevant relevant, bool live_p)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  /* Remember the previous state so we can detect a no-op update below.  */
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
  gimple *pattern_stmt;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "mark relevant %d, live %d: ", relevant, live_p);
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
    }

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern, in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      /* This is the last stmt in a sequence that was detected as a
	 pattern that can potentially be vectorized.  Don't mark the stmt
	 as relevant/live because it's not going to be vectorized.
	 Instead mark the pattern-stmt that replaces it.  */

      pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "last stmt in pattern. don't mark"
			 " relevant/live.\n");
      /* Redirect all further updates (and the saved previous state) to
	 the pattern stmt's info.  */
      stmt_info = vinfo_for_stmt (pattern_stmt);
      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
      save_relevant = STMT_VINFO_RELEVANT (stmt_info);
      save_live_p = STMT_VINFO_LIVE_P (stmt_info);
      stmt = pattern_stmt;
    }

  /* Liveness is sticky; relevance only ever increases.  */
  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  /* Nothing changed -- no need to revisit this stmt's uses.  */
  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "already marked relevant/live.\n");
      return;
    }

  worklist->safe_push (stmt);
}
24638fd1498Szrj
24738fd1498Szrj
24838fd1498Szrj /* Function is_simple_and_all_uses_invariant
24938fd1498Szrj
25038fd1498Szrj Return true if STMT is simple and all uses of it are invariant. */
25138fd1498Szrj
25238fd1498Szrj bool
is_simple_and_all_uses_invariant(gimple * stmt,loop_vec_info loop_vinfo)25338fd1498Szrj is_simple_and_all_uses_invariant (gimple *stmt, loop_vec_info loop_vinfo)
25438fd1498Szrj {
25538fd1498Szrj tree op;
25638fd1498Szrj gimple *def_stmt;
25738fd1498Szrj ssa_op_iter iter;
25838fd1498Szrj
25938fd1498Szrj if (!is_gimple_assign (stmt))
26038fd1498Szrj return false;
26138fd1498Szrj
26238fd1498Szrj FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
26338fd1498Szrj {
26438fd1498Szrj enum vect_def_type dt = vect_uninitialized_def;
26538fd1498Szrj
26638fd1498Szrj if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt))
26738fd1498Szrj {
26838fd1498Szrj if (dump_enabled_p ())
26938fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
27038fd1498Szrj "use not simple.\n");
27138fd1498Szrj return false;
27238fd1498Szrj }
27338fd1498Szrj
27438fd1498Szrj if (dt != vect_external_def && dt != vect_constant_def)
27538fd1498Szrj return false;
27638fd1498Szrj }
27738fd1498Szrj return true;
27838fd1498Szrj }
27938fd1498Szrj
/* Function vect_stmt_relevant_p.

   Return true if STMT in loop that is represented by LOOP_VINFO is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - control stmts in the loop (except for the exit condition).

   Outputs: *RELEVANT gets the relevance classification, *LIVE_P is set
   iff a def of STMT is used outside the loop.

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (gimple *stmt, loop_vec_info loop_vinfo,
		      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  /* Assume irrelevant and not live until proven otherwise.  */
  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt)
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
	 != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt)
	&& !gimple_clobber_p (stmt))
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vec_stmt_relevant_p: stmt has vdefs.\n");
	*relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
	{
	  basic_block bb = gimple_bb (USE_STMT (use_p));
	  if (!flow_bb_inside_loop_p (loop, bb))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "vec_stmt_relevant_p: used out of loop.\n");

	      /* Debug uses outside the loop do not force liveness.  */
	      if (is_gimple_debug (USE_STMT (use_p)))
		continue;

	      /* We expect all such uses to be in the loop exit phis
		 (because of loop closed form)   */
	      gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
	      gcc_assert (bb == single_exit (loop)->dest);

              *live_p = true;
	    }
	}
    }

  /* A live stmt that is not otherwise relevant and not fully invariant
     must still be vectorized so its out-of-loop value is available.  */
  if (*live_p && *relevant == vect_unused_in_scope
      && !is_simple_and_all_uses_invariant (stmt, loop_vinfo))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vec_stmt_relevant_p: stmt live but not relevant.\n");
      *relevant = vect_used_only_live;
    }

  return (*live_p || *relevant);
}
35838fd1498Szrj
35938fd1498Szrj
36038fd1498Szrj /* Function exist_non_indexing_operands_for_use_p
36138fd1498Szrj
36238fd1498Szrj USE is one of the uses attached to STMT. Check if USE is
36338fd1498Szrj used in STMT for anything other than indexing an array. */
36438fd1498Szrj
36538fd1498Szrj static bool
exist_non_indexing_operands_for_use_p(tree use,gimple * stmt)36638fd1498Szrj exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
36738fd1498Szrj {
36838fd1498Szrj tree operand;
36938fd1498Szrj stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
37038fd1498Szrj
37138fd1498Szrj /* USE corresponds to some operand in STMT. If there is no data
37238fd1498Szrj reference in STMT, then any operand that corresponds to USE
37338fd1498Szrj is not indexing an array. */
37438fd1498Szrj if (!STMT_VINFO_DATA_REF (stmt_info))
37538fd1498Szrj return true;
37638fd1498Szrj
37738fd1498Szrj /* STMT has a data_ref. FORNOW this means that its of one of
37838fd1498Szrj the following forms:
37938fd1498Szrj -1- ARRAY_REF = var
38038fd1498Szrj -2- var = ARRAY_REF
38138fd1498Szrj (This should have been verified in analyze_data_refs).
38238fd1498Szrj
38338fd1498Szrj 'var' in the second case corresponds to a def, not a use,
38438fd1498Szrj so USE cannot correspond to any operands that are not used
38538fd1498Szrj for array indexing.
38638fd1498Szrj
38738fd1498Szrj Therefore, all we need to check is if STMT falls into the
38838fd1498Szrj first case, and whether var corresponds to USE. */
38938fd1498Szrj
39038fd1498Szrj if (!gimple_assign_copy_p (stmt))
39138fd1498Szrj {
39238fd1498Szrj if (is_gimple_call (stmt)
39338fd1498Szrj && gimple_call_internal_p (stmt))
39438fd1498Szrj {
39538fd1498Szrj internal_fn ifn = gimple_call_internal_fn (stmt);
39638fd1498Szrj int mask_index = internal_fn_mask_index (ifn);
39738fd1498Szrj if (mask_index >= 0
39838fd1498Szrj && use == gimple_call_arg (stmt, mask_index))
39938fd1498Szrj return true;
40038fd1498Szrj int stored_value_index = internal_fn_stored_value_index (ifn);
40138fd1498Szrj if (stored_value_index >= 0
40238fd1498Szrj && use == gimple_call_arg (stmt, stored_value_index))
40338fd1498Szrj return true;
40438fd1498Szrj if (internal_gather_scatter_fn_p (ifn)
40538fd1498Szrj && use == gimple_call_arg (stmt, 1))
40638fd1498Szrj return true;
40738fd1498Szrj }
40838fd1498Szrj return false;
40938fd1498Szrj }
41038fd1498Szrj
41138fd1498Szrj if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
41238fd1498Szrj return false;
41338fd1498Szrj operand = gimple_assign_rhs1 (stmt);
41438fd1498Szrj if (TREE_CODE (operand) != SSA_NAME)
41538fd1498Szrj return false;
41638fd1498Szrj
41738fd1498Szrj if (operand == use)
41838fd1498Szrj return true;
41938fd1498Szrj
42038fd1498Szrj return false;
42138fd1498Szrj }
42238fd1498Szrj
42338fd1498Szrj
/*
   Function process_use.

   Inputs:
   - a USE in STMT in a loop represented by LOOP_VINFO
   - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
     be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
       STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array indexing),
   which does not need to be directly vectorized, then the liveness/relevance
   of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
   skip DEF_STMT cause it had already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
   be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */

static bool
process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo,
	     enum vect_relevant relevant, vec<gimple *> *worklist,
	     bool force)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  gimple *def_stmt;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
    return true;

  /* Classify USE's definition; an unclassifiable use blocks
     vectorization of the whole loop.  */
  if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &dt))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: unsupported use in stmt.\n");
      return false;
    }

  /* Defs with no real defining stmt (e.g. default defs) need no
     marking.  */
  if (!def_stmt || gimple_nop_p (def_stmt))
    return true;

  def_bb = gimple_bb (def_stmt);
  if (!flow_bb_inside_loop_p (loop, def_bb))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
      return true;
    }

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
     DEF_STMT must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DEF_STMT in the loop.  So we just
     check that everything is as expected, and we are done.  */
  dstmt_vinfo = vinfo_for_stmt (def_stmt);
  bb = gimple_bb (stmt);
  if (gimple_code (stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (def_stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "reduc-stmt defining reduc-phi in the same nest.\n");
      /* If DEF_STMT was replaced by a pattern, check the pattern stmt's
	 info instead.  */
      if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
	dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
		  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return true;
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
	outer-loop-header-bb:
		d = def_stmt
	inner-loop:
		stmt # use (d)
	outer-loop-tail-bb:
		...		  */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "outer-loop def-stmt defining inner-loop stmt.\n");

      /* Translate the inner-loop relevance of STMT into the relevance
	 the outer-loop DEF_STMT should get.  */
      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
		      vect_used_in_scope : vect_unused_in_scope;
	  break;

	case vect_used_in_outer_by_reduction:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_by_reduction;
	  break;

	case vect_used_in_outer:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_in_scope;
	  break;

	case vect_used_in_scope:
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
	outer-loop-header-bb:
		...
	inner-loop:
		d = def_stmt
	outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
		stmt # use (d)		*/
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "inner-loop def-stmt defining outer-loop stmt.\n");

      /* Translate the outer-loop relevance of STMT into the relevance
	 the inner-loop DEF_STMT should get.  */
      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
            || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
                      vect_used_in_outer_by_reduction : vect_unused_in_scope;
          break;

        case vect_used_by_reduction:
	case vect_used_only_live:
          relevant = vect_used_in_outer_by_reduction;
          break;

        case vect_used_in_scope:
          relevant = vect_used_in_outer;
          break;

        default:
          gcc_unreachable ();
        }
    }
  /* We are also not interested in uses on loop PHI backedges that are
     inductions.  Otherwise we'll needlessly vectorize the IV increment
     and cause hybrid SLP for SLP inductions.  Unless the PHI is live
     of course.  */
  else if (gimple_code (stmt) == GIMPLE_PHI
	   && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
	   && ! STMT_VINFO_LIVE_P (stmt_vinfo)
	   && (PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (bb->loop_father))
	       == use))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "induction value on backedge.\n");
      return true;
    }


  vect_mark_relevant (worklist, def_stmt, relevant, false);
  return true;
}
60338fd1498Szrj
60438fd1498Szrj
60538fd1498Szrj /* Function vect_mark_stmts_to_be_vectorized.
60638fd1498Szrj
60738fd1498Szrj Not all stmts in the loop need to be vectorized. For example:
60838fd1498Szrj
60938fd1498Szrj for i...
61038fd1498Szrj for j...
61138fd1498Szrj 1. T0 = i + j
61238fd1498Szrj 2. T1 = a[T0]
61338fd1498Szrj
61438fd1498Szrj 3. j = j + 1
61538fd1498Szrj
61638fd1498Szrj Stmt 1 and 3 do not need to be vectorized, because loop control and
61738fd1498Szrj addressing of vectorized data-refs are handled differently.
61838fd1498Szrj
61938fd1498Szrj This pass detects such stmts. */
62038fd1498Szrj
62138fd1498Szrj bool
vect_mark_stmts_to_be_vectorized(loop_vec_info loop_vinfo)62238fd1498Szrj vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
62338fd1498Szrj {
62438fd1498Szrj struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
62538fd1498Szrj basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
62638fd1498Szrj unsigned int nbbs = loop->num_nodes;
62738fd1498Szrj gimple_stmt_iterator si;
62838fd1498Szrj gimple *stmt;
62938fd1498Szrj unsigned int i;
63038fd1498Szrj stmt_vec_info stmt_vinfo;
63138fd1498Szrj basic_block bb;
63238fd1498Szrj gimple *phi;
63338fd1498Szrj bool live_p;
63438fd1498Szrj enum vect_relevant relevant;
63538fd1498Szrj
63638fd1498Szrj if (dump_enabled_p ())
63738fd1498Szrj dump_printf_loc (MSG_NOTE, vect_location,
63838fd1498Szrj "=== vect_mark_stmts_to_be_vectorized ===\n");
63938fd1498Szrj
64038fd1498Szrj auto_vec<gimple *, 64> worklist;
64138fd1498Szrj
64238fd1498Szrj /* 1. Init worklist. */
64338fd1498Szrj for (i = 0; i < nbbs; i++)
64438fd1498Szrj {
64538fd1498Szrj bb = bbs[i];
64638fd1498Szrj for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
64738fd1498Szrj {
64838fd1498Szrj phi = gsi_stmt (si);
64938fd1498Szrj if (dump_enabled_p ())
65038fd1498Szrj {
65138fd1498Szrj dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
65238fd1498Szrj dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
65338fd1498Szrj }
65438fd1498Szrj
65538fd1498Szrj if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
65638fd1498Szrj vect_mark_relevant (&worklist, phi, relevant, live_p);
65738fd1498Szrj }
65838fd1498Szrj for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
65938fd1498Szrj {
66038fd1498Szrj stmt = gsi_stmt (si);
66138fd1498Szrj if (dump_enabled_p ())
66238fd1498Szrj {
66338fd1498Szrj dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
66438fd1498Szrj dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
66538fd1498Szrj }
66638fd1498Szrj
66738fd1498Szrj if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
66838fd1498Szrj vect_mark_relevant (&worklist, stmt, relevant, live_p);
66938fd1498Szrj }
67038fd1498Szrj }
67138fd1498Szrj
67238fd1498Szrj /* 2. Process_worklist */
67338fd1498Szrj while (worklist.length () > 0)
67438fd1498Szrj {
67538fd1498Szrj use_operand_p use_p;
67638fd1498Szrj ssa_op_iter iter;
67738fd1498Szrj
67838fd1498Szrj stmt = worklist.pop ();
67938fd1498Szrj if (dump_enabled_p ())
68038fd1498Szrj {
68138fd1498Szrj dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
68238fd1498Szrj dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
68338fd1498Szrj }
68438fd1498Szrj
68538fd1498Szrj /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
68638fd1498Szrj (DEF_STMT) as relevant/irrelevant according to the relevance property
68738fd1498Szrj of STMT. */
68838fd1498Szrj stmt_vinfo = vinfo_for_stmt (stmt);
68938fd1498Szrj relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
69038fd1498Szrj
69138fd1498Szrj /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
69238fd1498Szrj propagated as is to the DEF_STMTs of its USEs.
69338fd1498Szrj
69438fd1498Szrj One exception is when STMT has been identified as defining a reduction
69538fd1498Szrj variable; in this case we set the relevance to vect_used_by_reduction.
69638fd1498Szrj This is because we distinguish between two kinds of relevant stmts -
69738fd1498Szrj those that are used by a reduction computation, and those that are
69838fd1498Szrj (also) used by a regular computation. This allows us later on to
69938fd1498Szrj identify stmts that are used solely by a reduction, and therefore the
70038fd1498Szrj order of the results that they produce does not have to be kept. */
70138fd1498Szrj
70238fd1498Szrj switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
70338fd1498Szrj {
70438fd1498Szrj case vect_reduction_def:
70538fd1498Szrj gcc_assert (relevant != vect_unused_in_scope);
70638fd1498Szrj if (relevant != vect_unused_in_scope
70738fd1498Szrj && relevant != vect_used_in_scope
70838fd1498Szrj && relevant != vect_used_by_reduction
70938fd1498Szrj && relevant != vect_used_only_live)
71038fd1498Szrj {
71138fd1498Szrj if (dump_enabled_p ())
71238fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
71338fd1498Szrj "unsupported use of reduction.\n");
71438fd1498Szrj return false;
71538fd1498Szrj }
71638fd1498Szrj break;
71738fd1498Szrj
71838fd1498Szrj case vect_nested_cycle:
71938fd1498Szrj if (relevant != vect_unused_in_scope
72038fd1498Szrj && relevant != vect_used_in_outer_by_reduction
72138fd1498Szrj && relevant != vect_used_in_outer)
72238fd1498Szrj {
72338fd1498Szrj if (dump_enabled_p ())
72438fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
72538fd1498Szrj "unsupported use of nested cycle.\n");
72638fd1498Szrj
72738fd1498Szrj return false;
72838fd1498Szrj }
72938fd1498Szrj break;
73038fd1498Szrj
73138fd1498Szrj case vect_double_reduction_def:
73238fd1498Szrj if (relevant != vect_unused_in_scope
73338fd1498Szrj && relevant != vect_used_by_reduction
73438fd1498Szrj && relevant != vect_used_only_live)
73538fd1498Szrj {
73638fd1498Szrj if (dump_enabled_p ())
73738fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
73838fd1498Szrj "unsupported use of double reduction.\n");
73938fd1498Szrj
74038fd1498Szrj return false;
74138fd1498Szrj }
74238fd1498Szrj break;
74338fd1498Szrj
74438fd1498Szrj default:
74538fd1498Szrj break;
74638fd1498Szrj }
74738fd1498Szrj
74838fd1498Szrj if (is_pattern_stmt_p (stmt_vinfo))
74938fd1498Szrj {
75038fd1498Szrj /* Pattern statements are not inserted into the code, so
75138fd1498Szrj FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
75238fd1498Szrj have to scan the RHS or function arguments instead. */
75338fd1498Szrj if (is_gimple_assign (stmt))
75438fd1498Szrj {
75538fd1498Szrj enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
75638fd1498Szrj tree op = gimple_assign_rhs1 (stmt);
75738fd1498Szrj
75838fd1498Szrj i = 1;
75938fd1498Szrj if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
76038fd1498Szrj {
76138fd1498Szrj if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
76238fd1498Szrj relevant, &worklist, false)
76338fd1498Szrj || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
76438fd1498Szrj relevant, &worklist, false))
76538fd1498Szrj return false;
76638fd1498Szrj i = 2;
76738fd1498Szrj }
76838fd1498Szrj for (; i < gimple_num_ops (stmt); i++)
76938fd1498Szrj {
77038fd1498Szrj op = gimple_op (stmt, i);
77138fd1498Szrj if (TREE_CODE (op) == SSA_NAME
77238fd1498Szrj && !process_use (stmt, op, loop_vinfo, relevant,
77338fd1498Szrj &worklist, false))
77438fd1498Szrj return false;
77538fd1498Szrj }
77638fd1498Szrj }
77738fd1498Szrj else if (is_gimple_call (stmt))
77838fd1498Szrj {
77938fd1498Szrj for (i = 0; i < gimple_call_num_args (stmt); i++)
78038fd1498Szrj {
78138fd1498Szrj tree arg = gimple_call_arg (stmt, i);
78238fd1498Szrj if (!process_use (stmt, arg, loop_vinfo, relevant,
78338fd1498Szrj &worklist, false))
78438fd1498Szrj return false;
78538fd1498Szrj }
78638fd1498Szrj }
78738fd1498Szrj }
78838fd1498Szrj else
78938fd1498Szrj FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
79038fd1498Szrj {
79138fd1498Szrj tree op = USE_FROM_PTR (use_p);
79238fd1498Szrj if (!process_use (stmt, op, loop_vinfo, relevant,
79338fd1498Szrj &worklist, false))
79438fd1498Szrj return false;
79538fd1498Szrj }
79638fd1498Szrj
79738fd1498Szrj if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
79838fd1498Szrj {
79938fd1498Szrj gather_scatter_info gs_info;
80038fd1498Szrj if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info))
80138fd1498Szrj gcc_unreachable ();
80238fd1498Szrj if (!process_use (stmt, gs_info.offset, loop_vinfo, relevant,
80338fd1498Szrj &worklist, true))
80438fd1498Szrj return false;
80538fd1498Szrj }
80638fd1498Szrj } /* while worklist */
80738fd1498Szrj
80838fd1498Szrj return true;
80938fd1498Szrj }
81038fd1498Szrj
81138fd1498Szrj
81238fd1498Szrj /* Function vect_model_simple_cost.
81338fd1498Szrj
81438fd1498Szrj Models cost for simple operations, i.e. those that only emit ncopies of a
81538fd1498Szrj single op. Right now, this does not account for multiple insns that could
81638fd1498Szrj be generated for the single vector op. We will handle that shortly. */
81738fd1498Szrj
81838fd1498Szrj void
vect_model_simple_cost(stmt_vec_info stmt_info,int ncopies,enum vect_def_type * dt,int ndts,stmt_vector_for_cost * prologue_cost_vec,stmt_vector_for_cost * body_cost_vec)81938fd1498Szrj vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
82038fd1498Szrj enum vect_def_type *dt,
82138fd1498Szrj int ndts,
82238fd1498Szrj stmt_vector_for_cost *prologue_cost_vec,
82338fd1498Szrj stmt_vector_for_cost *body_cost_vec)
82438fd1498Szrj {
82538fd1498Szrj int i;
82638fd1498Szrj int inside_cost = 0, prologue_cost = 0;
82738fd1498Szrj
82838fd1498Szrj /* The SLP costs were already calculated during SLP tree build. */
82938fd1498Szrj gcc_assert (!PURE_SLP_STMT (stmt_info));
83038fd1498Szrj
83138fd1498Szrj /* Cost the "broadcast" of a scalar operand in to a vector operand.
83238fd1498Szrj Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
83338fd1498Szrj cost model. */
83438fd1498Szrj for (i = 0; i < ndts; i++)
83538fd1498Szrj if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
83638fd1498Szrj prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
83738fd1498Szrj stmt_info, 0, vect_prologue);
83838fd1498Szrj
83938fd1498Szrj /* Pass the inside-of-loop statements to the target-specific cost model. */
84038fd1498Szrj inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
84138fd1498Szrj stmt_info, 0, vect_body);
84238fd1498Szrj
84338fd1498Szrj if (dump_enabled_p ())
84438fd1498Szrj dump_printf_loc (MSG_NOTE, vect_location,
84538fd1498Szrj "vect_model_simple_cost: inside_cost = %d, "
84638fd1498Szrj "prologue_cost = %d .\n", inside_cost, prologue_cost);
84738fd1498Szrj }
84838fd1498Szrj
84938fd1498Szrj
85038fd1498Szrj /* Model cost for type demotion and promotion operations. PWR is normally
85138fd1498Szrj zero for single-step promotions and demotions. It will be one if
85238fd1498Szrj two-step promotion/demotion is required, and so on. Each additional
85338fd1498Szrj step doubles the number of instructions required. */
85438fd1498Szrj
85538fd1498Szrj static void
vect_model_promotion_demotion_cost(stmt_vec_info stmt_info,enum vect_def_type * dt,int pwr)85638fd1498Szrj vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
85738fd1498Szrj enum vect_def_type *dt, int pwr)
85838fd1498Szrj {
85938fd1498Szrj int i, tmp;
86038fd1498Szrj int inside_cost = 0, prologue_cost = 0;
86138fd1498Szrj loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
86238fd1498Szrj bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
86338fd1498Szrj void *target_cost_data;
86438fd1498Szrj
86538fd1498Szrj /* The SLP costs were already calculated during SLP tree build. */
86638fd1498Szrj gcc_assert (!PURE_SLP_STMT (stmt_info));
86738fd1498Szrj
86838fd1498Szrj if (loop_vinfo)
86938fd1498Szrj target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
87038fd1498Szrj else
87138fd1498Szrj target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
87238fd1498Szrj
87338fd1498Szrj for (i = 0; i < pwr + 1; i++)
87438fd1498Szrj {
87538fd1498Szrj tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
87638fd1498Szrj (i + 1) : i;
87738fd1498Szrj inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
87838fd1498Szrj vec_promote_demote, stmt_info, 0,
87938fd1498Szrj vect_body);
88038fd1498Szrj }
88138fd1498Szrj
88238fd1498Szrj /* FORNOW: Assuming maximum 2 args per stmts. */
88338fd1498Szrj for (i = 0; i < 2; i++)
88438fd1498Szrj if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
88538fd1498Szrj prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
88638fd1498Szrj stmt_info, 0, vect_prologue);
88738fd1498Szrj
88838fd1498Szrj if (dump_enabled_p ())
88938fd1498Szrj dump_printf_loc (MSG_NOTE, vect_location,
89038fd1498Szrj "vect_model_promotion_demotion_cost: inside_cost = %d, "
89138fd1498Szrj "prologue_cost = %d .\n", inside_cost, prologue_cost);
89238fd1498Szrj }
89338fd1498Szrj
/* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.  */

void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
		       vect_memory_access_type memory_access_type,
		       vec_load_store_type vls_type, slp_tree slp_node,
		       stmt_vector_for_cost *prologue_cost_vec,
		       stmt_vector_for_cost *body_cost_vec)
{
  unsigned int inside_cost = 0, prologue_cost = 0;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
  bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);

  /* Storing a loop-invariant value needs a one-off broadcast of the
     scalar into a vector register, charged to the prologue.  */
  if (vls_type == VLS_STORE_INVARIANT)
    prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
				       stmt_info, 0, vect_prologue);

  /* Grouped stores update all elements in the group at once,
     so we want the DR for the first statement.  */
  if (!slp_node && grouped_access_p)
    {
      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }

  /* True if we should include any once-per-group costs as well as
     the cost of the statement itself.  For SLP we only get called
     once per group anyhow.  */
  bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (first_stmt_p
      && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Uses a high and low interleave or shuffle operations for each
	 needed permute.  */
      int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
				      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_model_store_cost: strided group_size = %d .\n",
			 group_size);
    }

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  /* Costs of the stores.  */
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_GATHER_SCATTER)
    {
      /* N scalar stores, one per element of the vector per copy.  */
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (body_cost_vec,
				       ncopies * assumed_nunits,
				       scalar_store, stmt_info, 0, vect_body);
    }
  else
    vect_get_store_cost (dr, ncopies, &inside_cost, body_cost_vec);

  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP)
    {
      /* Extracting the individual elements out of the vector value to
	 be stored — one vec_to_scalar per scalar store.  (The original
	 comment here was a copy of the scalar-store comment above.)  */
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (body_cost_vec,
				       ncopies * assumed_nunits,
				       vec_to_scalar, stmt_info, 0, vect_body);
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_store_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
97738fd1498Szrj
97838fd1498Szrj
97938fd1498Szrj /* Calculate cost of DR's memory access. */
98038fd1498Szrj void
vect_get_store_cost(struct data_reference * dr,int ncopies,unsigned int * inside_cost,stmt_vector_for_cost * body_cost_vec)98138fd1498Szrj vect_get_store_cost (struct data_reference *dr, int ncopies,
98238fd1498Szrj unsigned int *inside_cost,
98338fd1498Szrj stmt_vector_for_cost *body_cost_vec)
98438fd1498Szrj {
98538fd1498Szrj int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
98638fd1498Szrj gimple *stmt = DR_STMT (dr);
98738fd1498Szrj stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
98838fd1498Szrj
98938fd1498Szrj switch (alignment_support_scheme)
99038fd1498Szrj {
99138fd1498Szrj case dr_aligned:
99238fd1498Szrj {
99338fd1498Szrj *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
99438fd1498Szrj vector_store, stmt_info, 0,
99538fd1498Szrj vect_body);
99638fd1498Szrj
99738fd1498Szrj if (dump_enabled_p ())
99838fd1498Szrj dump_printf_loc (MSG_NOTE, vect_location,
99938fd1498Szrj "vect_model_store_cost: aligned.\n");
100038fd1498Szrj break;
100138fd1498Szrj }
100238fd1498Szrj
100338fd1498Szrj case dr_unaligned_supported:
100438fd1498Szrj {
100538fd1498Szrj /* Here, we assign an additional cost for the unaligned store. */
100638fd1498Szrj *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
100738fd1498Szrj unaligned_store, stmt_info,
100838fd1498Szrj DR_MISALIGNMENT (dr), vect_body);
100938fd1498Szrj if (dump_enabled_p ())
101038fd1498Szrj dump_printf_loc (MSG_NOTE, vect_location,
101138fd1498Szrj "vect_model_store_cost: unaligned supported by "
101238fd1498Szrj "hardware.\n");
101338fd1498Szrj break;
101438fd1498Szrj }
101538fd1498Szrj
101638fd1498Szrj case dr_unaligned_unsupported:
101738fd1498Szrj {
101838fd1498Szrj *inside_cost = VECT_MAX_COST;
101938fd1498Szrj
102038fd1498Szrj if (dump_enabled_p ())
102138fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
102238fd1498Szrj "vect_model_store_cost: unsupported access.\n");
102338fd1498Szrj break;
102438fd1498Szrj }
102538fd1498Szrj
102638fd1498Szrj default:
102738fd1498Szrj gcc_unreachable ();
102838fd1498Szrj }
102938fd1498Szrj }
103038fd1498Szrj
103138fd1498Szrj
/* Function vect_model_load_cost

   Models cost for loads.  In the case of grouped accesses, one access has
   the overhead of the grouped access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

void
vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
		      vect_memory_access_type memory_access_type,
		      slp_tree slp_node,
		      stmt_vector_for_cost *prologue_cost_vec,
		      stmt_vector_for_cost *body_cost_vec)
{
  gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  unsigned int inside_cost = 0, prologue_cost = 0;
  bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);

  /* Grouped loads read all elements in the group at once,
     so we want the DR for the first statement.  */
  if (!slp_node && grouped_access_p)
    {
      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }

  /* True if we should include any once-per-group costs as well as
     the cost of the statement itself.  For SLP we only get called
     once per group anyhow.  */
  bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate loads.  If a grouped
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (first_stmt_p
      && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Uses an even and odd extract operations or shuffle operations
	 for each needed permute.  */
      int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
				      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_model_load_cost: strided group_size = %d .\n",
			 group_size);
    }

  /* The loads themselves.  */
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_GATHER_SCATTER)
    {
      /* N scalar loads plus gathering them into a vector.  */
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (body_cost_vec,
				       ncopies * assumed_nunits,
				       scalar_load, stmt_info, 0, vect_body);
    }
  else
    vect_get_load_cost (dr, ncopies, first_stmt_p,
			&inside_cost, &prologue_cost,
			prologue_cost_vec, body_cost_vec, true);
  /* Elementwise and strided-SLP loads build up the vector value from
     individually loaded scalars, so charge that construction too.  */
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP)
    inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
				     stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_load_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
110938fd1498Szrj
111038fd1498Szrj
/* Calculate cost of DR's memory access.

   ADD_REALIGN_COST selects whether the once-per-loop setup cost of the
   software-pipelined realignment scheme is charged here (for grouped
   accesses it should be charged for only one access of the group).
   RECORD_PROLOGUE_COSTS gates whether any prologue costs are recorded.
   Body costs accumulate into *INSIDE_COST, prologue costs into
   *PROLOGUE_COST.  */
void
vect_get_load_cost (struct data_reference *dr, int ncopies,
		    bool add_realign_cost, unsigned int *inside_cost,
		    unsigned int *prologue_cost,
		    stmt_vector_for_cost *prologue_cost_vec,
		    stmt_vector_for_cost *body_cost_vec,
		    bool record_prologue_costs)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple *stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	/* A plain vector load per copy.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: aligned.\n");

	break;
      }
    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned load.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_load, stmt_info,
					  DR_MISALIGNMENT (dr), vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned supported by "
			   "hardware.\n");

	break;
      }
    case dr_explicit_realign:
      {
	/* Each copy costs two vector loads plus one permute to combine
	   them into the realigned value.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
					  vector_load, stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vec_perm, stmt_info, 0, vect_body);

	/* FIXME: If the misalignment remains fixed across the iterations of
	   the containing loop, the following cost should be added to the
	   prologue costs.  */
	if (targetm.vectorize.builtin_mask_for_load)
	  *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
					    stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign\n");

	break;
      }
    case dr_explicit_realign_optimized:
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned software "
			   "pipelined.\n");

	/* Unaligned software pipeline has a load of an address, an initial
	   load, and possibly a mask operation to "prime" the loop.  However,
	   if this is an access in a group of loads, which provide grouped
	   access, then the above cost should only be considered for one
	   access in the group.  Inside the loop, there is a load op
	   and a realignment op.  */

	if (add_realign_cost && record_prologue_costs)
	  {
	    *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
						vector_stmt, stmt_info,
						0, vect_prologue);
	    if (targetm.vectorize.builtin_mask_for_load)
	      *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
						  vector_stmt, stmt_info,
						  0, vect_prologue);
	  }

	/* In-loop cost: one vector load and one realignment permute
	   per copy.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign optimized"
			   "\n");

	break;
      }

    case dr_unaligned_unsupported:
      {
	/* Make this access scheme prohibitively expensive so it is
	   never chosen.  */
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vect_model_load_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}
122338fd1498Szrj
122438fd1498Szrj /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
122538fd1498Szrj the loop preheader for the vectorized stmt STMT. */
122638fd1498Szrj
122738fd1498Szrj static void
vect_init_vector_1(gimple * stmt,gimple * new_stmt,gimple_stmt_iterator * gsi)122838fd1498Szrj vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
122938fd1498Szrj {
123038fd1498Szrj if (gsi)
123138fd1498Szrj vect_finish_stmt_generation (stmt, new_stmt, gsi);
123238fd1498Szrj else
123338fd1498Szrj {
123438fd1498Szrj stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
123538fd1498Szrj loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
123638fd1498Szrj
123738fd1498Szrj if (loop_vinfo)
123838fd1498Szrj {
123938fd1498Szrj struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
124038fd1498Szrj basic_block new_bb;
124138fd1498Szrj edge pe;
124238fd1498Szrj
124338fd1498Szrj if (nested_in_vect_loop_p (loop, stmt))
124438fd1498Szrj loop = loop->inner;
124538fd1498Szrj
124638fd1498Szrj pe = loop_preheader_edge (loop);
124738fd1498Szrj new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
124838fd1498Szrj gcc_assert (!new_bb);
124938fd1498Szrj }
125038fd1498Szrj else
125138fd1498Szrj {
125238fd1498Szrj bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
125338fd1498Szrj basic_block bb;
125438fd1498Szrj gimple_stmt_iterator gsi_bb_start;
125538fd1498Szrj
125638fd1498Szrj gcc_assert (bb_vinfo);
125738fd1498Szrj bb = BB_VINFO_BB (bb_vinfo);
125838fd1498Szrj gsi_bb_start = gsi_after_labels (bb);
125938fd1498Szrj gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
126038fd1498Szrj }
126138fd1498Szrj }
126238fd1498Szrj
126338fd1498Szrj if (dump_enabled_p ())
126438fd1498Szrj {
126538fd1498Szrj dump_printf_loc (MSG_NOTE, vect_location,
126638fd1498Szrj "created new init_stmt: ");
126738fd1498Szrj dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
126838fd1498Szrj }
126938fd1498Szrj }
127038fd1498Szrj
/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type a vector with all elements equal to VAL is created first.
   Place the initialization at BSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT.  */

tree
vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
{
  gimple *init_stmt;
  tree new_temp;

  /* We abuse this function to push sth to a SSA name with initial 'val'.  */
  if (! useless_type_conversion_p (type, TREE_TYPE (val)))
    {
      gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
      if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
	{
	  /* Scalar boolean value should be transformed into
	     all zeros or all ones value before building a vector.  */
	  if (VECTOR_BOOLEAN_TYPE_P (type))
	    {
	      tree true_val = build_all_ones_cst (TREE_TYPE (type));
	      tree false_val = build_zero_cst (TREE_TYPE (type));

	      if (CONSTANT_CLASS_P (val))
		/* Constant: fold the canonicalization immediately.  */
		val = integer_zerop (val) ? false_val : true_val;
	      else
		{
		  /* Non-constant: emit VAL ? -1 : 0 so the element has
		     the canonical mask representation.  */
		  new_temp = make_ssa_name (TREE_TYPE (type));
		  init_stmt = gimple_build_assign (new_temp, COND_EXPR,
						   val, true_val, false_val);
		  vect_init_vector_1 (stmt, init_stmt, gsi);
		  val = new_temp;
		}
	    }
	  else if (CONSTANT_CLASS_P (val))
	    /* Constants can be converted to the element type by folding.  */
	    val = fold_convert (TREE_TYPE (type), val);
	  else
	    {
	      /* Non-constant scalar: emit an explicit conversion stmt.
		 Non-integral element types use a VIEW_CONVERT_EXPR
		 (bit-reinterpretation); integral ones use a NOP
		 conversion.  */
	      new_temp = make_ssa_name (TREE_TYPE (type));
	      if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
		init_stmt = gimple_build_assign (new_temp,
						 fold_build1 (VIEW_CONVERT_EXPR,
							      TREE_TYPE (type),
							      val));
	      else
		init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
	      vect_init_vector_1 (stmt, init_stmt, gsi);
	      val = new_temp;
	    }
	}
      /* Splat the element-typed scalar across the whole vector.  */
      val = build_vector_from_val (type, val);
    }

  /* Bind VAL to a fresh SSA name and emit the initialization stmt.  */
  new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_temp, val);
  vect_init_vector_1 (stmt, init_stmt, gsi);
  return new_temp;
}
133538fd1498Szrj
/* Function vect_get_vec_def_for_operand_1.

   For a defining stmt DEF_STMT of a scalar stmt, return a vector def with type
   DT that will be used in the vectorized stmt.  */

tree
vect_get_vec_def_for_operand_1 (gimple *def_stmt, enum vect_def_type dt)
{
  tree vec_oprnd;
  gimple *vec_stmt;
  stmt_vec_info def_stmt_info = NULL;

  switch (dt)
    {
    /* operand is a constant or a loop invariant.  */
    case vect_constant_def:
    case vect_external_def:
      /* Code should use vect_get_vec_def_for_operand.  */
      gcc_unreachable ();

    /* operand is defined inside the loop.  */
    case vect_internal_def:
      {
	/* Get the def from the vectorized stmt.  */
	def_stmt_info = vinfo_for_stmt (def_stmt);

	vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
	/* Get vectorized pattern statement.  If DEF_STMT was replaced
	   by a pattern stmt and only that pattern stmt was vectorized,
	   the vectorized def lives on the related pattern stmt.  */
	if (!vec_stmt
	    && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
	    && !STMT_VINFO_RELEVANT (def_stmt_info))
	  vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
		       STMT_VINFO_RELATED_STMT (def_stmt_info)));
	gcc_assert (vec_stmt);
	/* Pull the defined value out of whichever stmt kind we found.  */
	if (gimple_code (vec_stmt) == GIMPLE_PHI)
	  vec_oprnd = PHI_RESULT (vec_stmt);
	else if (is_gimple_call (vec_stmt))
	  vec_oprnd = gimple_call_lhs (vec_stmt);
	else
	  vec_oprnd = gimple_assign_lhs (vec_stmt);
	return vec_oprnd;
      }

    /* operand is defined by a loop header phi.  */
    case vect_reduction_def:
    case vect_double_reduction_def:
    case vect_nested_cycle:
    case vect_induction_def:
      {
	gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);

	/* Get the def from the vectorized stmt.  */
	def_stmt_info = vinfo_for_stmt (def_stmt);
	vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
	if (gimple_code (vec_stmt) == GIMPLE_PHI)
	  vec_oprnd = PHI_RESULT (vec_stmt);
	else
	  vec_oprnd = gimple_get_lhs (vec_stmt);
	return vec_oprnd;
      }

    default:
      gcc_unreachable ();
    }
}
140138fd1498Szrj
140238fd1498Szrj
140338fd1498Szrj /* Function vect_get_vec_def_for_operand.
140438fd1498Szrj
140538fd1498Szrj OP is an operand in STMT. This function returns a (vector) def that will be
140638fd1498Szrj used in the vectorized stmt for STMT.
140738fd1498Szrj
140838fd1498Szrj In the case that OP is an SSA_NAME which is defined in the loop, then
140938fd1498Szrj STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
141038fd1498Szrj
141138fd1498Szrj In case OP is an invariant or constant, a new stmt that creates a vector def
141238fd1498Szrj needs to be introduced. VECTYPE may be used to specify a required type for
141338fd1498Szrj vector invariant. */
141438fd1498Szrj
141538fd1498Szrj tree
vect_get_vec_def_for_operand(tree op,gimple * stmt,tree vectype)141638fd1498Szrj vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype)
141738fd1498Szrj {
141838fd1498Szrj gimple *def_stmt;
141938fd1498Szrj enum vect_def_type dt;
142038fd1498Szrj bool is_simple_use;
142138fd1498Szrj stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
142238fd1498Szrj loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
142338fd1498Szrj
142438fd1498Szrj if (dump_enabled_p ())
142538fd1498Szrj {
142638fd1498Szrj dump_printf_loc (MSG_NOTE, vect_location,
142738fd1498Szrj "vect_get_vec_def_for_operand: ");
142838fd1498Szrj dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
142938fd1498Szrj dump_printf (MSG_NOTE, "\n");
143038fd1498Szrj }
143138fd1498Szrj
143238fd1498Szrj is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt);
143338fd1498Szrj gcc_assert (is_simple_use);
143438fd1498Szrj if (def_stmt && dump_enabled_p ())
143538fd1498Szrj {
143638fd1498Szrj dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
143738fd1498Szrj dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
143838fd1498Szrj }
143938fd1498Szrj
144038fd1498Szrj if (dt == vect_constant_def || dt == vect_external_def)
144138fd1498Szrj {
144238fd1498Szrj tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
144338fd1498Szrj tree vector_type;
144438fd1498Szrj
144538fd1498Szrj if (vectype)
144638fd1498Szrj vector_type = vectype;
144738fd1498Szrj else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
144838fd1498Szrj && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
144938fd1498Szrj vector_type = build_same_sized_truth_vector_type (stmt_vectype);
145038fd1498Szrj else
145138fd1498Szrj vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
145238fd1498Szrj
145338fd1498Szrj gcc_assert (vector_type);
145438fd1498Szrj return vect_init_vector (stmt, op, vector_type, NULL);
145538fd1498Szrj }
145638fd1498Szrj else
145738fd1498Szrj return vect_get_vec_def_for_operand_1 (def_stmt, dt);
145838fd1498Szrj }
145938fd1498Szrj
146038fd1498Szrj
146138fd1498Szrj /* Function vect_get_vec_def_for_stmt_copy
146238fd1498Szrj
146338fd1498Szrj Return a vector-def for an operand. This function is used when the
146438fd1498Szrj vectorized stmt to be created (by the caller to this function) is a "copy"
146538fd1498Szrj created in case the vectorized result cannot fit in one vector, and several
146638fd1498Szrj copies of the vector-stmt are required. In this case the vector-def is
146738fd1498Szrj retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
146838fd1498Szrj of the stmt that defines VEC_OPRND.
146938fd1498Szrj DT is the type of the vector def VEC_OPRND.
147038fd1498Szrj
147138fd1498Szrj Context:
147238fd1498Szrj In case the vectorization factor (VF) is bigger than the number
147338fd1498Szrj of elements that can fit in a vectype (nunits), we have to generate
147438fd1498Szrj more than one vector stmt to vectorize the scalar stmt. This situation
147538fd1498Szrj arises when there are multiple data-types operated upon in the loop; the
147638fd1498Szrj smallest data-type determines the VF, and as a result, when vectorizing
147738fd1498Szrj stmts operating on wider types we need to create 'VF/nunits' "copies" of the
147838fd1498Szrj vector stmt (each computing a vector of 'nunits' results, and together
147938fd1498Szrj computing 'VF' results in each iteration). This function is called when
148038fd1498Szrj vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
148138fd1498Szrj which VF=16 and nunits=4, so the number of copies required is 4):
148238fd1498Szrj
148338fd1498Szrj scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
148438fd1498Szrj
148538fd1498Szrj S1: x = load VS1.0: vx.0 = memref0 VS1.1
148638fd1498Szrj VS1.1: vx.1 = memref1 VS1.2
148738fd1498Szrj VS1.2: vx.2 = memref2 VS1.3
148838fd1498Szrj VS1.3: vx.3 = memref3
148938fd1498Szrj
149038fd1498Szrj S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
149138fd1498Szrj VSnew.1: vz1 = vx.1 + ... VSnew.2
149238fd1498Szrj VSnew.2: vz2 = vx.2 + ... VSnew.3
149338fd1498Szrj VSnew.3: vz3 = vx.3 + ...
149438fd1498Szrj
149538fd1498Szrj The vectorization of S1 is explained in vectorizable_load.
149638fd1498Szrj The vectorization of S2:
149738fd1498Szrj To create the first vector-stmt out of the 4 copies - VSnew.0 -
149838fd1498Szrj the function 'vect_get_vec_def_for_operand' is called to
149938fd1498Szrj get the relevant vector-def for each operand of S2. For operand x it
150038fd1498Szrj returns the vector-def 'vx.0'.
150138fd1498Szrj
150238fd1498Szrj To create the remaining copies of the vector-stmt (VSnew.j), this
150338fd1498Szrj function is called to get the relevant vector-def for each operand. It is
150438fd1498Szrj obtained from the respective VS1.j stmt, which is recorded in the
150538fd1498Szrj STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
150638fd1498Szrj
150738fd1498Szrj For example, to obtain the vector-def 'vx.1' in order to create the
150838fd1498Szrj vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
150938fd1498Szrj Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
151038fd1498Szrj STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
151138fd1498Szrj and return its def ('vx.1').
151238fd1498Szrj Overall, to create the above sequence this function will be called 3 times:
151338fd1498Szrj vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
151438fd1498Szrj vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
151538fd1498Szrj vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
151638fd1498Szrj
151738fd1498Szrj tree
vect_get_vec_def_for_stmt_copy(enum vect_def_type dt,tree vec_oprnd)151838fd1498Szrj vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
151938fd1498Szrj {
152038fd1498Szrj gimple *vec_stmt_for_operand;
152138fd1498Szrj stmt_vec_info def_stmt_info;
152238fd1498Szrj
152338fd1498Szrj /* Do nothing; can reuse same def. */
152438fd1498Szrj if (dt == vect_external_def || dt == vect_constant_def )
152538fd1498Szrj return vec_oprnd;
152638fd1498Szrj
152738fd1498Szrj vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
152838fd1498Szrj def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
152938fd1498Szrj gcc_assert (def_stmt_info);
153038fd1498Szrj vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
153138fd1498Szrj gcc_assert (vec_stmt_for_operand);
153238fd1498Szrj if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
153338fd1498Szrj vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
153438fd1498Szrj else
153538fd1498Szrj vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
153638fd1498Szrj return vec_oprnd;
153738fd1498Szrj }
153838fd1498Szrj
153938fd1498Szrj
154038fd1498Szrj /* Get vectorized definitions for the operands to create a copy of an original
154138fd1498Szrj stmt. See vect_get_vec_def_for_stmt_copy () for details. */
154238fd1498Szrj
154338fd1498Szrj void
vect_get_vec_defs_for_stmt_copy(enum vect_def_type * dt,vec<tree> * vec_oprnds0,vec<tree> * vec_oprnds1)154438fd1498Szrj vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
154538fd1498Szrj vec<tree> *vec_oprnds0,
154638fd1498Szrj vec<tree> *vec_oprnds1)
154738fd1498Szrj {
154838fd1498Szrj tree vec_oprnd = vec_oprnds0->pop ();
154938fd1498Szrj
155038fd1498Szrj vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
155138fd1498Szrj vec_oprnds0->quick_push (vec_oprnd);
155238fd1498Szrj
155338fd1498Szrj if (vec_oprnds1 && vec_oprnds1->length ())
155438fd1498Szrj {
155538fd1498Szrj vec_oprnd = vec_oprnds1->pop ();
155638fd1498Szrj vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
155738fd1498Szrj vec_oprnds1->quick_push (vec_oprnd);
155838fd1498Szrj }
155938fd1498Szrj }
156038fd1498Szrj
156138fd1498Szrj
156238fd1498Szrj /* Get vectorized definitions for OP0 and OP1. */
156338fd1498Szrj
156438fd1498Szrj void
vect_get_vec_defs(tree op0,tree op1,gimple * stmt,vec<tree> * vec_oprnds0,vec<tree> * vec_oprnds1,slp_tree slp_node)156538fd1498Szrj vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
156638fd1498Szrj vec<tree> *vec_oprnds0,
156738fd1498Szrj vec<tree> *vec_oprnds1,
156838fd1498Szrj slp_tree slp_node)
156938fd1498Szrj {
157038fd1498Szrj if (slp_node)
157138fd1498Szrj {
157238fd1498Szrj int nops = (op1 == NULL_TREE) ? 1 : 2;
157338fd1498Szrj auto_vec<tree> ops (nops);
157438fd1498Szrj auto_vec<vec<tree> > vec_defs (nops);
157538fd1498Szrj
157638fd1498Szrj ops.quick_push (op0);
157738fd1498Szrj if (op1)
157838fd1498Szrj ops.quick_push (op1);
157938fd1498Szrj
158038fd1498Szrj vect_get_slp_defs (ops, slp_node, &vec_defs);
158138fd1498Szrj
158238fd1498Szrj *vec_oprnds0 = vec_defs[0];
158338fd1498Szrj if (op1)
158438fd1498Szrj *vec_oprnds1 = vec_defs[1];
158538fd1498Szrj }
158638fd1498Szrj else
158738fd1498Szrj {
158838fd1498Szrj tree vec_oprnd;
158938fd1498Szrj
159038fd1498Szrj vec_oprnds0->create (1);
159138fd1498Szrj vec_oprnd = vect_get_vec_def_for_operand (op0, stmt);
159238fd1498Szrj vec_oprnds0->quick_push (vec_oprnd);
159338fd1498Szrj
159438fd1498Szrj if (op1)
159538fd1498Szrj {
159638fd1498Szrj vec_oprnds1->create (1);
159738fd1498Szrj vec_oprnd = vect_get_vec_def_for_operand (op1, stmt);
159838fd1498Szrj vec_oprnds1->quick_push (vec_oprnd);
159938fd1498Szrj }
160038fd1498Szrj }
160138fd1498Szrj }
160238fd1498Szrj
160338fd1498Szrj /* Helper function called by vect_finish_replace_stmt and
160438fd1498Szrj vect_finish_stmt_generation. Set the location of the new
160538fd1498Szrj statement and create a stmt_vec_info for it. */
160638fd1498Szrj
160738fd1498Szrj static void
vect_finish_stmt_generation_1(gimple * stmt,gimple * vec_stmt)160838fd1498Szrj vect_finish_stmt_generation_1 (gimple *stmt, gimple *vec_stmt)
160938fd1498Szrj {
161038fd1498Szrj stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
161138fd1498Szrj vec_info *vinfo = stmt_info->vinfo;
161238fd1498Szrj
161338fd1498Szrj set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, vinfo));
161438fd1498Szrj
161538fd1498Szrj if (dump_enabled_p ())
161638fd1498Szrj {
161738fd1498Szrj dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
161838fd1498Szrj dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
161938fd1498Szrj }
162038fd1498Szrj
162138fd1498Szrj gimple_set_location (vec_stmt, gimple_location (stmt));
162238fd1498Szrj
162338fd1498Szrj /* While EH edges will generally prevent vectorization, stmt might
162438fd1498Szrj e.g. be in a must-not-throw region. Ensure newly created stmts
162538fd1498Szrj that could throw are part of the same region. */
162638fd1498Szrj int lp_nr = lookup_stmt_eh_lp (stmt);
162738fd1498Szrj if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
162838fd1498Szrj add_stmt_to_eh_lp (vec_stmt, lp_nr);
162938fd1498Szrj }
163038fd1498Szrj
163138fd1498Szrj /* Replace the scalar statement STMT with a new vector statement VEC_STMT,
163238fd1498Szrj which sets the same scalar result as STMT did. */
163338fd1498Szrj
163438fd1498Szrj void
vect_finish_replace_stmt(gimple * stmt,gimple * vec_stmt)163538fd1498Szrj vect_finish_replace_stmt (gimple *stmt, gimple *vec_stmt)
163638fd1498Szrj {
163738fd1498Szrj gcc_assert (gimple_get_lhs (stmt) == gimple_get_lhs (vec_stmt));
163838fd1498Szrj
163938fd1498Szrj gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1640*58e805e6Szrj gsi_replace (&gsi, vec_stmt, true);
164138fd1498Szrj
164238fd1498Szrj vect_finish_stmt_generation_1 (stmt, vec_stmt);
164338fd1498Szrj }
164438fd1498Szrj
164538fd1498Szrj /* Function vect_finish_stmt_generation.
164638fd1498Szrj
164738fd1498Szrj Insert a new stmt. */
164838fd1498Szrj
164938fd1498Szrj void
vect_finish_stmt_generation(gimple * stmt,gimple * vec_stmt,gimple_stmt_iterator * gsi)165038fd1498Szrj vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
165138fd1498Szrj gimple_stmt_iterator *gsi)
165238fd1498Szrj {
165338fd1498Szrj gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
165438fd1498Szrj
165538fd1498Szrj if (!gsi_end_p (*gsi)
165638fd1498Szrj && gimple_has_mem_ops (vec_stmt))
165738fd1498Szrj {
165838fd1498Szrj gimple *at_stmt = gsi_stmt (*gsi);
165938fd1498Szrj tree vuse = gimple_vuse (at_stmt);
166038fd1498Szrj if (vuse && TREE_CODE (vuse) == SSA_NAME)
166138fd1498Szrj {
166238fd1498Szrj tree vdef = gimple_vdef (at_stmt);
166338fd1498Szrj gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
166438fd1498Szrj /* If we have an SSA vuse and insert a store, update virtual
166538fd1498Szrj SSA form to avoid triggering the renamer. Do so only
166638fd1498Szrj if we can easily see all uses - which is what almost always
166738fd1498Szrj happens with the way vectorized stmts are inserted. */
166838fd1498Szrj if ((vdef && TREE_CODE (vdef) == SSA_NAME)
166938fd1498Szrj && ((is_gimple_assign (vec_stmt)
167038fd1498Szrj && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
167138fd1498Szrj || (is_gimple_call (vec_stmt)
167238fd1498Szrj && !(gimple_call_flags (vec_stmt)
167338fd1498Szrj & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
167438fd1498Szrj {
167538fd1498Szrj tree new_vdef = copy_ssa_name (vuse, vec_stmt);
167638fd1498Szrj gimple_set_vdef (vec_stmt, new_vdef);
167738fd1498Szrj SET_USE (gimple_vuse_op (at_stmt), new_vdef);
167838fd1498Szrj }
167938fd1498Szrj }
168038fd1498Szrj }
168138fd1498Szrj gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
168238fd1498Szrj vect_finish_stmt_generation_1 (stmt, vec_stmt);
168338fd1498Szrj }
168438fd1498Szrj
168538fd1498Szrj /* We want to vectorize a call to combined function CFN with function
168638fd1498Szrj decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
168738fd1498Szrj as the types of all inputs. Check whether this is possible using
168838fd1498Szrj an internal function, returning its code if so or IFN_LAST if not. */
168938fd1498Szrj
169038fd1498Szrj static internal_fn
vectorizable_internal_function(combined_fn cfn,tree fndecl,tree vectype_out,tree vectype_in)169138fd1498Szrj vectorizable_internal_function (combined_fn cfn, tree fndecl,
169238fd1498Szrj tree vectype_out, tree vectype_in)
169338fd1498Szrj {
169438fd1498Szrj internal_fn ifn;
169538fd1498Szrj if (internal_fn_p (cfn))
169638fd1498Szrj ifn = as_internal_fn (cfn);
169738fd1498Szrj else
169838fd1498Szrj ifn = associated_internal_fn (fndecl);
169938fd1498Szrj if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
170038fd1498Szrj {
170138fd1498Szrj const direct_internal_fn_info &info = direct_internal_fn (ifn);
170238fd1498Szrj if (info.vectorizable)
170338fd1498Szrj {
170438fd1498Szrj tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
170538fd1498Szrj tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
170638fd1498Szrj if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
170738fd1498Szrj OPTIMIZE_FOR_SPEED))
170838fd1498Szrj return ifn;
170938fd1498Szrj }
171038fd1498Szrj }
171138fd1498Szrj return IFN_LAST;
171238fd1498Szrj }
171338fd1498Szrj
171438fd1498Szrj
171538fd1498Szrj static tree permute_vec_elements (tree, tree, tree, gimple *,
171638fd1498Szrj gimple_stmt_iterator *);
171738fd1498Szrj
171838fd1498Szrj /* Check whether a load or store statement in the loop described by
171938fd1498Szrj LOOP_VINFO is possible in a fully-masked loop. This is testing
172038fd1498Szrj whether the vectorizer pass has the appropriate support, as well as
172138fd1498Szrj whether the target does.
172238fd1498Szrj
172338fd1498Szrj VLS_TYPE says whether the statement is a load or store and VECTYPE
172438fd1498Szrj is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
172538fd1498Szrj says how the load or store is going to be implemented and GROUP_SIZE
172638fd1498Szrj is the number of load or store statements in the containing group.
172738fd1498Szrj If the access is a gather load or scatter store, GS_INFO describes
172838fd1498Szrj its arguments.
172938fd1498Szrj
173038fd1498Szrj Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
173138fd1498Szrj supported, otherwise record the required mask types. */
173238fd1498Szrj
173338fd1498Szrj static void
check_load_store_masking(loop_vec_info loop_vinfo,tree vectype,vec_load_store_type vls_type,int group_size,vect_memory_access_type memory_access_type,gather_scatter_info * gs_info)173438fd1498Szrj check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
173538fd1498Szrj vec_load_store_type vls_type, int group_size,
173638fd1498Szrj vect_memory_access_type memory_access_type,
173738fd1498Szrj gather_scatter_info *gs_info)
173838fd1498Szrj {
173938fd1498Szrj /* Invariant loads need no special support. */
174038fd1498Szrj if (memory_access_type == VMAT_INVARIANT)
174138fd1498Szrj return;
174238fd1498Szrj
174338fd1498Szrj vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
174438fd1498Szrj machine_mode vecmode = TYPE_MODE (vectype);
174538fd1498Szrj bool is_load = (vls_type == VLS_LOAD);
174638fd1498Szrj if (memory_access_type == VMAT_LOAD_STORE_LANES)
174738fd1498Szrj {
174838fd1498Szrj if (is_load
174938fd1498Szrj ? !vect_load_lanes_supported (vectype, group_size, true)
175038fd1498Szrj : !vect_store_lanes_supported (vectype, group_size, true))
175138fd1498Szrj {
175238fd1498Szrj if (dump_enabled_p ())
175338fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
175438fd1498Szrj "can't use a fully-masked loop because the"
175538fd1498Szrj " target doesn't have an appropriate masked"
175638fd1498Szrj " load/store-lanes instruction.\n");
175738fd1498Szrj LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
175838fd1498Szrj return;
175938fd1498Szrj }
176038fd1498Szrj unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
176138fd1498Szrj vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
176238fd1498Szrj return;
176338fd1498Szrj }
176438fd1498Szrj
176538fd1498Szrj if (memory_access_type == VMAT_GATHER_SCATTER)
176638fd1498Szrj {
176738fd1498Szrj internal_fn ifn = (is_load
176838fd1498Szrj ? IFN_MASK_GATHER_LOAD
176938fd1498Szrj : IFN_MASK_SCATTER_STORE);
177038fd1498Szrj tree offset_type = TREE_TYPE (gs_info->offset);
177138fd1498Szrj if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
177238fd1498Szrj gs_info->memory_type,
177338fd1498Szrj TYPE_SIGN (offset_type),
177438fd1498Szrj gs_info->scale))
177538fd1498Szrj {
177638fd1498Szrj if (dump_enabled_p ())
177738fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
177838fd1498Szrj "can't use a fully-masked loop because the"
177938fd1498Szrj " target doesn't have an appropriate masked"
178038fd1498Szrj " gather load or scatter store instruction.\n");
178138fd1498Szrj LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
178238fd1498Szrj return;
178338fd1498Szrj }
178438fd1498Szrj unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
178538fd1498Szrj vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
178638fd1498Szrj return;
178738fd1498Szrj }
178838fd1498Szrj
178938fd1498Szrj if (memory_access_type != VMAT_CONTIGUOUS
179038fd1498Szrj && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
179138fd1498Szrj {
179238fd1498Szrj /* Element X of the data must come from iteration i * VF + X of the
179338fd1498Szrj scalar loop. We need more work to support other mappings. */
179438fd1498Szrj if (dump_enabled_p ())
179538fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
179638fd1498Szrj "can't use a fully-masked loop because an access"
179738fd1498Szrj " isn't contiguous.\n");
179838fd1498Szrj LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
179938fd1498Szrj return;
180038fd1498Szrj }
180138fd1498Szrj
180238fd1498Szrj machine_mode mask_mode;
180338fd1498Szrj if (!(targetm.vectorize.get_mask_mode
180438fd1498Szrj (GET_MODE_NUNITS (vecmode),
180538fd1498Szrj GET_MODE_SIZE (vecmode)).exists (&mask_mode))
180638fd1498Szrj || !can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
180738fd1498Szrj {
180838fd1498Szrj if (dump_enabled_p ())
180938fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
181038fd1498Szrj "can't use a fully-masked loop because the target"
181138fd1498Szrj " doesn't have the appropriate masked load or"
181238fd1498Szrj " store.\n");
181338fd1498Szrj LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
181438fd1498Szrj return;
181538fd1498Szrj }
181638fd1498Szrj /* We might load more scalars than we need for permuting SLP loads.
181738fd1498Szrj We checked in get_group_load_store_type that the extra elements
181838fd1498Szrj don't leak into a new vector. */
181938fd1498Szrj poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
182038fd1498Szrj poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
182138fd1498Szrj unsigned int nvectors;
182238fd1498Szrj if (can_div_away_from_zero_p (group_size * vf, nunits, &nvectors))
182338fd1498Szrj vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype);
182438fd1498Szrj else
182538fd1498Szrj gcc_unreachable ();
182638fd1498Szrj }
182738fd1498Szrj
182838fd1498Szrj /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
182938fd1498Szrj form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
183038fd1498Szrj that needs to be applied to all loads and stores in a vectorized loop.
183138fd1498Szrj Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
183238fd1498Szrj
183338fd1498Szrj MASK_TYPE is the type of both masks. If new statements are needed,
183438fd1498Szrj insert them before GSI. */
183538fd1498Szrj
183638fd1498Szrj static tree
prepare_load_store_mask(tree mask_type,tree loop_mask,tree vec_mask,gimple_stmt_iterator * gsi)183738fd1498Szrj prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
183838fd1498Szrj gimple_stmt_iterator *gsi)
183938fd1498Szrj {
184038fd1498Szrj gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
184138fd1498Szrj if (!loop_mask)
184238fd1498Szrj return vec_mask;
184338fd1498Szrj
184438fd1498Szrj gcc_assert (TREE_TYPE (loop_mask) == mask_type);
184538fd1498Szrj tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
184638fd1498Szrj gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
184738fd1498Szrj vec_mask, loop_mask);
184838fd1498Szrj gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
184938fd1498Szrj return and_res;
185038fd1498Szrj }
185138fd1498Szrj
185238fd1498Szrj /* Determine whether we can use a gather load or scatter store to vectorize
185338fd1498Szrj strided load or store STMT by truncating the current offset to a smaller
185438fd1498Szrj width. We need to be able to construct an offset vector:
185538fd1498Szrj
185638fd1498Szrj { 0, X, X*2, X*3, ... }
185738fd1498Szrj
185838fd1498Szrj without loss of precision, where X is STMT's DR_STEP.
185938fd1498Szrj
186038fd1498Szrj Return true if this is possible, describing the gather load or scatter
186138fd1498Szrj store in GS_INFO. MASKED_P is true if the load or store is conditional. */
186238fd1498Szrj
186338fd1498Szrj static bool
vect_truncate_gather_scatter_offset(gimple * stmt,loop_vec_info loop_vinfo,bool masked_p,gather_scatter_info * gs_info)186438fd1498Szrj vect_truncate_gather_scatter_offset (gimple *stmt, loop_vec_info loop_vinfo,
186538fd1498Szrj bool masked_p,
186638fd1498Szrj gather_scatter_info *gs_info)
186738fd1498Szrj {
186838fd1498Szrj stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
186938fd1498Szrj data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
187038fd1498Szrj tree step = DR_STEP (dr);
187138fd1498Szrj if (TREE_CODE (step) != INTEGER_CST)
187238fd1498Szrj {
187338fd1498Szrj /* ??? Perhaps we could use range information here? */
187438fd1498Szrj if (dump_enabled_p ())
187538fd1498Szrj dump_printf_loc (MSG_NOTE, vect_location,
187638fd1498Szrj "cannot truncate variable step.\n");
187738fd1498Szrj return false;
187838fd1498Szrj }
187938fd1498Szrj
188038fd1498Szrj /* Get the number of bits in an element. */
188138fd1498Szrj tree vectype = STMT_VINFO_VECTYPE (stmt_info);
188238fd1498Szrj scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
188338fd1498Szrj unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
188438fd1498Szrj
188538fd1498Szrj /* Set COUNT to the upper limit on the number of elements - 1.
188638fd1498Szrj Start with the maximum vectorization factor. */
188738fd1498Szrj unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
188838fd1498Szrj
188938fd1498Szrj /* Try lowering COUNT to the number of scalar latch iterations. */
189038fd1498Szrj struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
189138fd1498Szrj widest_int max_iters;
189238fd1498Szrj if (max_loop_iterations (loop, &max_iters)
189338fd1498Szrj && max_iters < count)
189438fd1498Szrj count = max_iters.to_shwi ();
189538fd1498Szrj
189638fd1498Szrj /* Try scales of 1 and the element size. */
189738fd1498Szrj int scales[] = { 1, vect_get_scalar_dr_size (dr) };
189838fd1498Szrj bool overflow_p = false;
189938fd1498Szrj for (int i = 0; i < 2; ++i)
190038fd1498Szrj {
190138fd1498Szrj int scale = scales[i];
190238fd1498Szrj widest_int factor;
190338fd1498Szrj if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
190438fd1498Szrj continue;
190538fd1498Szrj
190638fd1498Szrj /* See whether we can calculate (COUNT - 1) * STEP / SCALE
190738fd1498Szrj in OFFSET_BITS bits. */
190838fd1498Szrj widest_int range = wi::mul (count, factor, SIGNED, &overflow_p);
190938fd1498Szrj if (overflow_p)
191038fd1498Szrj continue;
191138fd1498Szrj signop sign = range >= 0 ? UNSIGNED : SIGNED;
191238fd1498Szrj if (wi::min_precision (range, sign) > element_bits)
191338fd1498Szrj {
191438fd1498Szrj overflow_p = true;
191538fd1498Szrj continue;
191638fd1498Szrj }
191738fd1498Szrj
191838fd1498Szrj /* See whether the target supports the operation. */
191938fd1498Szrj tree memory_type = TREE_TYPE (DR_REF (dr));
192038fd1498Szrj if (!vect_gather_scatter_fn_p (DR_IS_READ (dr), masked_p, vectype,
192138fd1498Szrj memory_type, element_bits, sign, scale,
192238fd1498Szrj &gs_info->ifn, &gs_info->element_type))
192338fd1498Szrj continue;
192438fd1498Szrj
192538fd1498Szrj tree offset_type = build_nonstandard_integer_type (element_bits,
192638fd1498Szrj sign == UNSIGNED);
192738fd1498Szrj
192838fd1498Szrj gs_info->decl = NULL_TREE;
192938fd1498Szrj /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
193038fd1498Szrj but we don't need to store that here. */
193138fd1498Szrj gs_info->base = NULL_TREE;
193238fd1498Szrj gs_info->offset = fold_convert (offset_type, step);
193338fd1498Szrj gs_info->offset_dt = vect_constant_def;
193438fd1498Szrj gs_info->offset_vectype = NULL_TREE;
193538fd1498Szrj gs_info->scale = scale;
193638fd1498Szrj gs_info->memory_type = memory_type;
193738fd1498Szrj return true;
193838fd1498Szrj }
193938fd1498Szrj
194038fd1498Szrj if (overflow_p && dump_enabled_p ())
194138fd1498Szrj dump_printf_loc (MSG_NOTE, vect_location,
194238fd1498Szrj "truncating gather/scatter offset to %d bits"
194338fd1498Szrj " might change its value.\n", element_bits);
194438fd1498Szrj
194538fd1498Szrj return false;
194638fd1498Szrj }
194738fd1498Szrj
194838fd1498Szrj /* Return true if we can use gather/scatter internal functions to
194938fd1498Szrj vectorize STMT, which is a grouped or strided load or store.
195038fd1498Szrj MASKED_P is true if load or store is conditional. When returning
195138fd1498Szrj true, fill in GS_INFO with the information required to perform the
195238fd1498Szrj operation. */
195338fd1498Szrj
195438fd1498Szrj static bool
vect_use_strided_gather_scatters_p(gimple * stmt,loop_vec_info loop_vinfo,bool masked_p,gather_scatter_info * gs_info)195538fd1498Szrj vect_use_strided_gather_scatters_p (gimple *stmt, loop_vec_info loop_vinfo,
195638fd1498Szrj bool masked_p,
195738fd1498Szrj gather_scatter_info *gs_info)
195838fd1498Szrj {
195938fd1498Szrj if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info)
196038fd1498Szrj || gs_info->decl)
196138fd1498Szrj return vect_truncate_gather_scatter_offset (stmt, loop_vinfo,
196238fd1498Szrj masked_p, gs_info);
196338fd1498Szrj
196438fd1498Szrj scalar_mode element_mode = SCALAR_TYPE_MODE (gs_info->element_type);
196538fd1498Szrj unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
196638fd1498Szrj tree offset_type = TREE_TYPE (gs_info->offset);
196738fd1498Szrj unsigned int offset_bits = TYPE_PRECISION (offset_type);
196838fd1498Szrj
196938fd1498Szrj /* Enforced by vect_check_gather_scatter. */
197038fd1498Szrj gcc_assert (element_bits >= offset_bits);
197138fd1498Szrj
197238fd1498Szrj /* If the elements are wider than the offset, convert the offset to the
197338fd1498Szrj same width, without changing its sign. */
197438fd1498Szrj if (element_bits > offset_bits)
197538fd1498Szrj {
197638fd1498Szrj bool unsigned_p = TYPE_UNSIGNED (offset_type);
197738fd1498Szrj offset_type = build_nonstandard_integer_type (element_bits, unsigned_p);
197838fd1498Szrj gs_info->offset = fold_convert (offset_type, gs_info->offset);
197938fd1498Szrj }
198038fd1498Szrj
198138fd1498Szrj if (dump_enabled_p ())
198238fd1498Szrj dump_printf_loc (MSG_NOTE, vect_location,
198338fd1498Szrj "using gather/scatter for strided/grouped access,"
198438fd1498Szrj " scale = %d\n", gs_info->scale);
198538fd1498Szrj
198638fd1498Szrj return true;
198738fd1498Szrj }
198838fd1498Szrj
198938fd1498Szrj /* STMT is a non-strided load or store, meaning that it accesses
199038fd1498Szrj elements with a known constant step. Return -1 if that step
199138fd1498Szrj is negative, 0 if it is zero, and 1 if it is greater than zero. */
199238fd1498Szrj
199338fd1498Szrj static int
compare_step_with_zero(gimple * stmt)199438fd1498Szrj compare_step_with_zero (gimple *stmt)
199538fd1498Szrj {
199638fd1498Szrj stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
199738fd1498Szrj data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
199838fd1498Szrj return tree_int_cst_compare (vect_dr_behavior (dr)->step,
199938fd1498Szrj size_zero_node);
200038fd1498Szrj }
200138fd1498Szrj
200238fd1498Szrj /* If the target supports a permute mask that reverses the elements in
200338fd1498Szrj a vector of type VECTYPE, return that mask, otherwise return null. */
200438fd1498Szrj
200538fd1498Szrj static tree
perm_mask_for_reverse(tree vectype)200638fd1498Szrj perm_mask_for_reverse (tree vectype)
200738fd1498Szrj {
200838fd1498Szrj poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
200938fd1498Szrj
201038fd1498Szrj /* The encoding has a single stepped pattern. */
201138fd1498Szrj vec_perm_builder sel (nunits, 1, 3);
201238fd1498Szrj for (int i = 0; i < 3; ++i)
201338fd1498Szrj sel.quick_push (nunits - 1 - i);
201438fd1498Szrj
201538fd1498Szrj vec_perm_indices indices (sel, 1, nunits);
201638fd1498Szrj if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
201738fd1498Szrj return NULL_TREE;
201838fd1498Szrj return vect_gen_perm_mask_checked (vectype, indices);
201938fd1498Szrj }
202038fd1498Szrj
202138fd1498Szrj /* STMT is either a masked or unconditional store. Return the value
202238fd1498Szrj being stored. */
202338fd1498Szrj
202438fd1498Szrj tree
vect_get_store_rhs(gimple * stmt)202538fd1498Szrj vect_get_store_rhs (gimple *stmt)
202638fd1498Szrj {
202738fd1498Szrj if (gassign *assign = dyn_cast <gassign *> (stmt))
202838fd1498Szrj {
202938fd1498Szrj gcc_assert (gimple_assign_single_p (assign));
203038fd1498Szrj return gimple_assign_rhs1 (assign);
203138fd1498Szrj }
203238fd1498Szrj if (gcall *call = dyn_cast <gcall *> (stmt))
203338fd1498Szrj {
203438fd1498Szrj internal_fn ifn = gimple_call_internal_fn (call);
203538fd1498Szrj int index = internal_fn_stored_value_index (ifn);
203638fd1498Szrj gcc_assert (index >= 0);
203738fd1498Szrj return gimple_call_arg (stmt, index);
203838fd1498Szrj }
203938fd1498Szrj gcc_unreachable ();
204038fd1498Szrj }
204138fd1498Szrj
204238fd1498Szrj /* A subroutine of get_load_store_type, with a subset of the same
204338fd1498Szrj arguments. Handle the case where STMT is part of a grouped load
204438fd1498Szrj or store.
204538fd1498Szrj
204638fd1498Szrj For stores, the statements in the group are all consecutive
204738fd1498Szrj and there is no gap at the end. For loads, the statements in the
204838fd1498Szrj group might not be consecutive; there can be gaps between statements
204938fd1498Szrj as well as at the end. */
205038fd1498Szrj
205138fd1498Szrj static bool
get_group_load_store_type(gimple * stmt,tree vectype,bool slp,bool masked_p,vec_load_store_type vls_type,vect_memory_access_type * memory_access_type,gather_scatter_info * gs_info)205238fd1498Szrj get_group_load_store_type (gimple *stmt, tree vectype, bool slp,
205338fd1498Szrj bool masked_p, vec_load_store_type vls_type,
205438fd1498Szrj vect_memory_access_type *memory_access_type,
205538fd1498Szrj gather_scatter_info *gs_info)
205638fd1498Szrj {
205738fd1498Szrj stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
205838fd1498Szrj vec_info *vinfo = stmt_info->vinfo;
205938fd1498Szrj loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
206038fd1498Szrj struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
206138fd1498Szrj gimple *first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
206238fd1498Szrj data_reference *first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
206338fd1498Szrj unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
206438fd1498Szrj bool single_element_p = (stmt == first_stmt
206538fd1498Szrj && !GROUP_NEXT_ELEMENT (stmt_info));
206638fd1498Szrj unsigned HOST_WIDE_INT gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
206738fd1498Szrj poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
206838fd1498Szrj
206938fd1498Szrj /* True if the vectorized statements would access beyond the last
207038fd1498Szrj statement in the group. */
207138fd1498Szrj bool overrun_p = false;
207238fd1498Szrj
207338fd1498Szrj /* True if we can cope with such overrun by peeling for gaps, so that
207438fd1498Szrj there is at least one final scalar iteration after the vector loop. */
207538fd1498Szrj bool can_overrun_p = (!masked_p
207638fd1498Szrj && vls_type == VLS_LOAD
207738fd1498Szrj && loop_vinfo
207838fd1498Szrj && !loop->inner);
207938fd1498Szrj
208038fd1498Szrj /* There can only be a gap at the end of the group if the stride is
208138fd1498Szrj known at compile time. */
208238fd1498Szrj gcc_assert (!STMT_VINFO_STRIDED_P (stmt_info) || gap == 0);
208338fd1498Szrj
208438fd1498Szrj /* Stores can't yet have gaps. */
208538fd1498Szrj gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
208638fd1498Szrj
208738fd1498Szrj if (slp)
208838fd1498Szrj {
208938fd1498Szrj if (STMT_VINFO_STRIDED_P (stmt_info))
209038fd1498Szrj {
209138fd1498Szrj /* Try to use consecutive accesses of GROUP_SIZE elements,
209238fd1498Szrj separated by the stride, until we have a complete vector.
209338fd1498Szrj Fall back to scalar accesses if that isn't possible. */
209438fd1498Szrj if (multiple_p (nunits, group_size))
209538fd1498Szrj *memory_access_type = VMAT_STRIDED_SLP;
209638fd1498Szrj else
209738fd1498Szrj *memory_access_type = VMAT_ELEMENTWISE;
209838fd1498Szrj }
209938fd1498Szrj else
210038fd1498Szrj {
210138fd1498Szrj overrun_p = loop_vinfo && gap != 0;
210238fd1498Szrj if (overrun_p && vls_type != VLS_LOAD)
210338fd1498Szrj {
210438fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
210538fd1498Szrj "Grouped store with gaps requires"
210638fd1498Szrj " non-consecutive accesses\n");
210738fd1498Szrj return false;
210838fd1498Szrj }
210938fd1498Szrj /* An overrun is fine if the trailing elements are smaller
211038fd1498Szrj than the alignment boundary B. Every vector access will
211138fd1498Szrj be a multiple of B and so we are guaranteed to access a
211238fd1498Szrj non-gap element in the same B-sized block. */
211338fd1498Szrj if (overrun_p
211438fd1498Szrj && gap < (vect_known_alignment_in_bytes (first_dr)
211538fd1498Szrj / vect_get_scalar_dr_size (first_dr)))
211638fd1498Szrj overrun_p = false;
211738fd1498Szrj if (overrun_p && !can_overrun_p)
211838fd1498Szrj {
211938fd1498Szrj if (dump_enabled_p ())
212038fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
212138fd1498Szrj "Peeling for outer loop is not supported\n");
212238fd1498Szrj return false;
212338fd1498Szrj }
212438fd1498Szrj *memory_access_type = VMAT_CONTIGUOUS;
212538fd1498Szrj }
212638fd1498Szrj }
212738fd1498Szrj else
212838fd1498Szrj {
212938fd1498Szrj /* We can always handle this case using elementwise accesses,
213038fd1498Szrj but see if something more efficient is available. */
213138fd1498Szrj *memory_access_type = VMAT_ELEMENTWISE;
213238fd1498Szrj
213338fd1498Szrj /* If there is a gap at the end of the group then these optimizations
213438fd1498Szrj would access excess elements in the last iteration. */
213538fd1498Szrj bool would_overrun_p = (gap != 0);
213638fd1498Szrj /* An overrun is fine if the trailing elements are smaller than the
213738fd1498Szrj alignment boundary B. Every vector access will be a multiple of B
213838fd1498Szrj and so we are guaranteed to access a non-gap element in the
213938fd1498Szrj same B-sized block. */
214038fd1498Szrj if (would_overrun_p
214138fd1498Szrj && !masked_p
214238fd1498Szrj && gap < (vect_known_alignment_in_bytes (first_dr)
214338fd1498Szrj / vect_get_scalar_dr_size (first_dr)))
214438fd1498Szrj would_overrun_p = false;
214538fd1498Szrj
214638fd1498Szrj if (!STMT_VINFO_STRIDED_P (stmt_info)
214738fd1498Szrj && (can_overrun_p || !would_overrun_p)
214838fd1498Szrj && compare_step_with_zero (stmt) > 0)
214938fd1498Szrj {
215038fd1498Szrj /* First cope with the degenerate case of a single-element
215138fd1498Szrj vector. */
215238fd1498Szrj if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
215338fd1498Szrj *memory_access_type = VMAT_CONTIGUOUS;
215438fd1498Szrj
215538fd1498Szrj /* Otherwise try using LOAD/STORE_LANES. */
215638fd1498Szrj if (*memory_access_type == VMAT_ELEMENTWISE
215738fd1498Szrj && (vls_type == VLS_LOAD
215838fd1498Szrj ? vect_load_lanes_supported (vectype, group_size, masked_p)
215938fd1498Szrj : vect_store_lanes_supported (vectype, group_size,
216038fd1498Szrj masked_p)))
216138fd1498Szrj {
216238fd1498Szrj *memory_access_type = VMAT_LOAD_STORE_LANES;
216338fd1498Szrj overrun_p = would_overrun_p;
216438fd1498Szrj }
216538fd1498Szrj
216638fd1498Szrj /* If that fails, try using permuting loads. */
216738fd1498Szrj if (*memory_access_type == VMAT_ELEMENTWISE
216838fd1498Szrj && (vls_type == VLS_LOAD
216938fd1498Szrj ? vect_grouped_load_supported (vectype, single_element_p,
217038fd1498Szrj group_size)
217138fd1498Szrj : vect_grouped_store_supported (vectype, group_size)))
217238fd1498Szrj {
217338fd1498Szrj *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
217438fd1498Szrj overrun_p = would_overrun_p;
217538fd1498Szrj }
217638fd1498Szrj }
217738fd1498Szrj
217838fd1498Szrj /* As a last resort, trying using a gather load or scatter store.
217938fd1498Szrj
218038fd1498Szrj ??? Although the code can handle all group sizes correctly,
218138fd1498Szrj it probably isn't a win to use separate strided accesses based
218238fd1498Szrj on nearby locations. Or, even if it's a win over scalar code,
218338fd1498Szrj it might not be a win over vectorizing at a lower VF, if that
218438fd1498Szrj allows us to use contiguous accesses. */
218538fd1498Szrj if (*memory_access_type == VMAT_ELEMENTWISE
218638fd1498Szrj && single_element_p
218738fd1498Szrj && loop_vinfo
218838fd1498Szrj && vect_use_strided_gather_scatters_p (stmt, loop_vinfo,
218938fd1498Szrj masked_p, gs_info))
219038fd1498Szrj *memory_access_type = VMAT_GATHER_SCATTER;
219138fd1498Szrj }
219238fd1498Szrj
219338fd1498Szrj if (vls_type != VLS_LOAD && first_stmt == stmt)
219438fd1498Szrj {
219538fd1498Szrj /* STMT is the leader of the group. Check the operands of all the
219638fd1498Szrj stmts of the group. */
219738fd1498Szrj gimple *next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
219838fd1498Szrj while (next_stmt)
219938fd1498Szrj {
220038fd1498Szrj tree op = vect_get_store_rhs (next_stmt);
220138fd1498Szrj gimple *def_stmt;
220238fd1498Szrj enum vect_def_type dt;
220338fd1498Szrj if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
220438fd1498Szrj {
220538fd1498Szrj if (dump_enabled_p ())
220638fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
220738fd1498Szrj "use not simple.\n");
220838fd1498Szrj return false;
220938fd1498Szrj }
221038fd1498Szrj next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
221138fd1498Szrj }
221238fd1498Szrj }
221338fd1498Szrj
221438fd1498Szrj if (overrun_p)
221538fd1498Szrj {
221638fd1498Szrj gcc_assert (can_overrun_p);
221738fd1498Szrj if (dump_enabled_p ())
221838fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
221938fd1498Szrj "Data access with gaps requires scalar "
222038fd1498Szrj "epilogue loop\n");
222138fd1498Szrj LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
222238fd1498Szrj }
222338fd1498Szrj
222438fd1498Szrj return true;
222538fd1498Szrj }
222638fd1498Szrj
222738fd1498Szrj /* A subroutine of get_load_store_type, with a subset of the same
222838fd1498Szrj arguments. Handle the case where STMT is a load or store that
222938fd1498Szrj accesses consecutive elements with a negative step. */
223038fd1498Szrj
223138fd1498Szrj static vect_memory_access_type
get_negative_load_store_type(gimple * stmt,tree vectype,vec_load_store_type vls_type,unsigned int ncopies)223238fd1498Szrj get_negative_load_store_type (gimple *stmt, tree vectype,
223338fd1498Szrj vec_load_store_type vls_type,
223438fd1498Szrj unsigned int ncopies)
223538fd1498Szrj {
223638fd1498Szrj stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
223738fd1498Szrj struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
223838fd1498Szrj dr_alignment_support alignment_support_scheme;
223938fd1498Szrj
224038fd1498Szrj if (ncopies > 1)
224138fd1498Szrj {
224238fd1498Szrj if (dump_enabled_p ())
224338fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
224438fd1498Szrj "multiple types with negative step.\n");
224538fd1498Szrj return VMAT_ELEMENTWISE;
224638fd1498Szrj }
224738fd1498Szrj
224838fd1498Szrj alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
224938fd1498Szrj if (alignment_support_scheme != dr_aligned
225038fd1498Szrj && alignment_support_scheme != dr_unaligned_supported)
225138fd1498Szrj {
225238fd1498Szrj if (dump_enabled_p ())
225338fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
225438fd1498Szrj "negative step but alignment required.\n");
225538fd1498Szrj return VMAT_ELEMENTWISE;
225638fd1498Szrj }
225738fd1498Szrj
225838fd1498Szrj if (vls_type == VLS_STORE_INVARIANT)
225938fd1498Szrj {
226038fd1498Szrj if (dump_enabled_p ())
226138fd1498Szrj dump_printf_loc (MSG_NOTE, vect_location,
226238fd1498Szrj "negative step with invariant source;"
226338fd1498Szrj " no permute needed.\n");
226438fd1498Szrj return VMAT_CONTIGUOUS_DOWN;
226538fd1498Szrj }
226638fd1498Szrj
226738fd1498Szrj if (!perm_mask_for_reverse (vectype))
226838fd1498Szrj {
226938fd1498Szrj if (dump_enabled_p ())
227038fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
227138fd1498Szrj "negative step and reversing not supported.\n");
227238fd1498Szrj return VMAT_ELEMENTWISE;
227338fd1498Szrj }
227438fd1498Szrj
227538fd1498Szrj return VMAT_CONTIGUOUS_REVERSE;
227638fd1498Szrj }
227738fd1498Szrj
/* Analyze load or store statement STMT of type VLS_TYPE.  Return true
   if there is a memory access type that the vectorized form can use,
   storing it in *MEMORY_ACCESS_TYPE if so.  If we decide to use gathers
   or scatters, fill in GS_INFO accordingly.

   SLP says whether we're performing SLP rather than loop vectorization.
   MASKED_P is true if the statement is conditional on a vectorized mask.
   VECTYPE is the vector type that the vectorized statements will use.
   NCOPIES is the number of vector statements that will be needed.  */

static bool
get_load_store_type (gimple *stmt, tree vectype, bool slp, bool masked_p,
		     vec_load_store_type vls_type, unsigned int ncopies,
		     vect_memory_access_type *memory_access_type,
		     gather_scatter_info *gs_info)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  vec_info *vinfo = stmt_info->vinfo;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    {
      /* Native gather/scatter: recompute GS_INFO.  The check is expected
	 to succeed here (it was presumably established during earlier
	 analysis); fail hard if it does not.  */
      *memory_access_type = VMAT_GATHER_SCATTER;
      gimple *def_stmt;
      if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info))
	gcc_unreachable ();
      else if (!vect_is_simple_use (gs_info->offset, vinfo, &def_stmt,
				    &gs_info->offset_dt,
				    &gs_info->offset_vectype))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "%s index use not simple.\n",
			     vls_type == VLS_LOAD ? "gather" : "scatter");
	  return false;
	}
    }
  else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      /* Grouped accesses are handled by a dedicated subroutine.  */
      if (!get_group_load_store_type (stmt, vectype, slp, masked_p, vls_type,
				      memory_access_type, gs_info))
	return false;
    }
  else if (STMT_VINFO_STRIDED_P (stmt_info))
    {
      /* Non-grouped strided access: prefer a strided gather/scatter if
	 the target supports one, otherwise do it element by element.  */
      gcc_assert (!slp);
      if (loop_vinfo
	  && vect_use_strided_gather_scatters_p (stmt, loop_vinfo,
						 masked_p, gs_info))
	*memory_access_type = VMAT_GATHER_SCATTER;
      else
	*memory_access_type = VMAT_ELEMENTWISE;
    }
  else
    {
      /* Consecutive access with a compile-time-constant step; dispatch
	 on the sign of the step.  */
      int cmp = compare_step_with_zero (stmt);
      if (cmp < 0)
	*memory_access_type = get_negative_load_store_type
	  (stmt, vectype, vls_type, ncopies);
      else if (cmp == 0)
	{
	  /* Zero step means every iteration reads the same location;
	     only loads can be invariant.  */
	  gcc_assert (vls_type == VLS_LOAD);
	  *memory_access_type = VMAT_INVARIANT;
	}
      else
	*memory_access_type = VMAT_CONTIGUOUS;
    }

  /* Elementwise/strided-SLP accesses emit one scalar access per element,
     which requires the element count to be a compile-time constant.  */
  if ((*memory_access_type == VMAT_ELEMENTWISE
       || *memory_access_type == VMAT_STRIDED_SLP)
      && !nunits.is_constant ())
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Not using elementwise accesses due to variable "
			 "vectorization factor.\n");
      return false;
    }

  /* FIXME: At the moment the cost model seems to underestimate the
     cost of using elementwise accesses.  This check preserves the
     traditional behavior until that can be fixed.  */
  if (*memory_access_type == VMAT_ELEMENTWISE
      && !STMT_VINFO_STRIDED_P (stmt_info)
      && !(stmt == GROUP_FIRST_ELEMENT (stmt_info)
	   && !GROUP_NEXT_ELEMENT (stmt_info)
	   && !pow2p_hwi (GROUP_SIZE (stmt_info))))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not falling back to elementwise accesses\n");
      return false;
    }
  return true;
}
237338fd1498Szrj
237438fd1498Szrj /* Return true if boolean argument MASK is suitable for vectorizing
237538fd1498Szrj conditional load or store STMT. When returning true, store the type
237638fd1498Szrj of the definition in *MASK_DT_OUT and the type of the vectorized mask
237738fd1498Szrj in *MASK_VECTYPE_OUT. */
237838fd1498Szrj
237938fd1498Szrj static bool
vect_check_load_store_mask(gimple * stmt,tree mask,vect_def_type * mask_dt_out,tree * mask_vectype_out)238038fd1498Szrj vect_check_load_store_mask (gimple *stmt, tree mask,
238138fd1498Szrj vect_def_type *mask_dt_out,
238238fd1498Szrj tree *mask_vectype_out)
238338fd1498Szrj {
238438fd1498Szrj if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
238538fd1498Szrj {
238638fd1498Szrj if (dump_enabled_p ())
238738fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
238838fd1498Szrj "mask argument is not a boolean.\n");
238938fd1498Szrj return false;
239038fd1498Szrj }
239138fd1498Szrj
239238fd1498Szrj if (TREE_CODE (mask) != SSA_NAME)
239338fd1498Szrj {
239438fd1498Szrj if (dump_enabled_p ())
239538fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
239638fd1498Szrj "mask argument is not an SSA name.\n");
239738fd1498Szrj return false;
239838fd1498Szrj }
239938fd1498Szrj
240038fd1498Szrj stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
240138fd1498Szrj gimple *def_stmt;
240238fd1498Szrj enum vect_def_type mask_dt;
240338fd1498Szrj tree mask_vectype;
240438fd1498Szrj if (!vect_is_simple_use (mask, stmt_info->vinfo, &def_stmt, &mask_dt,
240538fd1498Szrj &mask_vectype))
240638fd1498Szrj {
240738fd1498Szrj if (dump_enabled_p ())
240838fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
240938fd1498Szrj "mask use not simple.\n");
241038fd1498Szrj return false;
241138fd1498Szrj }
241238fd1498Szrj
241338fd1498Szrj tree vectype = STMT_VINFO_VECTYPE (stmt_info);
241438fd1498Szrj if (!mask_vectype)
241538fd1498Szrj mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
241638fd1498Szrj
241738fd1498Szrj if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
241838fd1498Szrj {
241938fd1498Szrj if (dump_enabled_p ())
242038fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
242138fd1498Szrj "could not find an appropriate vector mask type.\n");
242238fd1498Szrj return false;
242338fd1498Szrj }
242438fd1498Szrj
242538fd1498Szrj if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
242638fd1498Szrj TYPE_VECTOR_SUBPARTS (vectype)))
242738fd1498Szrj {
242838fd1498Szrj if (dump_enabled_p ())
242938fd1498Szrj {
243038fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
243138fd1498Szrj "vector mask type ");
243238fd1498Szrj dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, mask_vectype);
243338fd1498Szrj dump_printf (MSG_MISSED_OPTIMIZATION,
243438fd1498Szrj " does not match vector data type ");
243538fd1498Szrj dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, vectype);
243638fd1498Szrj dump_printf (MSG_MISSED_OPTIMIZATION, ".\n");
243738fd1498Szrj }
243838fd1498Szrj return false;
243938fd1498Szrj }
244038fd1498Szrj
244138fd1498Szrj *mask_dt_out = mask_dt;
244238fd1498Szrj *mask_vectype_out = mask_vectype;
244338fd1498Szrj return true;
244438fd1498Szrj }
244538fd1498Szrj
244638fd1498Szrj /* Return true if stored value RHS is suitable for vectorizing store
244738fd1498Szrj statement STMT. When returning true, store the type of the
244838fd1498Szrj definition in *RHS_DT_OUT, the type of the vectorized store value in
244938fd1498Szrj *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
245038fd1498Szrj
245138fd1498Szrj static bool
vect_check_store_rhs(gimple * stmt,tree rhs,vect_def_type * rhs_dt_out,tree * rhs_vectype_out,vec_load_store_type * vls_type_out)245238fd1498Szrj vect_check_store_rhs (gimple *stmt, tree rhs, vect_def_type *rhs_dt_out,
245338fd1498Szrj tree *rhs_vectype_out, vec_load_store_type *vls_type_out)
245438fd1498Szrj {
245538fd1498Szrj /* In the case this is a store from a constant make sure
245638fd1498Szrj native_encode_expr can handle it. */
245738fd1498Szrj if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
245838fd1498Szrj {
245938fd1498Szrj if (dump_enabled_p ())
246038fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
246138fd1498Szrj "cannot encode constant as a byte sequence.\n");
246238fd1498Szrj return false;
246338fd1498Szrj }
246438fd1498Szrj
246538fd1498Szrj stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
246638fd1498Szrj gimple *def_stmt;
246738fd1498Szrj enum vect_def_type rhs_dt;
246838fd1498Szrj tree rhs_vectype;
246938fd1498Szrj if (!vect_is_simple_use (rhs, stmt_info->vinfo, &def_stmt, &rhs_dt,
247038fd1498Szrj &rhs_vectype))
247138fd1498Szrj {
247238fd1498Szrj if (dump_enabled_p ())
247338fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
247438fd1498Szrj "use not simple.\n");
247538fd1498Szrj return false;
247638fd1498Szrj }
247738fd1498Szrj
247838fd1498Szrj tree vectype = STMT_VINFO_VECTYPE (stmt_info);
247938fd1498Szrj if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
248038fd1498Szrj {
248138fd1498Szrj if (dump_enabled_p ())
248238fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
248338fd1498Szrj "incompatible vector types.\n");
248438fd1498Szrj return false;
248538fd1498Szrj }
248638fd1498Szrj
248738fd1498Szrj *rhs_dt_out = rhs_dt;
248838fd1498Szrj *rhs_vectype_out = rhs_vectype;
248938fd1498Szrj if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
249038fd1498Szrj *vls_type_out = VLS_STORE_INVARIANT;
249138fd1498Szrj else
249238fd1498Szrj *vls_type_out = VLS_STORE;
249338fd1498Szrj return true;
249438fd1498Szrj }
249538fd1498Szrj
249638fd1498Szrj /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT.
249738fd1498Szrj Note that we support masks with floating-point type, in which case the
249838fd1498Szrj floats are interpreted as a bitmask. */
249938fd1498Szrj
250038fd1498Szrj static tree
vect_build_all_ones_mask(gimple * stmt,tree masktype)250138fd1498Szrj vect_build_all_ones_mask (gimple *stmt, tree masktype)
250238fd1498Szrj {
250338fd1498Szrj if (TREE_CODE (masktype) == INTEGER_TYPE)
250438fd1498Szrj return build_int_cst (masktype, -1);
250538fd1498Szrj else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
250638fd1498Szrj {
250738fd1498Szrj tree mask = build_int_cst (TREE_TYPE (masktype), -1);
250838fd1498Szrj mask = build_vector_from_val (masktype, mask);
250938fd1498Szrj return vect_init_vector (stmt, mask, masktype, NULL);
251038fd1498Szrj }
251138fd1498Szrj else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
251238fd1498Szrj {
251338fd1498Szrj REAL_VALUE_TYPE r;
251438fd1498Szrj long tmp[6];
251538fd1498Szrj for (int j = 0; j < 6; ++j)
251638fd1498Szrj tmp[j] = -1;
251738fd1498Szrj real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
251838fd1498Szrj tree mask = build_real (TREE_TYPE (masktype), r);
251938fd1498Szrj mask = build_vector_from_val (masktype, mask);
252038fd1498Szrj return vect_init_vector (stmt, mask, masktype, NULL);
252138fd1498Szrj }
252238fd1498Szrj gcc_unreachable ();
252338fd1498Szrj }
252438fd1498Szrj
252538fd1498Szrj /* Build an all-zero merge value of type VECTYPE while vectorizing
252638fd1498Szrj STMT as a gather load. */
252738fd1498Szrj
252838fd1498Szrj static tree
vect_build_zero_merge_argument(gimple * stmt,tree vectype)252938fd1498Szrj vect_build_zero_merge_argument (gimple *stmt, tree vectype)
253038fd1498Szrj {
253138fd1498Szrj tree merge;
253238fd1498Szrj if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
253338fd1498Szrj merge = build_int_cst (TREE_TYPE (vectype), 0);
253438fd1498Szrj else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
253538fd1498Szrj {
253638fd1498Szrj REAL_VALUE_TYPE r;
253738fd1498Szrj long tmp[6];
253838fd1498Szrj for (int j = 0; j < 6; ++j)
253938fd1498Szrj tmp[j] = 0;
254038fd1498Szrj real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
254138fd1498Szrj merge = build_real (TREE_TYPE (vectype), r);
254238fd1498Szrj }
254338fd1498Szrj else
254438fd1498Szrj gcc_unreachable ();
254538fd1498Szrj merge = build_vector_from_val (vectype, merge);
254638fd1498Szrj return vect_init_vector (stmt, merge, vectype, NULL);
254738fd1498Szrj }
254838fd1498Szrj
254938fd1498Szrj /* Build a gather load call while vectorizing STMT. Insert new instructions
255038fd1498Szrj before GSI and add them to VEC_STMT. GS_INFO describes the gather load
255138fd1498Szrj operation. If the load is conditional, MASK is the unvectorized
255238fd1498Szrj condition and MASK_DT is its definition type, otherwise MASK is null. */
255338fd1498Szrj
255438fd1498Szrj static void
vect_build_gather_load_calls(gimple * stmt,gimple_stmt_iterator * gsi,gimple ** vec_stmt,gather_scatter_info * gs_info,tree mask,vect_def_type mask_dt)255538fd1498Szrj vect_build_gather_load_calls (gimple *stmt, gimple_stmt_iterator *gsi,
255638fd1498Szrj gimple **vec_stmt, gather_scatter_info *gs_info,
255738fd1498Szrj tree mask, vect_def_type mask_dt)
255838fd1498Szrj {
255938fd1498Szrj stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
256038fd1498Szrj loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
256138fd1498Szrj struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
256238fd1498Szrj tree vectype = STMT_VINFO_VECTYPE (stmt_info);
256338fd1498Szrj poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
256438fd1498Szrj int ncopies = vect_get_num_copies (loop_vinfo, vectype);
256538fd1498Szrj edge pe = loop_preheader_edge (loop);
256638fd1498Szrj enum { NARROW, NONE, WIDEN } modifier;
256738fd1498Szrj poly_uint64 gather_off_nunits
256838fd1498Szrj = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
256938fd1498Szrj
257038fd1498Szrj tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
257138fd1498Szrj tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
257238fd1498Szrj tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
257338fd1498Szrj tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
257438fd1498Szrj tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
257538fd1498Szrj tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
257638fd1498Szrj tree scaletype = TREE_VALUE (arglist);
257738fd1498Szrj gcc_checking_assert (types_compatible_p (srctype, rettype)
257838fd1498Szrj && (!mask || types_compatible_p (srctype, masktype)));
257938fd1498Szrj
258038fd1498Szrj tree perm_mask = NULL_TREE;
258138fd1498Szrj tree mask_perm_mask = NULL_TREE;
258238fd1498Szrj if (known_eq (nunits, gather_off_nunits))
258338fd1498Szrj modifier = NONE;
258438fd1498Szrj else if (known_eq (nunits * 2, gather_off_nunits))
258538fd1498Szrj {
258638fd1498Szrj modifier = WIDEN;
258738fd1498Szrj
258838fd1498Szrj /* Currently widening gathers and scatters are only supported for
258938fd1498Szrj fixed-length vectors. */
259038fd1498Szrj int count = gather_off_nunits.to_constant ();
259138fd1498Szrj vec_perm_builder sel (count, count, 1);
259238fd1498Szrj for (int i = 0; i < count; ++i)
259338fd1498Szrj sel.quick_push (i | (count / 2));
259438fd1498Szrj
259538fd1498Szrj vec_perm_indices indices (sel, 1, count);
259638fd1498Szrj perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
259738fd1498Szrj indices);
259838fd1498Szrj }
259938fd1498Szrj else if (known_eq (nunits, gather_off_nunits * 2))
260038fd1498Szrj {
260138fd1498Szrj modifier = NARROW;
260238fd1498Szrj
260338fd1498Szrj /* Currently narrowing gathers and scatters are only supported for
260438fd1498Szrj fixed-length vectors. */
260538fd1498Szrj int count = nunits.to_constant ();
260638fd1498Szrj vec_perm_builder sel (count, count, 1);
260738fd1498Szrj sel.quick_grow (count);
260838fd1498Szrj for (int i = 0; i < count; ++i)
260938fd1498Szrj sel[i] = i < count / 2 ? i : i + count / 2;
261038fd1498Szrj vec_perm_indices indices (sel, 2, count);
261138fd1498Szrj perm_mask = vect_gen_perm_mask_checked (vectype, indices);
261238fd1498Szrj
261338fd1498Szrj ncopies *= 2;
261438fd1498Szrj
261538fd1498Szrj if (mask)
261638fd1498Szrj {
261738fd1498Szrj for (int i = 0; i < count; ++i)
261838fd1498Szrj sel[i] = i | (count / 2);
261938fd1498Szrj indices.new_vector (sel, 2, count);
262038fd1498Szrj mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
262138fd1498Szrj }
262238fd1498Szrj }
262338fd1498Szrj else
262438fd1498Szrj gcc_unreachable ();
262538fd1498Szrj
262638fd1498Szrj tree vec_dest = vect_create_destination_var (gimple_get_lhs (stmt),
262738fd1498Szrj vectype);
262838fd1498Szrj
262938fd1498Szrj tree ptr = fold_convert (ptrtype, gs_info->base);
263038fd1498Szrj if (!is_gimple_min_invariant (ptr))
263138fd1498Szrj {
263238fd1498Szrj gimple_seq seq;
263338fd1498Szrj ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
263438fd1498Szrj basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
263538fd1498Szrj gcc_assert (!new_bb);
263638fd1498Szrj }
263738fd1498Szrj
263838fd1498Szrj tree scale = build_int_cst (scaletype, gs_info->scale);
263938fd1498Szrj
264038fd1498Szrj tree vec_oprnd0 = NULL_TREE;
264138fd1498Szrj tree vec_mask = NULL_TREE;
264238fd1498Szrj tree src_op = NULL_TREE;
264338fd1498Szrj tree mask_op = NULL_TREE;
264438fd1498Szrj tree prev_res = NULL_TREE;
264538fd1498Szrj stmt_vec_info prev_stmt_info = NULL;
264638fd1498Szrj
264738fd1498Szrj if (!mask)
264838fd1498Szrj {
264938fd1498Szrj src_op = vect_build_zero_merge_argument (stmt, rettype);
265038fd1498Szrj mask_op = vect_build_all_ones_mask (stmt, masktype);
265138fd1498Szrj }
265238fd1498Szrj
265338fd1498Szrj for (int j = 0; j < ncopies; ++j)
265438fd1498Szrj {
265538fd1498Szrj tree op, var;
265638fd1498Szrj gimple *new_stmt;
265738fd1498Szrj if (modifier == WIDEN && (j & 1))
265838fd1498Szrj op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
265938fd1498Szrj perm_mask, stmt, gsi);
266038fd1498Szrj else if (j == 0)
266138fd1498Szrj op = vec_oprnd0
266238fd1498Szrj = vect_get_vec_def_for_operand (gs_info->offset, stmt);
266338fd1498Szrj else
266438fd1498Szrj op = vec_oprnd0
266538fd1498Szrj = vect_get_vec_def_for_stmt_copy (gs_info->offset_dt, vec_oprnd0);
266638fd1498Szrj
266738fd1498Szrj if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
266838fd1498Szrj {
266938fd1498Szrj gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
267038fd1498Szrj TYPE_VECTOR_SUBPARTS (idxtype)));
267138fd1498Szrj var = vect_get_new_ssa_name (idxtype, vect_simple_var);
267238fd1498Szrj op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
267338fd1498Szrj new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
267438fd1498Szrj vect_finish_stmt_generation (stmt, new_stmt, gsi);
267538fd1498Szrj op = var;
267638fd1498Szrj }
267738fd1498Szrj
267838fd1498Szrj if (mask)
267938fd1498Szrj {
268038fd1498Szrj if (mask_perm_mask && (j & 1))
268138fd1498Szrj mask_op = permute_vec_elements (mask_op, mask_op,
268238fd1498Szrj mask_perm_mask, stmt, gsi);
268338fd1498Szrj else
268438fd1498Szrj {
268538fd1498Szrj if (j == 0)
268638fd1498Szrj vec_mask = vect_get_vec_def_for_operand (mask, stmt);
268738fd1498Szrj else
268838fd1498Szrj vec_mask = vect_get_vec_def_for_stmt_copy (mask_dt, vec_mask);
268938fd1498Szrj
269038fd1498Szrj mask_op = vec_mask;
269138fd1498Szrj if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
269238fd1498Szrj {
269338fd1498Szrj gcc_assert
269438fd1498Szrj (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op)),
269538fd1498Szrj TYPE_VECTOR_SUBPARTS (masktype)));
269638fd1498Szrj var = vect_get_new_ssa_name (masktype, vect_simple_var);
269738fd1498Szrj mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
269838fd1498Szrj new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR,
269938fd1498Szrj mask_op);
270038fd1498Szrj vect_finish_stmt_generation (stmt, new_stmt, gsi);
270138fd1498Szrj mask_op = var;
270238fd1498Szrj }
270338fd1498Szrj }
270438fd1498Szrj src_op = mask_op;
270538fd1498Szrj }
270638fd1498Szrj
270738fd1498Szrj new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
270838fd1498Szrj mask_op, scale);
270938fd1498Szrj
271038fd1498Szrj if (!useless_type_conversion_p (vectype, rettype))
271138fd1498Szrj {
271238fd1498Szrj gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
271338fd1498Szrj TYPE_VECTOR_SUBPARTS (rettype)));
271438fd1498Szrj op = vect_get_new_ssa_name (rettype, vect_simple_var);
271538fd1498Szrj gimple_call_set_lhs (new_stmt, op);
271638fd1498Szrj vect_finish_stmt_generation (stmt, new_stmt, gsi);
271738fd1498Szrj var = make_ssa_name (vec_dest);
271838fd1498Szrj op = build1 (VIEW_CONVERT_EXPR, vectype, op);
271938fd1498Szrj new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
272038fd1498Szrj }
272138fd1498Szrj else
272238fd1498Szrj {
272338fd1498Szrj var = make_ssa_name (vec_dest, new_stmt);
272438fd1498Szrj gimple_call_set_lhs (new_stmt, var);
272538fd1498Szrj }
272638fd1498Szrj
272738fd1498Szrj vect_finish_stmt_generation (stmt, new_stmt, gsi);
272838fd1498Szrj
272938fd1498Szrj if (modifier == NARROW)
273038fd1498Szrj {
273138fd1498Szrj if ((j & 1) == 0)
273238fd1498Szrj {
273338fd1498Szrj prev_res = var;
273438fd1498Szrj continue;
273538fd1498Szrj }
273638fd1498Szrj var = permute_vec_elements (prev_res, var, perm_mask, stmt, gsi);
273738fd1498Szrj new_stmt = SSA_NAME_DEF_STMT (var);
273838fd1498Szrj }
273938fd1498Szrj
274038fd1498Szrj if (prev_stmt_info == NULL)
274138fd1498Szrj STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
274238fd1498Szrj else
274338fd1498Szrj STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
274438fd1498Szrj prev_stmt_info = vinfo_for_stmt (new_stmt);
274538fd1498Szrj }
274638fd1498Szrj }
274738fd1498Szrj
274838fd1498Szrj /* Prepare the base and offset in GS_INFO for vectorization.
274938fd1498Szrj Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
275038fd1498Szrj to the vectorized offset argument for the first copy of STMT. STMT
275138fd1498Szrj is the statement described by GS_INFO and LOOP is the containing loop. */
275238fd1498Szrj
275338fd1498Szrj static void
vect_get_gather_scatter_ops(struct loop * loop,gimple * stmt,gather_scatter_info * gs_info,tree * dataref_ptr,tree * vec_offset)275438fd1498Szrj vect_get_gather_scatter_ops (struct loop *loop, gimple *stmt,
275538fd1498Szrj gather_scatter_info *gs_info,
275638fd1498Szrj tree *dataref_ptr, tree *vec_offset)
275738fd1498Szrj {
275838fd1498Szrj gimple_seq stmts = NULL;
275938fd1498Szrj *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
276038fd1498Szrj if (stmts != NULL)
276138fd1498Szrj {
276238fd1498Szrj basic_block new_bb;
276338fd1498Szrj edge pe = loop_preheader_edge (loop);
276438fd1498Szrj new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
276538fd1498Szrj gcc_assert (!new_bb);
276638fd1498Szrj }
276738fd1498Szrj tree offset_type = TREE_TYPE (gs_info->offset);
276838fd1498Szrj tree offset_vectype = get_vectype_for_scalar_type (offset_type);
276938fd1498Szrj *vec_offset = vect_get_vec_def_for_operand (gs_info->offset, stmt,
277038fd1498Szrj offset_vectype);
277138fd1498Szrj }
277238fd1498Szrj
277338fd1498Szrj /* Prepare to implement a grouped or strided load or store using
277438fd1498Szrj the gather load or scatter store operation described by GS_INFO.
277538fd1498Szrj STMT is the load or store statement.
277638fd1498Szrj
277738fd1498Szrj Set *DATAREF_BUMP to the amount that should be added to the base
277838fd1498Szrj address after each copy of the vectorized statement. Set *VEC_OFFSET
277938fd1498Szrj to an invariant offset vector in which element I has the value
278038fd1498Szrj I * DR_STEP / SCALE. */
278138fd1498Szrj
278238fd1498Szrj static void
vect_get_strided_load_store_ops(gimple * stmt,loop_vec_info loop_vinfo,gather_scatter_info * gs_info,tree * dataref_bump,tree * vec_offset)278338fd1498Szrj vect_get_strided_load_store_ops (gimple *stmt, loop_vec_info loop_vinfo,
278438fd1498Szrj gather_scatter_info *gs_info,
278538fd1498Szrj tree *dataref_bump, tree *vec_offset)
278638fd1498Szrj {
278738fd1498Szrj stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
278838fd1498Szrj struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
278938fd1498Szrj struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
279038fd1498Szrj tree vectype = STMT_VINFO_VECTYPE (stmt_info);
279138fd1498Szrj gimple_seq stmts;
279238fd1498Szrj
279338fd1498Szrj tree bump = size_binop (MULT_EXPR,
279438fd1498Szrj fold_convert (sizetype, DR_STEP (dr)),
279538fd1498Szrj size_int (TYPE_VECTOR_SUBPARTS (vectype)));
279638fd1498Szrj *dataref_bump = force_gimple_operand (bump, &stmts, true, NULL_TREE);
279738fd1498Szrj if (stmts)
279838fd1498Szrj gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
279938fd1498Szrj
280038fd1498Szrj /* The offset given in GS_INFO can have pointer type, so use the element
280138fd1498Szrj type of the vector instead. */
280238fd1498Szrj tree offset_type = TREE_TYPE (gs_info->offset);
280338fd1498Szrj tree offset_vectype = get_vectype_for_scalar_type (offset_type);
280438fd1498Szrj offset_type = TREE_TYPE (offset_vectype);
280538fd1498Szrj
280638fd1498Szrj /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
280738fd1498Szrj tree step = size_binop (EXACT_DIV_EXPR, DR_STEP (dr),
280838fd1498Szrj ssize_int (gs_info->scale));
280938fd1498Szrj step = fold_convert (offset_type, step);
281038fd1498Szrj step = force_gimple_operand (step, &stmts, true, NULL_TREE);
281138fd1498Szrj
281238fd1498Szrj /* Create {0, X, X*2, X*3, ...}. */
281338fd1498Szrj *vec_offset = gimple_build (&stmts, VEC_SERIES_EXPR, offset_vectype,
281438fd1498Szrj build_zero_cst (offset_type), step);
281538fd1498Szrj if (stmts)
281638fd1498Szrj gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
281738fd1498Szrj }
281838fd1498Szrj
281938fd1498Szrj /* Return the amount that should be added to a vector pointer to move
282038fd1498Szrj to the next or previous copy of AGGR_TYPE. DR is the data reference
282138fd1498Szrj being vectorized and MEMORY_ACCESS_TYPE describes the type of
282238fd1498Szrj vectorization. */
282338fd1498Szrj
282438fd1498Szrj static tree
vect_get_data_ptr_increment(data_reference * dr,tree aggr_type,vect_memory_access_type memory_access_type)282538fd1498Szrj vect_get_data_ptr_increment (data_reference *dr, tree aggr_type,
282638fd1498Szrj vect_memory_access_type memory_access_type)
282738fd1498Szrj {
282838fd1498Szrj if (memory_access_type == VMAT_INVARIANT)
282938fd1498Szrj return size_zero_node;
283038fd1498Szrj
283138fd1498Szrj tree iv_step = TYPE_SIZE_UNIT (aggr_type);
283238fd1498Szrj tree step = vect_dr_behavior (dr)->step;
283338fd1498Szrj if (tree_int_cst_sgn (step) == -1)
283438fd1498Szrj iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
283538fd1498Szrj return iv_step;
283638fd1498Szrj }
283738fd1498Szrj
283838fd1498Szrj /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
283938fd1498Szrj
284038fd1498Szrj static bool
vectorizable_bswap(gimple * stmt,gimple_stmt_iterator * gsi,gimple ** vec_stmt,slp_tree slp_node,tree vectype_in,enum vect_def_type * dt)284138fd1498Szrj vectorizable_bswap (gimple *stmt, gimple_stmt_iterator *gsi,
284238fd1498Szrj gimple **vec_stmt, slp_tree slp_node,
284338fd1498Szrj tree vectype_in, enum vect_def_type *dt)
284438fd1498Szrj {
/* STMT is a call whose first argument is byte-swapped (the caller only
   dispatches here for BUILT_IN_BSWAP{16,32,64}).  VECTYPE_IN is the vector
   type of the argument and DT its def types.  If VEC_STMT is null this is
   the analysis phase; otherwise emit the vectorized statements at GSI.  */
284538fd1498Szrj tree op, vectype;
284638fd1498Szrj stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
284738fd1498Szrj loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
284838fd1498Szrj unsigned ncopies;
284938fd1498Szrj unsigned HOST_WIDE_INT nunits, num_bytes;
285038fd1498Szrj
285138fd1498Szrj op = gimple_call_arg (stmt, 0);
285238fd1498Szrj vectype = STMT_VINFO_VECTYPE (stmt_info);
285338fd1498Szrj
/* The byte permutation built below needs a compile-time constant number
   of elements, so reject variable-length vectors.  */
285438fd1498Szrj if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
285538fd1498Szrj return false;
285638fd1498Szrj
285738fd1498Szrj /* Multiple types in SLP are handled by creating the appropriate number of
285838fd1498Szrj vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
285938fd1498Szrj case of SLP. */
286038fd1498Szrj if (slp_node)
286138fd1498Szrj ncopies = 1;
286238fd1498Szrj else
286338fd1498Szrj ncopies = vect_get_num_copies (loop_vinfo, vectype);
286438fd1498Szrj
286538fd1498Szrj gcc_assert (ncopies >= 1);
286638fd1498Szrj
/* We operate on the input reinterpreted as a vector of bytes; bail out
   if no same-sized char vector type exists.  */
286738fd1498Szrj tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
286838fd1498Szrj if (! char_vectype)
286938fd1498Szrj return false;
287038fd1498Szrj
287138fd1498Szrj if (!TYPE_VECTOR_SUBPARTS (char_vectype).is_constant (&num_bytes))
287238fd1498Szrj return false;
287338fd1498Szrj
/* Number of bytes per scalar word element.  */
287438fd1498Szrj unsigned word_bytes = num_bytes / nunits;
287538fd1498Szrj
287638fd1498Szrj /* The encoding uses one stepped pattern for each byte in the word. */
287738fd1498Szrj vec_perm_builder elts (num_bytes, word_bytes, 3);
287838fd1498Szrj for (unsigned i = 0; i < 3; ++i)
287938fd1498Szrj for (unsigned j = 0; j < word_bytes; ++j)
/* Within word I, select its bytes in reverse order.  */
288038fd1498Szrj elts.quick_push ((i + 1) * word_bytes - j - 1);
288138fd1498Szrj
/* The transform is only possible if the target can do this byte
   permutation as a constant-mask VEC_PERM.  */
288238fd1498Szrj vec_perm_indices indices (elts, 1, num_bytes);
288338fd1498Szrj if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
288438fd1498Szrj return false;
288538fd1498Szrj
/* Analysis phase: record the stmt type and its cost; emit nothing.  */
288638fd1498Szrj if (! vec_stmt)
288738fd1498Szrj {
288838fd1498Szrj STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
288938fd1498Szrj if (dump_enabled_p ())
289038fd1498Szrj dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_bswap ==="
289138fd1498Szrj "\n");
289238fd1498Szrj if (! slp_node)
289338fd1498Szrj {
/* One invariant stmt (the permutation mask) in the prologue, one
   permutation per copy in the body.  */
289438fd1498Szrj add_stmt_cost (stmt_info->vinfo->target_cost_data,
289538fd1498Szrj 1, vector_stmt, stmt_info, 0, vect_prologue);
289638fd1498Szrj add_stmt_cost (stmt_info->vinfo->target_cost_data,
289738fd1498Szrj ncopies, vec_perm, stmt_info, 0, vect_body);
289838fd1498Szrj }
289938fd1498Szrj return true;
290038fd1498Szrj }
290138fd1498Szrj
290238fd1498Szrj tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
290338fd1498Szrj
290438fd1498Szrj /* Transform. */
290538fd1498Szrj vec<tree> vec_oprnds = vNULL;
290638fd1498Szrj gimple *new_stmt = NULL;
290738fd1498Szrj stmt_vec_info prev_stmt_info = NULL;
290838fd1498Szrj for (unsigned j = 0; j < ncopies; j++)
290938fd1498Szrj {
291038fd1498Szrj /* Handle uses. */
291138fd1498Szrj if (j == 0)
291238fd1498Szrj vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
291338fd1498Szrj else
291438fd1498Szrj vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
291538fd1498Szrj
291638fd1498Szrj /* Arguments are ready. create the new vector stmt. */
291738fd1498Szrj unsigned i;
291838fd1498Szrj tree vop;
291938fd1498Szrj FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
292038fd1498Szrj {
/* Three steps: view the operand as bytes, permute the bytes with the
   bswap mask, then view the result back in the original vector type.  */
292138fd1498Szrj tree tem = make_ssa_name (char_vectype);
292238fd1498Szrj new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
292338fd1498Szrj char_vectype, vop));
292438fd1498Szrj vect_finish_stmt_generation (stmt, new_stmt, gsi);
292538fd1498Szrj tree tem2 = make_ssa_name (char_vectype);
292638fd1498Szrj new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
292738fd1498Szrj tem, tem, bswap_vconst);
292838fd1498Szrj vect_finish_stmt_generation (stmt, new_stmt, gsi);
292938fd1498Szrj tem = make_ssa_name (vectype);
293038fd1498Szrj new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
293138fd1498Szrj vectype, tem2));
293238fd1498Szrj vect_finish_stmt_generation (stmt, new_stmt, gsi);
293338fd1498Szrj if (slp_node)
293438fd1498Szrj SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
293538fd1498Szrj }
293638fd1498Szrj
293738fd1498Szrj if (slp_node)
293838fd1498Szrj continue;
293938fd1498Szrj
/* For the non-SLP case, chain the copies together: the first copy is the
   main vectorized stmt, later copies hang off STMT_VINFO_RELATED_STMT.  */
294038fd1498Szrj if (j == 0)
294138fd1498Szrj STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
294238fd1498Szrj else
294338fd1498Szrj STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
294438fd1498Szrj
294538fd1498Szrj prev_stmt_info = vinfo_for_stmt (new_stmt);
294638fd1498Szrj }
294738fd1498Szrj
294838fd1498Szrj vec_oprnds.release ();
294938fd1498Szrj return true;
295038fd1498Szrj }
295138fd1498Szrj
295238fd1498Szrj /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
295338fd1498Szrj integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
295438fd1498Szrj in a single step. On success, store the binary pack code in
295538fd1498Szrj *CONVERT_CODE. */
295638fd1498Szrj
295738fd1498Szrj static bool
simple_integer_narrowing(tree vectype_out,tree vectype_in,tree_code * convert_code)295838fd1498Szrj simple_integer_narrowing (tree vectype_out, tree vectype_in,
295938fd1498Szrj tree_code *convert_code)
296038fd1498Szrj {
296138fd1498Szrj if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
296238fd1498Szrj || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
296338fd1498Szrj return false;
296438fd1498Szrj
296538fd1498Szrj tree_code code;
296638fd1498Szrj int multi_step_cvt = 0;
296738fd1498Szrj auto_vec <tree, 8> interm_types;
296838fd1498Szrj if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
296938fd1498Szrj &code, &multi_step_cvt,
297038fd1498Szrj &interm_types)
297138fd1498Szrj || multi_step_cvt)
297238fd1498Szrj return false;
297338fd1498Szrj
297438fd1498Szrj *convert_code = code;
297538fd1498Szrj return true;
297638fd1498Szrj }
297738fd1498Szrj
297838fd1498Szrj /* Function vectorizable_call.
297938fd1498Szrj
298038fd1498Szrj Check if GS performs a function call that can be vectorized.
298138fd1498Szrj If VEC_STMT is also passed, vectorize the STMT: create a vectorized
298238fd1498Szrj stmt to replace it, put it in VEC_STMT, and insert it at BSI.
298338fd1498Szrj Return FALSE if not a vectorizable STMT, TRUE otherwise. */
298438fd1498Szrj
298538fd1498Szrj static bool
vectorizable_call(gimple * gs,gimple_stmt_iterator * gsi,gimple ** vec_stmt,slp_tree slp_node)298638fd1498Szrj vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
298738fd1498Szrj slp_tree slp_node)
298838fd1498Szrj {
298938fd1498Szrj gcall *stmt;
299038fd1498Szrj tree vec_dest;
299138fd1498Szrj tree scalar_dest;
299238fd1498Szrj tree op, type;
299338fd1498Szrj tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
299438fd1498Szrj stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
299538fd1498Szrj tree vectype_out, vectype_in;
299638fd1498Szrj poly_uint64 nunits_in;
299738fd1498Szrj poly_uint64 nunits_out;
299838fd1498Szrj loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
299938fd1498Szrj bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
300038fd1498Szrj vec_info *vinfo = stmt_info->vinfo;
300138fd1498Szrj tree fndecl, new_temp, rhs_type;
300238fd1498Szrj gimple *def_stmt;
300338fd1498Szrj enum vect_def_type dt[3]
300438fd1498Szrj = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
300538fd1498Szrj int ndts = 3;
300638fd1498Szrj gimple *new_stmt = NULL;
300738fd1498Szrj int ncopies, j;
300838fd1498Szrj vec<tree> vargs = vNULL;
300938fd1498Szrj enum { NARROW, NONE, WIDEN } modifier;
301038fd1498Szrj size_t i, nargs;
301138fd1498Szrj tree lhs;
301238fd1498Szrj
301338fd1498Szrj if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
301438fd1498Szrj return false;
301538fd1498Szrj
301638fd1498Szrj if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
301738fd1498Szrj && ! vec_stmt)
301838fd1498Szrj return false;
301938fd1498Szrj
302038fd1498Szrj /* Is GS a vectorizable call? */
302138fd1498Szrj stmt = dyn_cast <gcall *> (gs);
302238fd1498Szrj if (!stmt)
302338fd1498Szrj return false;
302438fd1498Szrj
302538fd1498Szrj if (gimple_call_internal_p (stmt)
302638fd1498Szrj && (internal_load_fn_p (gimple_call_internal_fn (stmt))
302738fd1498Szrj || internal_store_fn_p (gimple_call_internal_fn (stmt))))
302838fd1498Szrj /* Handled by vectorizable_load and vectorizable_store. */
302938fd1498Szrj return false;
303038fd1498Szrj
303138fd1498Szrj if (gimple_call_lhs (stmt) == NULL_TREE
303238fd1498Szrj || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
303338fd1498Szrj return false;
303438fd1498Szrj
303538fd1498Szrj gcc_checking_assert (!stmt_can_throw_internal (stmt));
303638fd1498Szrj
303738fd1498Szrj vectype_out = STMT_VINFO_VECTYPE (stmt_info);
303838fd1498Szrj
303938fd1498Szrj /* Process function arguments. */
304038fd1498Szrj rhs_type = NULL_TREE;
304138fd1498Szrj vectype_in = NULL_TREE;
304238fd1498Szrj nargs = gimple_call_num_args (stmt);
304338fd1498Szrj
304438fd1498Szrj /* Bail out if the function has more than three arguments, we do not have
304538fd1498Szrj interesting builtin functions to vectorize with more than two arguments
304638fd1498Szrj except for fma. No arguments is also not good. */
304738fd1498Szrj if (nargs == 0 || nargs > 3)
304838fd1498Szrj return false;
304938fd1498Szrj
305038fd1498Szrj /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
305138fd1498Szrj if (gimple_call_internal_p (stmt)
305238fd1498Szrj && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
305338fd1498Szrj {
305438fd1498Szrj nargs = 0;
305538fd1498Szrj rhs_type = unsigned_type_node;
305638fd1498Szrj }
305738fd1498Szrj
305838fd1498Szrj for (i = 0; i < nargs; i++)
305938fd1498Szrj {
306038fd1498Szrj tree opvectype;
306138fd1498Szrj
306238fd1498Szrj op = gimple_call_arg (stmt, i);
306338fd1498Szrj
306438fd1498Szrj /* We can only handle calls with arguments of the same type. */
306538fd1498Szrj if (rhs_type
306638fd1498Szrj && !types_compatible_p (rhs_type, TREE_TYPE (op)))
306738fd1498Szrj {
306838fd1498Szrj if (dump_enabled_p ())
306938fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
307038fd1498Szrj "argument types differ.\n");
307138fd1498Szrj return false;
307238fd1498Szrj }
307338fd1498Szrj if (!rhs_type)
307438fd1498Szrj rhs_type = TREE_TYPE (op);
307538fd1498Szrj
307638fd1498Szrj if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[i], &opvectype))
307738fd1498Szrj {
307838fd1498Szrj if (dump_enabled_p ())
307938fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
308038fd1498Szrj "use not simple.\n");
308138fd1498Szrj return false;
308238fd1498Szrj }
308338fd1498Szrj
308438fd1498Szrj if (!vectype_in)
308538fd1498Szrj vectype_in = opvectype;
308638fd1498Szrj else if (opvectype
308738fd1498Szrj && opvectype != vectype_in)
308838fd1498Szrj {
308938fd1498Szrj if (dump_enabled_p ())
309038fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
309138fd1498Szrj "argument vector types differ.\n");
309238fd1498Szrj return false;
309338fd1498Szrj }
309438fd1498Szrj }
309538fd1498Szrj /* If all arguments are external or constant defs use a vector type with
309638fd1498Szrj the same size as the output vector type. */
309738fd1498Szrj if (!vectype_in)
309838fd1498Szrj vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
309938fd1498Szrj if (vec_stmt)
310038fd1498Szrj gcc_assert (vectype_in);
310138fd1498Szrj if (!vectype_in)
310238fd1498Szrj {
310338fd1498Szrj if (dump_enabled_p ())
310438fd1498Szrj {
310538fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
310638fd1498Szrj "no vectype for scalar type ");
310738fd1498Szrj dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
310838fd1498Szrj dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
310938fd1498Szrj }
311038fd1498Szrj
311138fd1498Szrj return false;
311238fd1498Szrj }
311338fd1498Szrj
311438fd1498Szrj /* FORNOW */
311538fd1498Szrj nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
311638fd1498Szrj nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
311738fd1498Szrj if (known_eq (nunits_in * 2, nunits_out))
311838fd1498Szrj modifier = NARROW;
311938fd1498Szrj else if (known_eq (nunits_out, nunits_in))
312038fd1498Szrj modifier = NONE;
312138fd1498Szrj else if (known_eq (nunits_out * 2, nunits_in))
312238fd1498Szrj modifier = WIDEN;
312338fd1498Szrj else
312438fd1498Szrj return false;
312538fd1498Szrj
312638fd1498Szrj /* We only handle functions that do not read or clobber memory. */
312738fd1498Szrj if (gimple_vuse (stmt))
312838fd1498Szrj {
312938fd1498Szrj if (dump_enabled_p ())
313038fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
313138fd1498Szrj "function reads from or writes to memory.\n");
313238fd1498Szrj return false;
313338fd1498Szrj }
313438fd1498Szrj
313538fd1498Szrj /* For now, we only vectorize functions if a target specific builtin
313638fd1498Szrj is available. TODO -- in some cases, it might be profitable to
313738fd1498Szrj insert the calls for pieces of the vector, in order to be able
313838fd1498Szrj to vectorize other operations in the loop. */
313938fd1498Szrj fndecl = NULL_TREE;
314038fd1498Szrj internal_fn ifn = IFN_LAST;
314138fd1498Szrj combined_fn cfn = gimple_call_combined_fn (stmt);
314238fd1498Szrj tree callee = gimple_call_fndecl (stmt);
314338fd1498Szrj
314438fd1498Szrj /* First try using an internal function. */
314538fd1498Szrj tree_code convert_code = ERROR_MARK;
314638fd1498Szrj if (cfn != CFN_LAST
314738fd1498Szrj && (modifier == NONE
314838fd1498Szrj || (modifier == NARROW
314938fd1498Szrj && simple_integer_narrowing (vectype_out, vectype_in,
315038fd1498Szrj &convert_code))))
315138fd1498Szrj ifn = vectorizable_internal_function (cfn, callee, vectype_out,
315238fd1498Szrj vectype_in);
315338fd1498Szrj
315438fd1498Szrj /* If that fails, try asking for a target-specific built-in function. */
315538fd1498Szrj if (ifn == IFN_LAST)
315638fd1498Szrj {
315738fd1498Szrj if (cfn != CFN_LAST)
315838fd1498Szrj fndecl = targetm.vectorize.builtin_vectorized_function
315938fd1498Szrj (cfn, vectype_out, vectype_in);
316038fd1498Szrj else if (callee)
316138fd1498Szrj fndecl = targetm.vectorize.builtin_md_vectorized_function
316238fd1498Szrj (callee, vectype_out, vectype_in);
316338fd1498Szrj }
316438fd1498Szrj
316538fd1498Szrj if (ifn == IFN_LAST && !fndecl)
316638fd1498Szrj {
316738fd1498Szrj if (cfn == CFN_GOMP_SIMD_LANE
316838fd1498Szrj && !slp_node
316938fd1498Szrj && loop_vinfo
317038fd1498Szrj && LOOP_VINFO_LOOP (loop_vinfo)->simduid
317138fd1498Szrj && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
317238fd1498Szrj && LOOP_VINFO_LOOP (loop_vinfo)->simduid
317338fd1498Szrj == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
317438fd1498Szrj {
317538fd1498Szrj /* We can handle IFN_GOMP_SIMD_LANE by returning a
317638fd1498Szrj { 0, 1, 2, ... vf - 1 } vector. */
317738fd1498Szrj gcc_assert (nargs == 0);
317838fd1498Szrj }
317938fd1498Szrj else if (modifier == NONE
318038fd1498Szrj && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
318138fd1498Szrj || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
318238fd1498Szrj || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
318338fd1498Szrj return vectorizable_bswap (stmt, gsi, vec_stmt, slp_node,
318438fd1498Szrj vectype_in, dt);
318538fd1498Szrj else
318638fd1498Szrj {
318738fd1498Szrj if (dump_enabled_p ())
318838fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
318938fd1498Szrj "function is not vectorizable.\n");
319038fd1498Szrj return false;
319138fd1498Szrj }
319238fd1498Szrj }
319338fd1498Szrj
319438fd1498Szrj if (slp_node)
319538fd1498Szrj ncopies = 1;
319638fd1498Szrj else if (modifier == NARROW && ifn == IFN_LAST)
319738fd1498Szrj ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
319838fd1498Szrj else
319938fd1498Szrj ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
320038fd1498Szrj
320138fd1498Szrj /* Sanity check: make sure that at least one copy of the vectorized stmt
320238fd1498Szrj needs to be generated. */
320338fd1498Szrj gcc_assert (ncopies >= 1);
320438fd1498Szrj
320538fd1498Szrj if (!vec_stmt) /* transformation not required. */
320638fd1498Szrj {
320738fd1498Szrj STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
320838fd1498Szrj if (dump_enabled_p ())
320938fd1498Szrj dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
321038fd1498Szrj "\n");
321138fd1498Szrj if (!slp_node)
321238fd1498Szrj {
321338fd1498Szrj vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
321438fd1498Szrj if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
321538fd1498Szrj add_stmt_cost (stmt_info->vinfo->target_cost_data, ncopies / 2,
321638fd1498Szrj vec_promote_demote, stmt_info, 0, vect_body);
321738fd1498Szrj }
321838fd1498Szrj
321938fd1498Szrj return true;
322038fd1498Szrj }
322138fd1498Szrj
322238fd1498Szrj /* Transform. */
322338fd1498Szrj
322438fd1498Szrj if (dump_enabled_p ())
322538fd1498Szrj dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
322638fd1498Szrj
322738fd1498Szrj /* Handle def. */
322838fd1498Szrj scalar_dest = gimple_call_lhs (stmt);
322938fd1498Szrj vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
323038fd1498Szrj
323138fd1498Szrj prev_stmt_info = NULL;
323238fd1498Szrj if (modifier == NONE || ifn != IFN_LAST)
323338fd1498Szrj {
323438fd1498Szrj tree prev_res = NULL_TREE;
323538fd1498Szrj for (j = 0; j < ncopies; ++j)
323638fd1498Szrj {
323738fd1498Szrj /* Build argument list for the vectorized call. */
323838fd1498Szrj if (j == 0)
323938fd1498Szrj vargs.create (nargs);
324038fd1498Szrj else
324138fd1498Szrj vargs.truncate (0);
324238fd1498Szrj
324338fd1498Szrj if (slp_node)
324438fd1498Szrj {
324538fd1498Szrj auto_vec<vec<tree> > vec_defs (nargs);
324638fd1498Szrj vec<tree> vec_oprnds0;
324738fd1498Szrj
324838fd1498Szrj for (i = 0; i < nargs; i++)
324938fd1498Szrj vargs.quick_push (gimple_call_arg (stmt, i));
325038fd1498Szrj vect_get_slp_defs (vargs, slp_node, &vec_defs);
325138fd1498Szrj vec_oprnds0 = vec_defs[0];
325238fd1498Szrj
325338fd1498Szrj /* Arguments are ready. Create the new vector stmt. */
325438fd1498Szrj FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
325538fd1498Szrj {
325638fd1498Szrj size_t k;
325738fd1498Szrj for (k = 0; k < nargs; k++)
325838fd1498Szrj {
325938fd1498Szrj vec<tree> vec_oprndsk = vec_defs[k];
326038fd1498Szrj vargs[k] = vec_oprndsk[i];
326138fd1498Szrj }
326238fd1498Szrj if (modifier == NARROW)
326338fd1498Szrj {
326438fd1498Szrj tree half_res = make_ssa_name (vectype_in);
326538fd1498Szrj gcall *call
326638fd1498Szrj = gimple_build_call_internal_vec (ifn, vargs);
326738fd1498Szrj gimple_call_set_lhs (call, half_res);
326838fd1498Szrj gimple_call_set_nothrow (call, true);
326938fd1498Szrj new_stmt = call;
327038fd1498Szrj vect_finish_stmt_generation (stmt, new_stmt, gsi);
327138fd1498Szrj if ((i & 1) == 0)
327238fd1498Szrj {
327338fd1498Szrj prev_res = half_res;
327438fd1498Szrj continue;
327538fd1498Szrj }
327638fd1498Szrj new_temp = make_ssa_name (vec_dest);
327738fd1498Szrj new_stmt = gimple_build_assign (new_temp, convert_code,
327838fd1498Szrj prev_res, half_res);
327938fd1498Szrj }
328038fd1498Szrj else
328138fd1498Szrj {
328238fd1498Szrj gcall *call;
328338fd1498Szrj if (ifn != IFN_LAST)
328438fd1498Szrj call = gimple_build_call_internal_vec (ifn, vargs);
328538fd1498Szrj else
328638fd1498Szrj call = gimple_build_call_vec (fndecl, vargs);
328738fd1498Szrj new_temp = make_ssa_name (vec_dest, call);
328838fd1498Szrj gimple_call_set_lhs (call, new_temp);
328938fd1498Szrj gimple_call_set_nothrow (call, true);
329038fd1498Szrj new_stmt = call;
329138fd1498Szrj }
329238fd1498Szrj vect_finish_stmt_generation (stmt, new_stmt, gsi);
329338fd1498Szrj SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
329438fd1498Szrj }
329538fd1498Szrj
329638fd1498Szrj for (i = 0; i < nargs; i++)
329738fd1498Szrj {
329838fd1498Szrj vec<tree> vec_oprndsi = vec_defs[i];
329938fd1498Szrj vec_oprndsi.release ();
330038fd1498Szrj }
330138fd1498Szrj continue;
330238fd1498Szrj }
330338fd1498Szrj
330438fd1498Szrj for (i = 0; i < nargs; i++)
330538fd1498Szrj {
330638fd1498Szrj op = gimple_call_arg (stmt, i);
330738fd1498Szrj if (j == 0)
330838fd1498Szrj vec_oprnd0
330938fd1498Szrj = vect_get_vec_def_for_operand (op, stmt);
331038fd1498Szrj else
331138fd1498Szrj {
331238fd1498Szrj vec_oprnd0 = gimple_call_arg (new_stmt, i);
331338fd1498Szrj vec_oprnd0
331438fd1498Szrj = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
331538fd1498Szrj }
331638fd1498Szrj
331738fd1498Szrj vargs.quick_push (vec_oprnd0);
331838fd1498Szrj }
331938fd1498Szrj
332038fd1498Szrj if (gimple_call_internal_p (stmt)
332138fd1498Szrj && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
332238fd1498Szrj {
332338fd1498Szrj tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
332438fd1498Szrj tree new_var
332538fd1498Szrj = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
332638fd1498Szrj gimple *init_stmt = gimple_build_assign (new_var, cst);
332738fd1498Szrj vect_init_vector_1 (stmt, init_stmt, NULL);
332838fd1498Szrj new_temp = make_ssa_name (vec_dest);
332938fd1498Szrj new_stmt = gimple_build_assign (new_temp, new_var);
333038fd1498Szrj }
333138fd1498Szrj else if (modifier == NARROW)
333238fd1498Szrj {
333338fd1498Szrj tree half_res = make_ssa_name (vectype_in);
333438fd1498Szrj gcall *call = gimple_build_call_internal_vec (ifn, vargs);
333538fd1498Szrj gimple_call_set_lhs (call, half_res);
333638fd1498Szrj gimple_call_set_nothrow (call, true);
333738fd1498Szrj new_stmt = call;
333838fd1498Szrj vect_finish_stmt_generation (stmt, new_stmt, gsi);
333938fd1498Szrj if ((j & 1) == 0)
334038fd1498Szrj {
334138fd1498Szrj prev_res = half_res;
334238fd1498Szrj continue;
334338fd1498Szrj }
334438fd1498Szrj new_temp = make_ssa_name (vec_dest);
334538fd1498Szrj new_stmt = gimple_build_assign (new_temp, convert_code,
334638fd1498Szrj prev_res, half_res);
334738fd1498Szrj }
334838fd1498Szrj else
334938fd1498Szrj {
335038fd1498Szrj gcall *call;
335138fd1498Szrj if (ifn != IFN_LAST)
335238fd1498Szrj call = gimple_build_call_internal_vec (ifn, vargs);
335338fd1498Szrj else
335438fd1498Szrj call = gimple_build_call_vec (fndecl, vargs);
335538fd1498Szrj new_temp = make_ssa_name (vec_dest, new_stmt);
335638fd1498Szrj gimple_call_set_lhs (call, new_temp);
335738fd1498Szrj gimple_call_set_nothrow (call, true);
335838fd1498Szrj new_stmt = call;
335938fd1498Szrj }
336038fd1498Szrj vect_finish_stmt_generation (stmt, new_stmt, gsi);
336138fd1498Szrj
336238fd1498Szrj if (j == (modifier == NARROW ? 1 : 0))
336338fd1498Szrj STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
336438fd1498Szrj else
336538fd1498Szrj STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
336638fd1498Szrj
336738fd1498Szrj prev_stmt_info = vinfo_for_stmt (new_stmt);
336838fd1498Szrj }
336938fd1498Szrj }
337038fd1498Szrj else if (modifier == NARROW)
337138fd1498Szrj {
337238fd1498Szrj for (j = 0; j < ncopies; ++j)
337338fd1498Szrj {
337438fd1498Szrj /* Build argument list for the vectorized call. */
337538fd1498Szrj if (j == 0)
337638fd1498Szrj vargs.create (nargs * 2);
337738fd1498Szrj else
337838fd1498Szrj vargs.truncate (0);
337938fd1498Szrj
338038fd1498Szrj if (slp_node)
338138fd1498Szrj {
338238fd1498Szrj auto_vec<vec<tree> > vec_defs (nargs);
338338fd1498Szrj vec<tree> vec_oprnds0;
338438fd1498Szrj
338538fd1498Szrj for (i = 0; i < nargs; i++)
338638fd1498Szrj vargs.quick_push (gimple_call_arg (stmt, i));
338738fd1498Szrj vect_get_slp_defs (vargs, slp_node, &vec_defs);
338838fd1498Szrj vec_oprnds0 = vec_defs[0];
338938fd1498Szrj
339038fd1498Szrj /* Arguments are ready. Create the new vector stmt. */
339138fd1498Szrj for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
339238fd1498Szrj {
339338fd1498Szrj size_t k;
339438fd1498Szrj vargs.truncate (0);
339538fd1498Szrj for (k = 0; k < nargs; k++)
339638fd1498Szrj {
339738fd1498Szrj vec<tree> vec_oprndsk = vec_defs[k];
339838fd1498Szrj vargs.quick_push (vec_oprndsk[i]);
339938fd1498Szrj vargs.quick_push (vec_oprndsk[i + 1]);
340038fd1498Szrj }
340138fd1498Szrj gcall *call;
340238fd1498Szrj if (ifn != IFN_LAST)
340338fd1498Szrj call = gimple_build_call_internal_vec (ifn, vargs);
340438fd1498Szrj else
340538fd1498Szrj call = gimple_build_call_vec (fndecl, vargs);
340638fd1498Szrj new_temp = make_ssa_name (vec_dest, call);
340738fd1498Szrj gimple_call_set_lhs (call, new_temp);
340838fd1498Szrj gimple_call_set_nothrow (call, true);
340938fd1498Szrj new_stmt = call;
341038fd1498Szrj vect_finish_stmt_generation (stmt, new_stmt, gsi);
341138fd1498Szrj SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
341238fd1498Szrj }
341338fd1498Szrj
341438fd1498Szrj for (i = 0; i < nargs; i++)
341538fd1498Szrj {
341638fd1498Szrj vec<tree> vec_oprndsi = vec_defs[i];
341738fd1498Szrj vec_oprndsi.release ();
341838fd1498Szrj }
341938fd1498Szrj continue;
342038fd1498Szrj }
342138fd1498Szrj
342238fd1498Szrj for (i = 0; i < nargs; i++)
342338fd1498Szrj {
342438fd1498Szrj op = gimple_call_arg (stmt, i);
342538fd1498Szrj if (j == 0)
342638fd1498Szrj {
342738fd1498Szrj vec_oprnd0
342838fd1498Szrj = vect_get_vec_def_for_operand (op, stmt);
342938fd1498Szrj vec_oprnd1
343038fd1498Szrj = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
343138fd1498Szrj }
343238fd1498Szrj else
343338fd1498Szrj {
343438fd1498Szrj vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
343538fd1498Szrj vec_oprnd0
343638fd1498Szrj = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
343738fd1498Szrj vec_oprnd1
343838fd1498Szrj = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
343938fd1498Szrj }
344038fd1498Szrj
344138fd1498Szrj vargs.quick_push (vec_oprnd0);
344238fd1498Szrj vargs.quick_push (vec_oprnd1);
344338fd1498Szrj }
344438fd1498Szrj
344538fd1498Szrj new_stmt = gimple_build_call_vec (fndecl, vargs);
344638fd1498Szrj new_temp = make_ssa_name (vec_dest, new_stmt);
344738fd1498Szrj gimple_call_set_lhs (new_stmt, new_temp);
344838fd1498Szrj vect_finish_stmt_generation (stmt, new_stmt, gsi);
344938fd1498Szrj
345038fd1498Szrj if (j == 0)
345138fd1498Szrj STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
345238fd1498Szrj else
345338fd1498Szrj STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
345438fd1498Szrj
345538fd1498Szrj prev_stmt_info = vinfo_for_stmt (new_stmt);
345638fd1498Szrj }
345738fd1498Szrj
345838fd1498Szrj *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
345938fd1498Szrj }
346038fd1498Szrj else
346138fd1498Szrj /* No current target implements this case. */
346238fd1498Szrj return false;
346338fd1498Szrj
346438fd1498Szrj vargs.release ();
346538fd1498Szrj
346638fd1498Szrj /* The call in STMT might prevent it from being removed in dce.
346738fd1498Szrj We however cannot remove it here, due to the way the ssa name
346838fd1498Szrj it defines is mapped to the new definition. So just replace
346938fd1498Szrj rhs of the statement with something harmless. */
347038fd1498Szrj
347138fd1498Szrj if (slp_node)
347238fd1498Szrj return true;
347338fd1498Szrj
347438fd1498Szrj type = TREE_TYPE (scalar_dest);
347538fd1498Szrj if (is_pattern_stmt_p (stmt_info))
347638fd1498Szrj lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
347738fd1498Szrj else
347838fd1498Szrj lhs = gimple_call_lhs (stmt);
347938fd1498Szrj
348038fd1498Szrj new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
348138fd1498Szrj set_vinfo_for_stmt (new_stmt, stmt_info);
348238fd1498Szrj set_vinfo_for_stmt (stmt, NULL);
348338fd1498Szrj STMT_VINFO_STMT (stmt_info) = new_stmt;
348438fd1498Szrj gsi_replace (gsi, new_stmt, false);
348538fd1498Szrj
348638fd1498Szrj return true;
348738fd1498Szrj }
348838fd1498Szrj
348938fd1498Szrj
/* Information about one call argument, gathered by
   vectorizable_simd_clone_call when deciding whether (and how) a call
   can be vectorized using a SIMD clone of the callee.  */

struct simd_call_arg_info
{
  /* Vector type for the argument; NULL_TREE for constant/external
     (loop-invariant) operands.  */
  tree vectype;
  /* The scalar operand, or for linear arguments the (possibly bias
     adjusted) base value.  */
  tree op;
  /* Step of the argument per scalar iteration if it is linear,
     otherwise 0.  */
  HOST_WIDE_INT linear_step;
  /* Vectorizer classification of the operand's definition
     (constant, external, internal, ...).  */
  enum vect_def_type dt;
  /* Known alignment of a pointer argument in bytes; 0 if unknown.  */
  unsigned int align;
  /* True if the argument is linear only within a simd lane (indexed
     by GOMP_SIMD_LANE) rather than across the whole loop.  */
  bool simd_lane_linear;
};
349938fd1498Szrj
350038fd1498Szrj /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
350138fd1498Szrj is linear within simd lane (but not within whole loop), note it in
350238fd1498Szrj *ARGINFO. */
350338fd1498Szrj
350438fd1498Szrj static void
vect_simd_lane_linear(tree op,struct loop * loop,struct simd_call_arg_info * arginfo)350538fd1498Szrj vect_simd_lane_linear (tree op, struct loop *loop,
350638fd1498Szrj struct simd_call_arg_info *arginfo)
350738fd1498Szrj {
350838fd1498Szrj gimple *def_stmt = SSA_NAME_DEF_STMT (op);
350938fd1498Szrj
351038fd1498Szrj if (!is_gimple_assign (def_stmt)
351138fd1498Szrj || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
351238fd1498Szrj || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
351338fd1498Szrj return;
351438fd1498Szrj
351538fd1498Szrj tree base = gimple_assign_rhs1 (def_stmt);
351638fd1498Szrj HOST_WIDE_INT linear_step = 0;
351738fd1498Szrj tree v = gimple_assign_rhs2 (def_stmt);
351838fd1498Szrj while (TREE_CODE (v) == SSA_NAME)
351938fd1498Szrj {
352038fd1498Szrj tree t;
352138fd1498Szrj def_stmt = SSA_NAME_DEF_STMT (v);
352238fd1498Szrj if (is_gimple_assign (def_stmt))
352338fd1498Szrj switch (gimple_assign_rhs_code (def_stmt))
352438fd1498Szrj {
352538fd1498Szrj case PLUS_EXPR:
352638fd1498Szrj t = gimple_assign_rhs2 (def_stmt);
352738fd1498Szrj if (linear_step || TREE_CODE (t) != INTEGER_CST)
352838fd1498Szrj return;
352938fd1498Szrj base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
353038fd1498Szrj v = gimple_assign_rhs1 (def_stmt);
353138fd1498Szrj continue;
353238fd1498Szrj case MULT_EXPR:
353338fd1498Szrj t = gimple_assign_rhs2 (def_stmt);
353438fd1498Szrj if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
353538fd1498Szrj return;
353638fd1498Szrj linear_step = tree_to_shwi (t);
353738fd1498Szrj v = gimple_assign_rhs1 (def_stmt);
353838fd1498Szrj continue;
353938fd1498Szrj CASE_CONVERT:
354038fd1498Szrj t = gimple_assign_rhs1 (def_stmt);
354138fd1498Szrj if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
354238fd1498Szrj || (TYPE_PRECISION (TREE_TYPE (v))
354338fd1498Szrj < TYPE_PRECISION (TREE_TYPE (t))))
354438fd1498Szrj return;
354538fd1498Szrj if (!linear_step)
354638fd1498Szrj linear_step = 1;
354738fd1498Szrj v = t;
354838fd1498Szrj continue;
354938fd1498Szrj default:
355038fd1498Szrj return;
355138fd1498Szrj }
355238fd1498Szrj else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
355338fd1498Szrj && loop->simduid
355438fd1498Szrj && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
355538fd1498Szrj && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
355638fd1498Szrj == loop->simduid))
355738fd1498Szrj {
355838fd1498Szrj if (!linear_step)
355938fd1498Szrj linear_step = 1;
356038fd1498Szrj arginfo->linear_step = linear_step;
356138fd1498Szrj arginfo->op = base;
356238fd1498Szrj arginfo->simd_lane_linear = true;
356338fd1498Szrj return;
356438fd1498Szrj }
356538fd1498Szrj }
356638fd1498Szrj }
356738fd1498Szrj
356838fd1498Szrj /* Return the number of elements in vector type VECTYPE, which is associated
356938fd1498Szrj with a SIMD clone. At present these vectors always have a constant
357038fd1498Szrj length. */
357138fd1498Szrj
357238fd1498Szrj static unsigned HOST_WIDE_INT
simd_clone_subparts(tree vectype)357338fd1498Szrj simd_clone_subparts (tree vectype)
357438fd1498Szrj {
357538fd1498Szrj return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
357638fd1498Szrj }
357738fd1498Szrj
357838fd1498Szrj /* Function vectorizable_simd_clone_call.
357938fd1498Szrj
358038fd1498Szrj Check if STMT performs a function call that can be vectorized
358138fd1498Szrj by calling a simd clone of the function.
358238fd1498Szrj If VEC_STMT is also passed, vectorize the STMT: create a vectorized
358338fd1498Szrj stmt to replace it, put it in VEC_STMT, and insert it at BSI.
358438fd1498Szrj Return FALSE if not a vectorizable STMT, TRUE otherwise. */
358538fd1498Szrj
358638fd1498Szrj static bool
vectorizable_simd_clone_call(gimple * stmt,gimple_stmt_iterator * gsi,gimple ** vec_stmt,slp_tree slp_node)358738fd1498Szrj vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
358838fd1498Szrj gimple **vec_stmt, slp_tree slp_node)
358938fd1498Szrj {
359038fd1498Szrj tree vec_dest;
359138fd1498Szrj tree scalar_dest;
359238fd1498Szrj tree op, type;
359338fd1498Szrj tree vec_oprnd0 = NULL_TREE;
359438fd1498Szrj stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
359538fd1498Szrj tree vectype;
359638fd1498Szrj unsigned int nunits;
359738fd1498Szrj loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
359838fd1498Szrj bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
359938fd1498Szrj vec_info *vinfo = stmt_info->vinfo;
360038fd1498Szrj struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
360138fd1498Szrj tree fndecl, new_temp;
360238fd1498Szrj gimple *def_stmt;
360338fd1498Szrj gimple *new_stmt = NULL;
360438fd1498Szrj int ncopies, j;
360538fd1498Szrj auto_vec<simd_call_arg_info> arginfo;
360638fd1498Szrj vec<tree> vargs = vNULL;
360738fd1498Szrj size_t i, nargs;
360838fd1498Szrj tree lhs, rtype, ratype;
360938fd1498Szrj vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
361038fd1498Szrj
361138fd1498Szrj /* Is STMT a vectorizable call? */
361238fd1498Szrj if (!is_gimple_call (stmt))
361338fd1498Szrj return false;
361438fd1498Szrj
361538fd1498Szrj fndecl = gimple_call_fndecl (stmt);
361638fd1498Szrj if (fndecl == NULL_TREE)
361738fd1498Szrj return false;
361838fd1498Szrj
361938fd1498Szrj struct cgraph_node *node = cgraph_node::get (fndecl);
362038fd1498Szrj if (node == NULL || node->simd_clones == NULL)
362138fd1498Szrj return false;
362238fd1498Szrj
362338fd1498Szrj if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
362438fd1498Szrj return false;
362538fd1498Szrj
362638fd1498Szrj if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
362738fd1498Szrj && ! vec_stmt)
362838fd1498Szrj return false;
362938fd1498Szrj
363038fd1498Szrj if (gimple_call_lhs (stmt)
363138fd1498Szrj && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
363238fd1498Szrj return false;
363338fd1498Szrj
363438fd1498Szrj gcc_checking_assert (!stmt_can_throw_internal (stmt));
363538fd1498Szrj
363638fd1498Szrj vectype = STMT_VINFO_VECTYPE (stmt_info);
363738fd1498Szrj
363838fd1498Szrj if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
363938fd1498Szrj return false;
364038fd1498Szrj
364138fd1498Szrj /* FORNOW */
364238fd1498Szrj if (slp_node)
364338fd1498Szrj return false;
364438fd1498Szrj
364538fd1498Szrj /* Process function arguments. */
364638fd1498Szrj nargs = gimple_call_num_args (stmt);
364738fd1498Szrj
364838fd1498Szrj /* Bail out if the function has zero arguments. */
364938fd1498Szrj if (nargs == 0)
365038fd1498Szrj return false;
365138fd1498Szrj
365238fd1498Szrj arginfo.reserve (nargs, true);
365338fd1498Szrj
365438fd1498Szrj for (i = 0; i < nargs; i++)
365538fd1498Szrj {
365638fd1498Szrj simd_call_arg_info thisarginfo;
365738fd1498Szrj affine_iv iv;
365838fd1498Szrj
365938fd1498Szrj thisarginfo.linear_step = 0;
366038fd1498Szrj thisarginfo.align = 0;
366138fd1498Szrj thisarginfo.op = NULL_TREE;
366238fd1498Szrj thisarginfo.simd_lane_linear = false;
366338fd1498Szrj
366438fd1498Szrj op = gimple_call_arg (stmt, i);
366538fd1498Szrj if (!vect_is_simple_use (op, vinfo, &def_stmt, &thisarginfo.dt,
366638fd1498Szrj &thisarginfo.vectype)
366738fd1498Szrj || thisarginfo.dt == vect_uninitialized_def)
366838fd1498Szrj {
366938fd1498Szrj if (dump_enabled_p ())
367038fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
367138fd1498Szrj "use not simple.\n");
367238fd1498Szrj return false;
367338fd1498Szrj }
367438fd1498Szrj
367538fd1498Szrj if (thisarginfo.dt == vect_constant_def
367638fd1498Szrj || thisarginfo.dt == vect_external_def)
367738fd1498Szrj gcc_assert (thisarginfo.vectype == NULL_TREE);
367838fd1498Szrj else
367938fd1498Szrj gcc_assert (thisarginfo.vectype != NULL_TREE);
368038fd1498Szrj
368138fd1498Szrj /* For linear arguments, the analyze phase should have saved
368238fd1498Szrj the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
368338fd1498Szrj if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
368438fd1498Szrj && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
368538fd1498Szrj {
368638fd1498Szrj gcc_assert (vec_stmt);
368738fd1498Szrj thisarginfo.linear_step
368838fd1498Szrj = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
368938fd1498Szrj thisarginfo.op
369038fd1498Szrj = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
369138fd1498Szrj thisarginfo.simd_lane_linear
369238fd1498Szrj = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
369338fd1498Szrj == boolean_true_node);
369438fd1498Szrj /* If loop has been peeled for alignment, we need to adjust it. */
369538fd1498Szrj tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
369638fd1498Szrj tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
369738fd1498Szrj if (n1 != n2 && !thisarginfo.simd_lane_linear)
369838fd1498Szrj {
369938fd1498Szrj tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
370038fd1498Szrj tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
370138fd1498Szrj tree opt = TREE_TYPE (thisarginfo.op);
370238fd1498Szrj bias = fold_convert (TREE_TYPE (step), bias);
370338fd1498Szrj bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
370438fd1498Szrj thisarginfo.op
370538fd1498Szrj = fold_build2 (POINTER_TYPE_P (opt)
370638fd1498Szrj ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
370738fd1498Szrj thisarginfo.op, bias);
370838fd1498Szrj }
370938fd1498Szrj }
371038fd1498Szrj else if (!vec_stmt
371138fd1498Szrj && thisarginfo.dt != vect_constant_def
371238fd1498Szrj && thisarginfo.dt != vect_external_def
371338fd1498Szrj && loop_vinfo
371438fd1498Szrj && TREE_CODE (op) == SSA_NAME
371538fd1498Szrj && simple_iv (loop, loop_containing_stmt (stmt), op,
371638fd1498Szrj &iv, false)
371738fd1498Szrj && tree_fits_shwi_p (iv.step))
371838fd1498Szrj {
371938fd1498Szrj thisarginfo.linear_step = tree_to_shwi (iv.step);
372038fd1498Szrj thisarginfo.op = iv.base;
372138fd1498Szrj }
372238fd1498Szrj else if ((thisarginfo.dt == vect_constant_def
372338fd1498Szrj || thisarginfo.dt == vect_external_def)
372438fd1498Szrj && POINTER_TYPE_P (TREE_TYPE (op)))
372538fd1498Szrj thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
372638fd1498Szrj /* Addresses of array elements indexed by GOMP_SIMD_LANE are
372738fd1498Szrj linear too. */
372838fd1498Szrj if (POINTER_TYPE_P (TREE_TYPE (op))
372938fd1498Szrj && !thisarginfo.linear_step
373038fd1498Szrj && !vec_stmt
373138fd1498Szrj && thisarginfo.dt != vect_constant_def
373238fd1498Szrj && thisarginfo.dt != vect_external_def
373338fd1498Szrj && loop_vinfo
373438fd1498Szrj && !slp_node
373538fd1498Szrj && TREE_CODE (op) == SSA_NAME)
373638fd1498Szrj vect_simd_lane_linear (op, loop, &thisarginfo);
373738fd1498Szrj
373838fd1498Szrj arginfo.quick_push (thisarginfo);
373938fd1498Szrj }
374038fd1498Szrj
374138fd1498Szrj unsigned HOST_WIDE_INT vf;
374238fd1498Szrj if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
374338fd1498Szrj {
374438fd1498Szrj if (dump_enabled_p ())
374538fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
374638fd1498Szrj "not considering SIMD clones; not yet supported"
374738fd1498Szrj " for variable-width vectors.\n");
3748*58e805e6Szrj return false;
374938fd1498Szrj }
375038fd1498Szrj
375138fd1498Szrj unsigned int badness = 0;
375238fd1498Szrj struct cgraph_node *bestn = NULL;
375338fd1498Szrj if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
375438fd1498Szrj bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
375538fd1498Szrj else
375638fd1498Szrj for (struct cgraph_node *n = node->simd_clones; n != NULL;
375738fd1498Szrj n = n->simdclone->next_clone)
375838fd1498Szrj {
375938fd1498Szrj unsigned int this_badness = 0;
376038fd1498Szrj if (n->simdclone->simdlen > vf
376138fd1498Szrj || n->simdclone->nargs != nargs)
376238fd1498Szrj continue;
376338fd1498Szrj if (n->simdclone->simdlen < vf)
376438fd1498Szrj this_badness += (exact_log2 (vf)
376538fd1498Szrj - exact_log2 (n->simdclone->simdlen)) * 1024;
376638fd1498Szrj if (n->simdclone->inbranch)
376738fd1498Szrj this_badness += 2048;
376838fd1498Szrj int target_badness = targetm.simd_clone.usable (n);
376938fd1498Szrj if (target_badness < 0)
377038fd1498Szrj continue;
377138fd1498Szrj this_badness += target_badness * 512;
377238fd1498Szrj /* FORNOW: Have to add code to add the mask argument. */
377338fd1498Szrj if (n->simdclone->inbranch)
377438fd1498Szrj continue;
377538fd1498Szrj for (i = 0; i < nargs; i++)
377638fd1498Szrj {
377738fd1498Szrj switch (n->simdclone->args[i].arg_type)
377838fd1498Szrj {
377938fd1498Szrj case SIMD_CLONE_ARG_TYPE_VECTOR:
378038fd1498Szrj if (!useless_type_conversion_p
378138fd1498Szrj (n->simdclone->args[i].orig_type,
378238fd1498Szrj TREE_TYPE (gimple_call_arg (stmt, i))))
378338fd1498Szrj i = -1;
378438fd1498Szrj else if (arginfo[i].dt == vect_constant_def
378538fd1498Szrj || arginfo[i].dt == vect_external_def
378638fd1498Szrj || arginfo[i].linear_step)
378738fd1498Szrj this_badness += 64;
378838fd1498Szrj break;
378938fd1498Szrj case SIMD_CLONE_ARG_TYPE_UNIFORM:
379038fd1498Szrj if (arginfo[i].dt != vect_constant_def
379138fd1498Szrj && arginfo[i].dt != vect_external_def)
379238fd1498Szrj i = -1;
379338fd1498Szrj break;
379438fd1498Szrj case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
379538fd1498Szrj case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
379638fd1498Szrj if (arginfo[i].dt == vect_constant_def
379738fd1498Szrj || arginfo[i].dt == vect_external_def
379838fd1498Szrj || (arginfo[i].linear_step
379938fd1498Szrj != n->simdclone->args[i].linear_step))
380038fd1498Szrj i = -1;
380138fd1498Szrj break;
380238fd1498Szrj case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
380338fd1498Szrj case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
380438fd1498Szrj case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
380538fd1498Szrj case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
380638fd1498Szrj case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
380738fd1498Szrj case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
380838fd1498Szrj /* FORNOW */
380938fd1498Szrj i = -1;
381038fd1498Szrj break;
381138fd1498Szrj case SIMD_CLONE_ARG_TYPE_MASK:
381238fd1498Szrj gcc_unreachable ();
381338fd1498Szrj }
381438fd1498Szrj if (i == (size_t) -1)
381538fd1498Szrj break;
381638fd1498Szrj if (n->simdclone->args[i].alignment > arginfo[i].align)
381738fd1498Szrj {
381838fd1498Szrj i = -1;
381938fd1498Szrj break;
382038fd1498Szrj }
382138fd1498Szrj if (arginfo[i].align)
382238fd1498Szrj this_badness += (exact_log2 (arginfo[i].align)
382338fd1498Szrj - exact_log2 (n->simdclone->args[i].alignment));
382438fd1498Szrj }
382538fd1498Szrj if (i == (size_t) -1)
382638fd1498Szrj continue;
382738fd1498Szrj if (bestn == NULL || this_badness < badness)
382838fd1498Szrj {
382938fd1498Szrj bestn = n;
383038fd1498Szrj badness = this_badness;
383138fd1498Szrj }
383238fd1498Szrj }
383338fd1498Szrj
383438fd1498Szrj if (bestn == NULL)
383538fd1498Szrj return false;
383638fd1498Szrj
383738fd1498Szrj for (i = 0; i < nargs; i++)
383838fd1498Szrj if ((arginfo[i].dt == vect_constant_def
383938fd1498Szrj || arginfo[i].dt == vect_external_def)
384038fd1498Szrj && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
384138fd1498Szrj {
384238fd1498Szrj arginfo[i].vectype
384338fd1498Szrj = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
384438fd1498Szrj i)));
384538fd1498Szrj if (arginfo[i].vectype == NULL
384638fd1498Szrj || (simd_clone_subparts (arginfo[i].vectype)
384738fd1498Szrj > bestn->simdclone->simdlen))
384838fd1498Szrj return false;
384938fd1498Szrj }
385038fd1498Szrj
385138fd1498Szrj fndecl = bestn->decl;
385238fd1498Szrj nunits = bestn->simdclone->simdlen;
385338fd1498Szrj ncopies = vf / nunits;
385438fd1498Szrj
385538fd1498Szrj /* If the function isn't const, only allow it in simd loops where user
385638fd1498Szrj has asserted that at least nunits consecutive iterations can be
385738fd1498Szrj performed using SIMD instructions. */
385838fd1498Szrj if ((loop == NULL || (unsigned) loop->safelen < nunits)
385938fd1498Szrj && gimple_vuse (stmt))
386038fd1498Szrj return false;
386138fd1498Szrj
386238fd1498Szrj /* Sanity check: make sure that at least one copy of the vectorized stmt
386338fd1498Szrj needs to be generated. */
386438fd1498Szrj gcc_assert (ncopies >= 1);
386538fd1498Szrj
386638fd1498Szrj if (!vec_stmt) /* transformation not required. */
386738fd1498Szrj {
386838fd1498Szrj STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
386938fd1498Szrj for (i = 0; i < nargs; i++)
387038fd1498Szrj if ((bestn->simdclone->args[i].arg_type
387138fd1498Szrj == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
387238fd1498Szrj || (bestn->simdclone->args[i].arg_type
387338fd1498Szrj == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
387438fd1498Szrj {
387538fd1498Szrj STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
387638fd1498Szrj + 1);
387738fd1498Szrj STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
387838fd1498Szrj tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
387938fd1498Szrj ? size_type_node : TREE_TYPE (arginfo[i].op);
388038fd1498Szrj tree ls = build_int_cst (lst, arginfo[i].linear_step);
388138fd1498Szrj STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
388238fd1498Szrj tree sll = arginfo[i].simd_lane_linear
388338fd1498Szrj ? boolean_true_node : boolean_false_node;
388438fd1498Szrj STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
388538fd1498Szrj }
388638fd1498Szrj STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
388738fd1498Szrj if (dump_enabled_p ())
388838fd1498Szrj dump_printf_loc (MSG_NOTE, vect_location,
388938fd1498Szrj "=== vectorizable_simd_clone_call ===\n");
389038fd1498Szrj /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
389138fd1498Szrj return true;
389238fd1498Szrj }
389338fd1498Szrj
389438fd1498Szrj /* Transform. */
389538fd1498Szrj
389638fd1498Szrj if (dump_enabled_p ())
389738fd1498Szrj dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
389838fd1498Szrj
389938fd1498Szrj /* Handle def. */
390038fd1498Szrj scalar_dest = gimple_call_lhs (stmt);
390138fd1498Szrj vec_dest = NULL_TREE;
390238fd1498Szrj rtype = NULL_TREE;
390338fd1498Szrj ratype = NULL_TREE;
390438fd1498Szrj if (scalar_dest)
390538fd1498Szrj {
390638fd1498Szrj vec_dest = vect_create_destination_var (scalar_dest, vectype);
390738fd1498Szrj rtype = TREE_TYPE (TREE_TYPE (fndecl));
390838fd1498Szrj if (TREE_CODE (rtype) == ARRAY_TYPE)
390938fd1498Szrj {
391038fd1498Szrj ratype = rtype;
391138fd1498Szrj rtype = TREE_TYPE (ratype);
391238fd1498Szrj }
391338fd1498Szrj }
391438fd1498Szrj
391538fd1498Szrj prev_stmt_info = NULL;
391638fd1498Szrj for (j = 0; j < ncopies; ++j)
391738fd1498Szrj {
391838fd1498Szrj /* Build argument list for the vectorized call. */
391938fd1498Szrj if (j == 0)
392038fd1498Szrj vargs.create (nargs);
392138fd1498Szrj else
392238fd1498Szrj vargs.truncate (0);
392338fd1498Szrj
392438fd1498Szrj for (i = 0; i < nargs; i++)
392538fd1498Szrj {
392638fd1498Szrj unsigned int k, l, m, o;
392738fd1498Szrj tree atype;
392838fd1498Szrj op = gimple_call_arg (stmt, i);
392938fd1498Szrj switch (bestn->simdclone->args[i].arg_type)
393038fd1498Szrj {
393138fd1498Szrj case SIMD_CLONE_ARG_TYPE_VECTOR:
393238fd1498Szrj atype = bestn->simdclone->args[i].vector_type;
393338fd1498Szrj o = nunits / simd_clone_subparts (atype);
393438fd1498Szrj for (m = j * o; m < (j + 1) * o; m++)
393538fd1498Szrj {
393638fd1498Szrj if (simd_clone_subparts (atype)
393738fd1498Szrj < simd_clone_subparts (arginfo[i].vectype))
393838fd1498Szrj {
393938fd1498Szrj poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
394038fd1498Szrj k = (simd_clone_subparts (arginfo[i].vectype)
394138fd1498Szrj / simd_clone_subparts (atype));
394238fd1498Szrj gcc_assert ((k & (k - 1)) == 0);
394338fd1498Szrj if (m == 0)
394438fd1498Szrj vec_oprnd0
394538fd1498Szrj = vect_get_vec_def_for_operand (op, stmt);
394638fd1498Szrj else
394738fd1498Szrj {
394838fd1498Szrj vec_oprnd0 = arginfo[i].op;
394938fd1498Szrj if ((m & (k - 1)) == 0)
395038fd1498Szrj vec_oprnd0
395138fd1498Szrj = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
395238fd1498Szrj vec_oprnd0);
395338fd1498Szrj }
395438fd1498Szrj arginfo[i].op = vec_oprnd0;
395538fd1498Szrj vec_oprnd0
395638fd1498Szrj = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
395738fd1498Szrj bitsize_int (prec),
395838fd1498Szrj bitsize_int ((m & (k - 1)) * prec));
395938fd1498Szrj new_stmt
396038fd1498Szrj = gimple_build_assign (make_ssa_name (atype),
396138fd1498Szrj vec_oprnd0);
396238fd1498Szrj vect_finish_stmt_generation (stmt, new_stmt, gsi);
396338fd1498Szrj vargs.safe_push (gimple_assign_lhs (new_stmt));
396438fd1498Szrj }
396538fd1498Szrj else
396638fd1498Szrj {
396738fd1498Szrj k = (simd_clone_subparts (atype)
396838fd1498Szrj / simd_clone_subparts (arginfo[i].vectype));
396938fd1498Szrj gcc_assert ((k & (k - 1)) == 0);
397038fd1498Szrj vec<constructor_elt, va_gc> *ctor_elts;
397138fd1498Szrj if (k != 1)
397238fd1498Szrj vec_alloc (ctor_elts, k);
397338fd1498Szrj else
397438fd1498Szrj ctor_elts = NULL;
397538fd1498Szrj for (l = 0; l < k; l++)
397638fd1498Szrj {
397738fd1498Szrj if (m == 0 && l == 0)
397838fd1498Szrj vec_oprnd0
397938fd1498Szrj = vect_get_vec_def_for_operand (op, stmt);
398038fd1498Szrj else
398138fd1498Szrj vec_oprnd0
398238fd1498Szrj = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
398338fd1498Szrj arginfo[i].op);
398438fd1498Szrj arginfo[i].op = vec_oprnd0;
398538fd1498Szrj if (k == 1)
398638fd1498Szrj break;
398738fd1498Szrj CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
398838fd1498Szrj vec_oprnd0);
398938fd1498Szrj }
399038fd1498Szrj if (k == 1)
399138fd1498Szrj vargs.safe_push (vec_oprnd0);
399238fd1498Szrj else
399338fd1498Szrj {
399438fd1498Szrj vec_oprnd0 = build_constructor (atype, ctor_elts);
399538fd1498Szrj new_stmt
399638fd1498Szrj = gimple_build_assign (make_ssa_name (atype),
399738fd1498Szrj vec_oprnd0);
399838fd1498Szrj vect_finish_stmt_generation (stmt, new_stmt, gsi);
399938fd1498Szrj vargs.safe_push (gimple_assign_lhs (new_stmt));
400038fd1498Szrj }
400138fd1498Szrj }
400238fd1498Szrj }
400338fd1498Szrj break;
400438fd1498Szrj case SIMD_CLONE_ARG_TYPE_UNIFORM:
400538fd1498Szrj vargs.safe_push (op);
400638fd1498Szrj break;
400738fd1498Szrj case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
400838fd1498Szrj case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
400938fd1498Szrj if (j == 0)
401038fd1498Szrj {
401138fd1498Szrj gimple_seq stmts;
401238fd1498Szrj arginfo[i].op
401338fd1498Szrj = force_gimple_operand (arginfo[i].op, &stmts, true,
401438fd1498Szrj NULL_TREE);
401538fd1498Szrj if (stmts != NULL)
401638fd1498Szrj {
401738fd1498Szrj basic_block new_bb;
401838fd1498Szrj edge pe = loop_preheader_edge (loop);
401938fd1498Szrj new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
402038fd1498Szrj gcc_assert (!new_bb);
402138fd1498Szrj }
402238fd1498Szrj if (arginfo[i].simd_lane_linear)
402338fd1498Szrj {
402438fd1498Szrj vargs.safe_push (arginfo[i].op);
402538fd1498Szrj break;
402638fd1498Szrj }
402738fd1498Szrj tree phi_res = copy_ssa_name (op);
402838fd1498Szrj gphi *new_phi = create_phi_node (phi_res, loop->header);
402938fd1498Szrj set_vinfo_for_stmt (new_phi,
403038fd1498Szrj new_stmt_vec_info (new_phi, loop_vinfo));
403138fd1498Szrj add_phi_arg (new_phi, arginfo[i].op,
403238fd1498Szrj loop_preheader_edge (loop), UNKNOWN_LOCATION);
403338fd1498Szrj enum tree_code code
403438fd1498Szrj = POINTER_TYPE_P (TREE_TYPE (op))
403538fd1498Szrj ? POINTER_PLUS_EXPR : PLUS_EXPR;
403638fd1498Szrj tree type = POINTER_TYPE_P (TREE_TYPE (op))
403738fd1498Szrj ? sizetype : TREE_TYPE (op);
403838fd1498Szrj widest_int cst
403938fd1498Szrj = wi::mul (bestn->simdclone->args[i].linear_step,
404038fd1498Szrj ncopies * nunits);
404138fd1498Szrj tree tcst = wide_int_to_tree (type, cst);
404238fd1498Szrj tree phi_arg = copy_ssa_name (op);
404338fd1498Szrj new_stmt
404438fd1498Szrj = gimple_build_assign (phi_arg, code, phi_res, tcst);
404538fd1498Szrj gimple_stmt_iterator si = gsi_after_labels (loop->header);
404638fd1498Szrj gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
404738fd1498Szrj set_vinfo_for_stmt (new_stmt,
404838fd1498Szrj new_stmt_vec_info (new_stmt, loop_vinfo));
404938fd1498Szrj add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
405038fd1498Szrj UNKNOWN_LOCATION);
405138fd1498Szrj arginfo[i].op = phi_res;
405238fd1498Szrj vargs.safe_push (phi_res);
405338fd1498Szrj }
405438fd1498Szrj else
405538fd1498Szrj {
405638fd1498Szrj enum tree_code code
405738fd1498Szrj = POINTER_TYPE_P (TREE_TYPE (op))
405838fd1498Szrj ? POINTER_PLUS_EXPR : PLUS_EXPR;
405938fd1498Szrj tree type = POINTER_TYPE_P (TREE_TYPE (op))
406038fd1498Szrj ? sizetype : TREE_TYPE (op);
406138fd1498Szrj widest_int cst
406238fd1498Szrj = wi::mul (bestn->simdclone->args[i].linear_step,
406338fd1498Szrj j * nunits);
406438fd1498Szrj tree tcst = wide_int_to_tree (type, cst);
406538fd1498Szrj new_temp = make_ssa_name (TREE_TYPE (op));
406638fd1498Szrj new_stmt = gimple_build_assign (new_temp, code,
406738fd1498Szrj arginfo[i].op, tcst);
406838fd1498Szrj vect_finish_stmt_generation (stmt, new_stmt, gsi);
406938fd1498Szrj vargs.safe_push (new_temp);
407038fd1498Szrj }
407138fd1498Szrj break;
407238fd1498Szrj case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
407338fd1498Szrj case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
407438fd1498Szrj case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
407538fd1498Szrj case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
407638fd1498Szrj case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
407738fd1498Szrj case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
407838fd1498Szrj default:
407938fd1498Szrj gcc_unreachable ();
408038fd1498Szrj }
408138fd1498Szrj }
408238fd1498Szrj
408338fd1498Szrj new_stmt = gimple_build_call_vec (fndecl, vargs);
408438fd1498Szrj if (vec_dest)
408538fd1498Szrj {
408638fd1498Szrj gcc_assert (ratype || simd_clone_subparts (rtype) == nunits);
408738fd1498Szrj if (ratype)
408838fd1498Szrj new_temp = create_tmp_var (ratype);
408938fd1498Szrj else if (simd_clone_subparts (vectype)
409038fd1498Szrj == simd_clone_subparts (rtype))
409138fd1498Szrj new_temp = make_ssa_name (vec_dest, new_stmt);
409238fd1498Szrj else
409338fd1498Szrj new_temp = make_ssa_name (rtype, new_stmt);
409438fd1498Szrj gimple_call_set_lhs (new_stmt, new_temp);
409538fd1498Szrj }
409638fd1498Szrj vect_finish_stmt_generation (stmt, new_stmt, gsi);
409738fd1498Szrj
409838fd1498Szrj if (vec_dest)
409938fd1498Szrj {
410038fd1498Szrj if (simd_clone_subparts (vectype) < nunits)
410138fd1498Szrj {
410238fd1498Szrj unsigned int k, l;
410338fd1498Szrj poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
410438fd1498Szrj poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
410538fd1498Szrj k = nunits / simd_clone_subparts (vectype);
410638fd1498Szrj gcc_assert ((k & (k - 1)) == 0);
410738fd1498Szrj for (l = 0; l < k; l++)
410838fd1498Szrj {
410938fd1498Szrj tree t;
411038fd1498Szrj if (ratype)
411138fd1498Szrj {
411238fd1498Szrj t = build_fold_addr_expr (new_temp);
411338fd1498Szrj t = build2 (MEM_REF, vectype, t,
411438fd1498Szrj build_int_cst (TREE_TYPE (t), l * bytes));
411538fd1498Szrj }
411638fd1498Szrj else
411738fd1498Szrj t = build3 (BIT_FIELD_REF, vectype, new_temp,
411838fd1498Szrj bitsize_int (prec), bitsize_int (l * prec));
411938fd1498Szrj new_stmt
412038fd1498Szrj = gimple_build_assign (make_ssa_name (vectype), t);
412138fd1498Szrj vect_finish_stmt_generation (stmt, new_stmt, gsi);
412238fd1498Szrj if (j == 0 && l == 0)
412338fd1498Szrj STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
412438fd1498Szrj else
412538fd1498Szrj STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
412638fd1498Szrj
412738fd1498Szrj prev_stmt_info = vinfo_for_stmt (new_stmt);
412838fd1498Szrj }
412938fd1498Szrj
413038fd1498Szrj if (ratype)
413138fd1498Szrj {
413238fd1498Szrj tree clobber = build_constructor (ratype, NULL);
413338fd1498Szrj TREE_THIS_VOLATILE (clobber) = 1;
413438fd1498Szrj new_stmt = gimple_build_assign (new_temp, clobber);
413538fd1498Szrj vect_finish_stmt_generation (stmt, new_stmt, gsi);
413638fd1498Szrj }
413738fd1498Szrj continue;
413838fd1498Szrj }
413938fd1498Szrj else if (simd_clone_subparts (vectype) > nunits)
414038fd1498Szrj {
414138fd1498Szrj unsigned int k = (simd_clone_subparts (vectype)
414238fd1498Szrj / simd_clone_subparts (rtype));
414338fd1498Szrj gcc_assert ((k & (k - 1)) == 0);
414438fd1498Szrj if ((j & (k - 1)) == 0)
414538fd1498Szrj vec_alloc (ret_ctor_elts, k);
414638fd1498Szrj if (ratype)
414738fd1498Szrj {
414838fd1498Szrj unsigned int m, o = nunits / simd_clone_subparts (rtype);
414938fd1498Szrj for (m = 0; m < o; m++)
415038fd1498Szrj {
415138fd1498Szrj tree tem = build4 (ARRAY_REF, rtype, new_temp,
415238fd1498Szrj size_int (m), NULL_TREE, NULL_TREE);
415338fd1498Szrj new_stmt
415438fd1498Szrj = gimple_build_assign (make_ssa_name (rtype), tem);
415538fd1498Szrj vect_finish_stmt_generation (stmt, new_stmt, gsi);
415638fd1498Szrj CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
415738fd1498Szrj gimple_assign_lhs (new_stmt));
415838fd1498Szrj }
415938fd1498Szrj tree clobber = build_constructor (ratype, NULL);
416038fd1498Szrj TREE_THIS_VOLATILE (clobber) = 1;
416138fd1498Szrj new_stmt = gimple_build_assign (new_temp, clobber);
416238fd1498Szrj vect_finish_stmt_generation (stmt, new_stmt, gsi);
416338fd1498Szrj }
416438fd1498Szrj else
416538fd1498Szrj CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
416638fd1498Szrj if ((j & (k - 1)) != k - 1)
416738fd1498Szrj continue;
416838fd1498Szrj vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
416938fd1498Szrj new_stmt
417038fd1498Szrj = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
417138fd1498Szrj vect_finish_stmt_generation (stmt, new_stmt, gsi);
417238fd1498Szrj
417338fd1498Szrj if ((unsigned) j == k - 1)
417438fd1498Szrj STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
417538fd1498Szrj else
417638fd1498Szrj STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
417738fd1498Szrj
417838fd1498Szrj prev_stmt_info = vinfo_for_stmt (new_stmt);
417938fd1498Szrj continue;
418038fd1498Szrj }
418138fd1498Szrj else if (ratype)
418238fd1498Szrj {
418338fd1498Szrj tree t = build_fold_addr_expr (new_temp);
418438fd1498Szrj t = build2 (MEM_REF, vectype, t,
418538fd1498Szrj build_int_cst (TREE_TYPE (t), 0));
418638fd1498Szrj new_stmt
418738fd1498Szrj = gimple_build_assign (make_ssa_name (vec_dest), t);
418838fd1498Szrj vect_finish_stmt_generation (stmt, new_stmt, gsi);
418938fd1498Szrj tree clobber = build_constructor (ratype, NULL);
419038fd1498Szrj TREE_THIS_VOLATILE (clobber) = 1;
419138fd1498Szrj vect_finish_stmt_generation (stmt,
419238fd1498Szrj gimple_build_assign (new_temp,
419338fd1498Szrj clobber), gsi);
419438fd1498Szrj }
419538fd1498Szrj }
419638fd1498Szrj
419738fd1498Szrj if (j == 0)
419838fd1498Szrj STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
419938fd1498Szrj else
420038fd1498Szrj STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
420138fd1498Szrj
420238fd1498Szrj prev_stmt_info = vinfo_for_stmt (new_stmt);
420338fd1498Szrj }
420438fd1498Szrj
420538fd1498Szrj vargs.release ();
420638fd1498Szrj
420738fd1498Szrj /* The call in STMT might prevent it from being removed in dce.
420838fd1498Szrj We however cannot remove it here, due to the way the ssa name
420938fd1498Szrj it defines is mapped to the new definition. So just replace
421038fd1498Szrj rhs of the statement with something harmless. */
421138fd1498Szrj
421238fd1498Szrj if (slp_node)
421338fd1498Szrj return true;
421438fd1498Szrj
421538fd1498Szrj if (scalar_dest)
421638fd1498Szrj {
421738fd1498Szrj type = TREE_TYPE (scalar_dest);
421838fd1498Szrj if (is_pattern_stmt_p (stmt_info))
421938fd1498Szrj lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
422038fd1498Szrj else
422138fd1498Szrj lhs = gimple_call_lhs (stmt);
422238fd1498Szrj new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
422338fd1498Szrj }
422438fd1498Szrj else
422538fd1498Szrj new_stmt = gimple_build_nop ();
422638fd1498Szrj set_vinfo_for_stmt (new_stmt, stmt_info);
422738fd1498Szrj set_vinfo_for_stmt (stmt, NULL);
422838fd1498Szrj STMT_VINFO_STMT (stmt_info) = new_stmt;
422938fd1498Szrj gsi_replace (gsi, new_stmt, true);
423038fd1498Szrj unlink_stmt_vdef (stmt);
423138fd1498Szrj
423238fd1498Szrj return true;
423338fd1498Szrj }
423438fd1498Szrj
423538fd1498Szrj
423638fd1498Szrj /* Function vect_gen_widened_results_half
423738fd1498Szrj
423838fd1498Szrj Create a vector stmt whose code, type, number of arguments, and result
423938fd1498Szrj variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
424038fd1498Szrj VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
424138fd1498Szrj In the case that CODE is a CALL_EXPR, this means that a call to DECL
424238fd1498Szrj needs to be created (DECL is a function-decl of a target-builtin).
424338fd1498Szrj STMT is the original scalar stmt that we are vectorizing. */
424438fd1498Szrj
424538fd1498Szrj static gimple *
vect_gen_widened_results_half(enum tree_code code,tree decl,tree vec_oprnd0,tree vec_oprnd1,int op_type,tree vec_dest,gimple_stmt_iterator * gsi,gimple * stmt)424638fd1498Szrj vect_gen_widened_results_half (enum tree_code code,
424738fd1498Szrj tree decl,
424838fd1498Szrj tree vec_oprnd0, tree vec_oprnd1, int op_type,
424938fd1498Szrj tree vec_dest, gimple_stmt_iterator *gsi,
425038fd1498Szrj gimple *stmt)
425138fd1498Szrj {
425238fd1498Szrj gimple *new_stmt;
425338fd1498Szrj tree new_temp;
425438fd1498Szrj
425538fd1498Szrj /* Generate half of the widened result: */
425638fd1498Szrj if (code == CALL_EXPR)
425738fd1498Szrj {
425838fd1498Szrj /* Target specific support */
425938fd1498Szrj if (op_type == binary_op)
426038fd1498Szrj new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
426138fd1498Szrj else
426238fd1498Szrj new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
426338fd1498Szrj new_temp = make_ssa_name (vec_dest, new_stmt);
426438fd1498Szrj gimple_call_set_lhs (new_stmt, new_temp);
426538fd1498Szrj }
426638fd1498Szrj else
426738fd1498Szrj {
426838fd1498Szrj /* Generic support */
426938fd1498Szrj gcc_assert (op_type == TREE_CODE_LENGTH (code));
427038fd1498Szrj if (op_type != binary_op)
427138fd1498Szrj vec_oprnd1 = NULL;
427238fd1498Szrj new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
427338fd1498Szrj new_temp = make_ssa_name (vec_dest, new_stmt);
427438fd1498Szrj gimple_assign_set_lhs (new_stmt, new_temp);
427538fd1498Szrj }
427638fd1498Szrj vect_finish_stmt_generation (stmt, new_stmt, gsi);
427738fd1498Szrj
427838fd1498Szrj return new_stmt;
427938fd1498Szrj }
428038fd1498Szrj
428138fd1498Szrj
428238fd1498Szrj /* Get vectorized definitions for loop-based vectorization. For the first
428338fd1498Szrj operand we call vect_get_vec_def_for_operand() (with OPRND containing
428438fd1498Szrj scalar operand), and for the rest we get a copy with
428538fd1498Szrj vect_get_vec_def_for_stmt_copy() using the previous vector definition
428638fd1498Szrj (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
428738fd1498Szrj The vectors are collected into VEC_OPRNDS. */
428838fd1498Szrj
428938fd1498Szrj static void
vect_get_loop_based_defs(tree * oprnd,gimple * stmt,enum vect_def_type dt,vec<tree> * vec_oprnds,int multi_step_cvt)429038fd1498Szrj vect_get_loop_based_defs (tree *oprnd, gimple *stmt, enum vect_def_type dt,
429138fd1498Szrj vec<tree> *vec_oprnds, int multi_step_cvt)
429238fd1498Szrj {
429338fd1498Szrj tree vec_oprnd;
429438fd1498Szrj
429538fd1498Szrj /* Get first vector operand. */
429638fd1498Szrj /* All the vector operands except the very first one (that is scalar oprnd)
429738fd1498Szrj are stmt copies. */
429838fd1498Szrj if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
429938fd1498Szrj vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt);
430038fd1498Szrj else
430138fd1498Szrj vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
430238fd1498Szrj
430338fd1498Szrj vec_oprnds->quick_push (vec_oprnd);
430438fd1498Szrj
430538fd1498Szrj /* Get second vector operand. */
430638fd1498Szrj vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
430738fd1498Szrj vec_oprnds->quick_push (vec_oprnd);
430838fd1498Szrj
430938fd1498Szrj *oprnd = vec_oprnd;
431038fd1498Szrj
431138fd1498Szrj /* For conversion in multiple steps, continue to get operands
431238fd1498Szrj recursively. */
431338fd1498Szrj if (multi_step_cvt)
431438fd1498Szrj vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
431538fd1498Szrj }
431638fd1498Szrj
431738fd1498Szrj
/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
   Each demotion CODE combines a pair of input vectors into one narrower
   result whose destination variable is popped from VEC_DSTS.  For multi-step
   conversions the intermediate results are stored back into VEC_OPRNDS and
   the function recurses (packing with VEC_PACK_TRUNC_EXPR) until the final
   destination type is reached.  STMT is the scalar statement being
   vectorized, GSI the insertion point, SLP_NODE the SLP node if any, and
   *PREV_STMT_INFO the tail of the STMT_VINFO_RELATED_STMT chain.
   NOTE(review): VEC_DSTS is passed by value but vec<> copies appear to
   share the underlying buffer; the final quick_push restores the popped
   element for the caller — confirm against vec.h semantics.  */

static void
vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
				       int multi_step_cvt, gimple *stmt,
				       vec<tree> vec_dsts,
				       gimple_stmt_iterator *gsi,
				       slp_tree slp_node, enum tree_code code,
				       stmt_vec_info *prev_stmt_info)
{
  unsigned int i;
  tree vop0, vop1, new_tmp, vec_dest;
  gimple *new_stmt;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* Destination variable for this demotion level; pushed back at the end
     so deeper recursion levels see the remaining entries.  */
  vec_dest = vec_dsts.pop ();

  /* Consume the operands pairwise: each demotion stmt takes two inputs.  */
  for (i = 0; i < vec_oprnds->length (); i += 2)
    {
      /* Create demotion operation.  */
      vop0 = (*vec_oprnds)[i];
      vop1 = (*vec_oprnds)[i + 1];
      new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
      new_tmp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_tmp);
      vect_finish_stmt_generation (stmt, new_stmt, gsi);

      if (multi_step_cvt)
	/* Store the resulting vector for next recursive call.  */
	(*vec_oprnds)[i/2] = new_tmp;
      else
	{
	  /* This is the last step of the conversion sequence. Store the
	     vectors in SLP_NODE or in vector info of the scalar statement
	     (or in STMT_VINFO_RELATED_STMT chain).  */
	  if (slp_node)
	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	  else
	    {
	      if (!*prev_stmt_info)
		STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
	      else
		STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;

	      *prev_stmt_info = vinfo_for_stmt (new_stmt);
	    }
	}
    }

  /* For multi-step demotion operations we first generate demotion operations
     from the source type to the intermediate types, and then combine the
     results (stored in VEC_OPRNDS) in demotion operation to the destination
     type.  */
  if (multi_step_cvt)
    {
      /* At each level of recursion we have half of the operands we had at the
	 previous level.  */
      vec_oprnds->truncate ((i+1)/2);
      vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
					     stmt, vec_dsts, gsi, slp_node,
					     VEC_PACK_TRUNC_EXPR,
					     prev_stmt_info);
    }

  /* Restore the destination popped above.  */
  vec_dsts.quick_push (vec_dest);
}
438638fd1498Szrj
438738fd1498Szrj
438838fd1498Szrj /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
438938fd1498Szrj and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
439038fd1498Szrj the resulting vectors and call the function recursively. */
439138fd1498Szrj
439238fd1498Szrj static void
vect_create_vectorized_promotion_stmts(vec<tree> * vec_oprnds0,vec<tree> * vec_oprnds1,gimple * stmt,tree vec_dest,gimple_stmt_iterator * gsi,enum tree_code code1,enum tree_code code2,tree decl1,tree decl2,int op_type)439338fd1498Szrj vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
439438fd1498Szrj vec<tree> *vec_oprnds1,
439538fd1498Szrj gimple *stmt, tree vec_dest,
439638fd1498Szrj gimple_stmt_iterator *gsi,
439738fd1498Szrj enum tree_code code1,
439838fd1498Szrj enum tree_code code2, tree decl1,
439938fd1498Szrj tree decl2, int op_type)
440038fd1498Szrj {
440138fd1498Szrj int i;
440238fd1498Szrj tree vop0, vop1, new_tmp1, new_tmp2;
440338fd1498Szrj gimple *new_stmt1, *new_stmt2;
440438fd1498Szrj vec<tree> vec_tmp = vNULL;
440538fd1498Szrj
440638fd1498Szrj vec_tmp.create (vec_oprnds0->length () * 2);
440738fd1498Szrj FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
440838fd1498Szrj {
440938fd1498Szrj if (op_type == binary_op)
441038fd1498Szrj vop1 = (*vec_oprnds1)[i];
441138fd1498Szrj else
441238fd1498Szrj vop1 = NULL_TREE;
441338fd1498Szrj
441438fd1498Szrj /* Generate the two halves of promotion operation. */
441538fd1498Szrj new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
441638fd1498Szrj op_type, vec_dest, gsi, stmt);
441738fd1498Szrj new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
441838fd1498Szrj op_type, vec_dest, gsi, stmt);
441938fd1498Szrj if (is_gimple_call (new_stmt1))
442038fd1498Szrj {
442138fd1498Szrj new_tmp1 = gimple_call_lhs (new_stmt1);
442238fd1498Szrj new_tmp2 = gimple_call_lhs (new_stmt2);
442338fd1498Szrj }
442438fd1498Szrj else
442538fd1498Szrj {
442638fd1498Szrj new_tmp1 = gimple_assign_lhs (new_stmt1);
442738fd1498Szrj new_tmp2 = gimple_assign_lhs (new_stmt2);
442838fd1498Szrj }
442938fd1498Szrj
443038fd1498Szrj /* Store the results for the next step. */
443138fd1498Szrj vec_tmp.quick_push (new_tmp1);
443238fd1498Szrj vec_tmp.quick_push (new_tmp2);
443338fd1498Szrj }
443438fd1498Szrj
443538fd1498Szrj vec_oprnds0->release ();
443638fd1498Szrj *vec_oprnds0 = vec_tmp;
443738fd1498Szrj }
443838fd1498Szrj
443938fd1498Szrj
444038fd1498Szrj /* Check if STMT performs a conversion operation, that can be vectorized.
444138fd1498Szrj If VEC_STMT is also passed, vectorize the STMT: create a vectorized
444238fd1498Szrj stmt to replace it, put it in VEC_STMT, and insert it at GSI.
444338fd1498Szrj Return FALSE if not a vectorizable STMT, TRUE otherwise. */
444438fd1498Szrj
444538fd1498Szrj static bool
vectorizable_conversion(gimple * stmt,gimple_stmt_iterator * gsi,gimple ** vec_stmt,slp_tree slp_node)444638fd1498Szrj vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
444738fd1498Szrj gimple **vec_stmt, slp_tree slp_node)
444838fd1498Szrj {
444938fd1498Szrj tree vec_dest;
445038fd1498Szrj tree scalar_dest;
445138fd1498Szrj tree op0, op1 = NULL_TREE;
445238fd1498Szrj tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
445338fd1498Szrj stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
445438fd1498Szrj loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
445538fd1498Szrj enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
445638fd1498Szrj enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
445738fd1498Szrj tree decl1 = NULL_TREE, decl2 = NULL_TREE;
445838fd1498Szrj tree new_temp;
445938fd1498Szrj gimple *def_stmt;
446038fd1498Szrj enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
446138fd1498Szrj int ndts = 2;
446238fd1498Szrj gimple *new_stmt = NULL;
446338fd1498Szrj stmt_vec_info prev_stmt_info;
446438fd1498Szrj poly_uint64 nunits_in;
446538fd1498Szrj poly_uint64 nunits_out;
446638fd1498Szrj tree vectype_out, vectype_in;
446738fd1498Szrj int ncopies, i, j;
446838fd1498Szrj tree lhs_type, rhs_type;
446938fd1498Szrj enum { NARROW, NONE, WIDEN } modifier;
447038fd1498Szrj vec<tree> vec_oprnds0 = vNULL;
447138fd1498Szrj vec<tree> vec_oprnds1 = vNULL;
447238fd1498Szrj tree vop0;
447338fd1498Szrj bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
447438fd1498Szrj vec_info *vinfo = stmt_info->vinfo;
447538fd1498Szrj int multi_step_cvt = 0;
447638fd1498Szrj vec<tree> interm_types = vNULL;
447738fd1498Szrj tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
447838fd1498Szrj int op_type;
447938fd1498Szrj unsigned short fltsz;
448038fd1498Szrj
448138fd1498Szrj /* Is STMT a vectorizable conversion? */
448238fd1498Szrj
448338fd1498Szrj if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
448438fd1498Szrj return false;
448538fd1498Szrj
448638fd1498Szrj if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
448738fd1498Szrj && ! vec_stmt)
448838fd1498Szrj return false;
448938fd1498Szrj
449038fd1498Szrj if (!is_gimple_assign (stmt))
449138fd1498Szrj return false;
449238fd1498Szrj
449338fd1498Szrj if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
449438fd1498Szrj return false;
449538fd1498Szrj
449638fd1498Szrj code = gimple_assign_rhs_code (stmt);
449738fd1498Szrj if (!CONVERT_EXPR_CODE_P (code)
449838fd1498Szrj && code != FIX_TRUNC_EXPR
449938fd1498Szrj && code != FLOAT_EXPR
450038fd1498Szrj && code != WIDEN_MULT_EXPR
450138fd1498Szrj && code != WIDEN_LSHIFT_EXPR)
450238fd1498Szrj return false;
450338fd1498Szrj
450438fd1498Szrj op_type = TREE_CODE_LENGTH (code);
450538fd1498Szrj
450638fd1498Szrj /* Check types of lhs and rhs. */
450738fd1498Szrj scalar_dest = gimple_assign_lhs (stmt);
450838fd1498Szrj lhs_type = TREE_TYPE (scalar_dest);
450938fd1498Szrj vectype_out = STMT_VINFO_VECTYPE (stmt_info);
451038fd1498Szrj
451138fd1498Szrj op0 = gimple_assign_rhs1 (stmt);
451238fd1498Szrj rhs_type = TREE_TYPE (op0);
451338fd1498Szrj
451438fd1498Szrj if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
451538fd1498Szrj && !((INTEGRAL_TYPE_P (lhs_type)
451638fd1498Szrj && INTEGRAL_TYPE_P (rhs_type))
451738fd1498Szrj || (SCALAR_FLOAT_TYPE_P (lhs_type)
451838fd1498Szrj && SCALAR_FLOAT_TYPE_P (rhs_type))))
451938fd1498Szrj return false;
452038fd1498Szrj
452138fd1498Szrj if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
452238fd1498Szrj && ((INTEGRAL_TYPE_P (lhs_type)
452338fd1498Szrj && !type_has_mode_precision_p (lhs_type))
452438fd1498Szrj || (INTEGRAL_TYPE_P (rhs_type)
452538fd1498Szrj && !type_has_mode_precision_p (rhs_type))))
452638fd1498Szrj {
452738fd1498Szrj if (dump_enabled_p ())
452838fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
452938fd1498Szrj "type conversion to/from bit-precision unsupported."
453038fd1498Szrj "\n");
453138fd1498Szrj return false;
453238fd1498Szrj }
453338fd1498Szrj
453438fd1498Szrj /* Check the operands of the operation. */
453538fd1498Szrj if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype_in))
453638fd1498Szrj {
453738fd1498Szrj if (dump_enabled_p ())
453838fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
453938fd1498Szrj "use not simple.\n");
454038fd1498Szrj return false;
454138fd1498Szrj }
454238fd1498Szrj if (op_type == binary_op)
454338fd1498Szrj {
454438fd1498Szrj bool ok;
454538fd1498Szrj
454638fd1498Szrj op1 = gimple_assign_rhs2 (stmt);
454738fd1498Szrj gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
454838fd1498Szrj /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
454938fd1498Szrj OP1. */
455038fd1498Szrj if (CONSTANT_CLASS_P (op0))
455138fd1498Szrj ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &vectype_in);
455238fd1498Szrj else
455338fd1498Szrj ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]);
455438fd1498Szrj
455538fd1498Szrj if (!ok)
455638fd1498Szrj {
455738fd1498Szrj if (dump_enabled_p ())
455838fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
455938fd1498Szrj "use not simple.\n");
456038fd1498Szrj return false;
456138fd1498Szrj }
456238fd1498Szrj }
456338fd1498Szrj
456438fd1498Szrj /* If op0 is an external or constant defs use a vector type of
456538fd1498Szrj the same size as the output vector type. */
456638fd1498Szrj if (!vectype_in)
456738fd1498Szrj vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
456838fd1498Szrj if (vec_stmt)
456938fd1498Szrj gcc_assert (vectype_in);
457038fd1498Szrj if (!vectype_in)
457138fd1498Szrj {
457238fd1498Szrj if (dump_enabled_p ())
457338fd1498Szrj {
457438fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
457538fd1498Szrj "no vectype for scalar type ");
457638fd1498Szrj dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
457738fd1498Szrj dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
457838fd1498Szrj }
457938fd1498Szrj
458038fd1498Szrj return false;
458138fd1498Szrj }
458238fd1498Szrj
458338fd1498Szrj if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
458438fd1498Szrj && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
458538fd1498Szrj {
458638fd1498Szrj if (dump_enabled_p ())
458738fd1498Szrj {
458838fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
458938fd1498Szrj "can't convert between boolean and non "
459038fd1498Szrj "boolean vectors");
459138fd1498Szrj dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
459238fd1498Szrj dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
459338fd1498Szrj }
459438fd1498Szrj
459538fd1498Szrj return false;
459638fd1498Szrj }
459738fd1498Szrj
459838fd1498Szrj nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
459938fd1498Szrj nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
460038fd1498Szrj if (known_eq (nunits_out, nunits_in))
460138fd1498Szrj modifier = NONE;
460238fd1498Szrj else if (multiple_p (nunits_out, nunits_in))
460338fd1498Szrj modifier = NARROW;
460438fd1498Szrj else
460538fd1498Szrj {
460638fd1498Szrj gcc_checking_assert (multiple_p (nunits_in, nunits_out));
460738fd1498Szrj modifier = WIDEN;
460838fd1498Szrj }
460938fd1498Szrj
461038fd1498Szrj /* Multiple types in SLP are handled by creating the appropriate number of
461138fd1498Szrj vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
461238fd1498Szrj case of SLP. */
461338fd1498Szrj if (slp_node)
461438fd1498Szrj ncopies = 1;
461538fd1498Szrj else if (modifier == NARROW)
461638fd1498Szrj ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
461738fd1498Szrj else
461838fd1498Szrj ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
461938fd1498Szrj
462038fd1498Szrj /* Sanity check: make sure that at least one copy of the vectorized stmt
462138fd1498Szrj needs to be generated. */
462238fd1498Szrj gcc_assert (ncopies >= 1);
462338fd1498Szrj
462438fd1498Szrj bool found_mode = false;
462538fd1498Szrj scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
462638fd1498Szrj scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
462738fd1498Szrj opt_scalar_mode rhs_mode_iter;
462838fd1498Szrj
462938fd1498Szrj /* Supportable by target? */
463038fd1498Szrj switch (modifier)
463138fd1498Szrj {
463238fd1498Szrj case NONE:
463338fd1498Szrj if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
463438fd1498Szrj return false;
463538fd1498Szrj if (supportable_convert_operation (code, vectype_out, vectype_in,
463638fd1498Szrj &decl1, &code1))
463738fd1498Szrj break;
463838fd1498Szrj /* FALLTHRU */
463938fd1498Szrj unsupported:
464038fd1498Szrj if (dump_enabled_p ())
464138fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
464238fd1498Szrj "conversion not supported by target.\n");
464338fd1498Szrj return false;
464438fd1498Szrj
464538fd1498Szrj case WIDEN:
464638fd1498Szrj if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
464738fd1498Szrj &code1, &code2, &multi_step_cvt,
464838fd1498Szrj &interm_types))
464938fd1498Szrj {
465038fd1498Szrj /* Binary widening operation can only be supported directly by the
465138fd1498Szrj architecture. */
465238fd1498Szrj gcc_assert (!(multi_step_cvt && op_type == binary_op));
465338fd1498Szrj break;
465438fd1498Szrj }
465538fd1498Szrj
465638fd1498Szrj if (code != FLOAT_EXPR
465738fd1498Szrj || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
465838fd1498Szrj goto unsupported;
465938fd1498Szrj
466038fd1498Szrj fltsz = GET_MODE_SIZE (lhs_mode);
466138fd1498Szrj FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
466238fd1498Szrj {
466338fd1498Szrj rhs_mode = rhs_mode_iter.require ();
466438fd1498Szrj if (GET_MODE_SIZE (rhs_mode) > fltsz)
466538fd1498Szrj break;
466638fd1498Szrj
466738fd1498Szrj cvt_type
466838fd1498Szrj = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
466938fd1498Szrj cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
467038fd1498Szrj if (cvt_type == NULL_TREE)
467138fd1498Szrj goto unsupported;
467238fd1498Szrj
467338fd1498Szrj if (GET_MODE_SIZE (rhs_mode) == fltsz)
467438fd1498Szrj {
467538fd1498Szrj if (!supportable_convert_operation (code, vectype_out,
467638fd1498Szrj cvt_type, &decl1, &codecvt1))
467738fd1498Szrj goto unsupported;
467838fd1498Szrj }
467938fd1498Szrj else if (!supportable_widening_operation (code, stmt, vectype_out,
468038fd1498Szrj cvt_type, &codecvt1,
468138fd1498Szrj &codecvt2, &multi_step_cvt,
468238fd1498Szrj &interm_types))
468338fd1498Szrj continue;
468438fd1498Szrj else
468538fd1498Szrj gcc_assert (multi_step_cvt == 0);
468638fd1498Szrj
468738fd1498Szrj if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
468838fd1498Szrj vectype_in, &code1, &code2,
468938fd1498Szrj &multi_step_cvt, &interm_types))
469038fd1498Szrj {
469138fd1498Szrj found_mode = true;
469238fd1498Szrj break;
469338fd1498Szrj }
469438fd1498Szrj }
469538fd1498Szrj
469638fd1498Szrj if (!found_mode)
469738fd1498Szrj goto unsupported;
469838fd1498Szrj
469938fd1498Szrj if (GET_MODE_SIZE (rhs_mode) == fltsz)
470038fd1498Szrj codecvt2 = ERROR_MARK;
470138fd1498Szrj else
470238fd1498Szrj {
470338fd1498Szrj multi_step_cvt++;
470438fd1498Szrj interm_types.safe_push (cvt_type);
470538fd1498Szrj cvt_type = NULL_TREE;
470638fd1498Szrj }
470738fd1498Szrj break;
470838fd1498Szrj
470938fd1498Szrj case NARROW:
471038fd1498Szrj gcc_assert (op_type == unary_op);
471138fd1498Szrj if (supportable_narrowing_operation (code, vectype_out, vectype_in,
471238fd1498Szrj &code1, &multi_step_cvt,
471338fd1498Szrj &interm_types))
471438fd1498Szrj break;
471538fd1498Szrj
471638fd1498Szrj if (code != FIX_TRUNC_EXPR
471738fd1498Szrj || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
471838fd1498Szrj goto unsupported;
471938fd1498Szrj
472038fd1498Szrj cvt_type
472138fd1498Szrj = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
472238fd1498Szrj cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
472338fd1498Szrj if (cvt_type == NULL_TREE)
472438fd1498Szrj goto unsupported;
472538fd1498Szrj if (!supportable_convert_operation (code, cvt_type, vectype_in,
472638fd1498Szrj &decl1, &codecvt1))
472738fd1498Szrj goto unsupported;
472838fd1498Szrj if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
472938fd1498Szrj &code1, &multi_step_cvt,
473038fd1498Szrj &interm_types))
473138fd1498Szrj break;
473238fd1498Szrj goto unsupported;
473338fd1498Szrj
473438fd1498Szrj default:
473538fd1498Szrj gcc_unreachable ();
473638fd1498Szrj }
473738fd1498Szrj
473838fd1498Szrj if (!vec_stmt) /* transformation not required. */
473938fd1498Szrj {
474038fd1498Szrj if (dump_enabled_p ())
474138fd1498Szrj dump_printf_loc (MSG_NOTE, vect_location,
474238fd1498Szrj "=== vectorizable_conversion ===\n");
474338fd1498Szrj if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
474438fd1498Szrj {
474538fd1498Szrj STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
474638fd1498Szrj if (!slp_node)
474738fd1498Szrj vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
474838fd1498Szrj }
474938fd1498Szrj else if (modifier == NARROW)
475038fd1498Szrj {
475138fd1498Szrj STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
475238fd1498Szrj if (!slp_node)
475338fd1498Szrj vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
475438fd1498Szrj }
475538fd1498Szrj else
475638fd1498Szrj {
475738fd1498Szrj STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
475838fd1498Szrj if (!slp_node)
475938fd1498Szrj vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
476038fd1498Szrj }
476138fd1498Szrj interm_types.release ();
476238fd1498Szrj return true;
476338fd1498Szrj }
476438fd1498Szrj
476538fd1498Szrj /* Transform. */
476638fd1498Szrj if (dump_enabled_p ())
476738fd1498Szrj dump_printf_loc (MSG_NOTE, vect_location,
476838fd1498Szrj "transform conversion. ncopies = %d.\n", ncopies);
476938fd1498Szrj
477038fd1498Szrj if (op_type == binary_op)
477138fd1498Szrj {
477238fd1498Szrj if (CONSTANT_CLASS_P (op0))
477338fd1498Szrj op0 = fold_convert (TREE_TYPE (op1), op0);
477438fd1498Szrj else if (CONSTANT_CLASS_P (op1))
477538fd1498Szrj op1 = fold_convert (TREE_TYPE (op0), op1);
477638fd1498Szrj }
477738fd1498Szrj
477838fd1498Szrj /* In case of multi-step conversion, we first generate conversion operations
477938fd1498Szrj to the intermediate types, and then from that types to the final one.
478038fd1498Szrj We create vector destinations for the intermediate type (TYPES) received
478138fd1498Szrj from supportable_*_operation, and store them in the correct order
478238fd1498Szrj for future use in vect_create_vectorized_*_stmts (). */
478338fd1498Szrj auto_vec<tree> vec_dsts (multi_step_cvt + 1);
478438fd1498Szrj vec_dest = vect_create_destination_var (scalar_dest,
478538fd1498Szrj (cvt_type && modifier == WIDEN)
478638fd1498Szrj ? cvt_type : vectype_out);
478738fd1498Szrj vec_dsts.quick_push (vec_dest);
478838fd1498Szrj
478938fd1498Szrj if (multi_step_cvt)
479038fd1498Szrj {
479138fd1498Szrj for (i = interm_types.length () - 1;
479238fd1498Szrj interm_types.iterate (i, &intermediate_type); i--)
479338fd1498Szrj {
479438fd1498Szrj vec_dest = vect_create_destination_var (scalar_dest,
479538fd1498Szrj intermediate_type);
479638fd1498Szrj vec_dsts.quick_push (vec_dest);
479738fd1498Szrj }
479838fd1498Szrj }
479938fd1498Szrj
480038fd1498Szrj if (cvt_type)
480138fd1498Szrj vec_dest = vect_create_destination_var (scalar_dest,
480238fd1498Szrj modifier == WIDEN
480338fd1498Szrj ? vectype_out : cvt_type);
480438fd1498Szrj
480538fd1498Szrj if (!slp_node)
480638fd1498Szrj {
480738fd1498Szrj if (modifier == WIDEN)
480838fd1498Szrj {
480938fd1498Szrj vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
481038fd1498Szrj if (op_type == binary_op)
481138fd1498Szrj vec_oprnds1.create (1);
481238fd1498Szrj }
481338fd1498Szrj else if (modifier == NARROW)
481438fd1498Szrj vec_oprnds0.create (
481538fd1498Szrj 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
481638fd1498Szrj }
481738fd1498Szrj else if (code == WIDEN_LSHIFT_EXPR)
481838fd1498Szrj vec_oprnds1.create (slp_node->vec_stmts_size);
481938fd1498Szrj
482038fd1498Szrj last_oprnd = op0;
482138fd1498Szrj prev_stmt_info = NULL;
482238fd1498Szrj switch (modifier)
482338fd1498Szrj {
482438fd1498Szrj case NONE:
482538fd1498Szrj for (j = 0; j < ncopies; j++)
482638fd1498Szrj {
482738fd1498Szrj if (j == 0)
482838fd1498Szrj vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
482938fd1498Szrj else
483038fd1498Szrj vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
483138fd1498Szrj
483238fd1498Szrj FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
483338fd1498Szrj {
483438fd1498Szrj /* Arguments are ready, create the new vector stmt. */
483538fd1498Szrj if (code1 == CALL_EXPR)
483638fd1498Szrj {
483738fd1498Szrj new_stmt = gimple_build_call (decl1, 1, vop0);
483838fd1498Szrj new_temp = make_ssa_name (vec_dest, new_stmt);
483938fd1498Szrj gimple_call_set_lhs (new_stmt, new_temp);
484038fd1498Szrj }
484138fd1498Szrj else
484238fd1498Szrj {
484338fd1498Szrj gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
484438fd1498Szrj new_stmt = gimple_build_assign (vec_dest, code1, vop0);
484538fd1498Szrj new_temp = make_ssa_name (vec_dest, new_stmt);
484638fd1498Szrj gimple_assign_set_lhs (new_stmt, new_temp);
484738fd1498Szrj }
484838fd1498Szrj
484938fd1498Szrj vect_finish_stmt_generation (stmt, new_stmt, gsi);
485038fd1498Szrj if (slp_node)
485138fd1498Szrj SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
485238fd1498Szrj else
485338fd1498Szrj {
485438fd1498Szrj if (!prev_stmt_info)
485538fd1498Szrj STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
485638fd1498Szrj else
485738fd1498Szrj STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
485838fd1498Szrj prev_stmt_info = vinfo_for_stmt (new_stmt);
485938fd1498Szrj }
486038fd1498Szrj }
486138fd1498Szrj }
486238fd1498Szrj break;
486338fd1498Szrj
486438fd1498Szrj case WIDEN:
486538fd1498Szrj /* In case the vectorization factor (VF) is bigger than the number
486638fd1498Szrj of elements that we can fit in a vectype (nunits), we have to
486738fd1498Szrj generate more than one vector stmt - i.e - we need to "unroll"
486838fd1498Szrj the vector stmt by a factor VF/nunits. */
486938fd1498Szrj for (j = 0; j < ncopies; j++)
487038fd1498Szrj {
487138fd1498Szrj /* Handle uses. */
487238fd1498Szrj if (j == 0)
487338fd1498Szrj {
487438fd1498Szrj if (slp_node)
487538fd1498Szrj {
487638fd1498Szrj if (code == WIDEN_LSHIFT_EXPR)
487738fd1498Szrj {
487838fd1498Szrj unsigned int k;
487938fd1498Szrj
488038fd1498Szrj vec_oprnd1 = op1;
488138fd1498Szrj /* Store vec_oprnd1 for every vector stmt to be created
488238fd1498Szrj for SLP_NODE. We check during the analysis that all
488338fd1498Szrj the shift arguments are the same. */
488438fd1498Szrj for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
488538fd1498Szrj vec_oprnds1.quick_push (vec_oprnd1);
488638fd1498Szrj
488738fd1498Szrj vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
488838fd1498Szrj slp_node);
488938fd1498Szrj }
489038fd1498Szrj else
489138fd1498Szrj vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
489238fd1498Szrj &vec_oprnds1, slp_node);
489338fd1498Szrj }
489438fd1498Szrj else
489538fd1498Szrj {
489638fd1498Szrj vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
489738fd1498Szrj vec_oprnds0.quick_push (vec_oprnd0);
489838fd1498Szrj if (op_type == binary_op)
489938fd1498Szrj {
490038fd1498Szrj if (code == WIDEN_LSHIFT_EXPR)
490138fd1498Szrj vec_oprnd1 = op1;
490238fd1498Szrj else
490338fd1498Szrj vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
490438fd1498Szrj vec_oprnds1.quick_push (vec_oprnd1);
490538fd1498Szrj }
490638fd1498Szrj }
490738fd1498Szrj }
490838fd1498Szrj else
490938fd1498Szrj {
491038fd1498Szrj vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
491138fd1498Szrj vec_oprnds0.truncate (0);
491238fd1498Szrj vec_oprnds0.quick_push (vec_oprnd0);
491338fd1498Szrj if (op_type == binary_op)
491438fd1498Szrj {
491538fd1498Szrj if (code == WIDEN_LSHIFT_EXPR)
491638fd1498Szrj vec_oprnd1 = op1;
491738fd1498Szrj else
491838fd1498Szrj vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
491938fd1498Szrj vec_oprnd1);
492038fd1498Szrj vec_oprnds1.truncate (0);
492138fd1498Szrj vec_oprnds1.quick_push (vec_oprnd1);
492238fd1498Szrj }
492338fd1498Szrj }
492438fd1498Szrj
492538fd1498Szrj /* Arguments are ready. Create the new vector stmts. */
492638fd1498Szrj for (i = multi_step_cvt; i >= 0; i--)
492738fd1498Szrj {
492838fd1498Szrj tree this_dest = vec_dsts[i];
492938fd1498Szrj enum tree_code c1 = code1, c2 = code2;
493038fd1498Szrj if (i == 0 && codecvt2 != ERROR_MARK)
493138fd1498Szrj {
493238fd1498Szrj c1 = codecvt1;
493338fd1498Szrj c2 = codecvt2;
493438fd1498Szrj }
493538fd1498Szrj vect_create_vectorized_promotion_stmts (&vec_oprnds0,
493638fd1498Szrj &vec_oprnds1,
493738fd1498Szrj stmt, this_dest, gsi,
493838fd1498Szrj c1, c2, decl1, decl2,
493938fd1498Szrj op_type);
494038fd1498Szrj }
494138fd1498Szrj
494238fd1498Szrj FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
494338fd1498Szrj {
494438fd1498Szrj if (cvt_type)
494538fd1498Szrj {
494638fd1498Szrj if (codecvt1 == CALL_EXPR)
494738fd1498Szrj {
494838fd1498Szrj new_stmt = gimple_build_call (decl1, 1, vop0);
494938fd1498Szrj new_temp = make_ssa_name (vec_dest, new_stmt);
495038fd1498Szrj gimple_call_set_lhs (new_stmt, new_temp);
495138fd1498Szrj }
495238fd1498Szrj else
495338fd1498Szrj {
495438fd1498Szrj gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
495538fd1498Szrj new_temp = make_ssa_name (vec_dest);
495638fd1498Szrj new_stmt = gimple_build_assign (new_temp, codecvt1,
495738fd1498Szrj vop0);
495838fd1498Szrj }
495938fd1498Szrj
496038fd1498Szrj vect_finish_stmt_generation (stmt, new_stmt, gsi);
496138fd1498Szrj }
496238fd1498Szrj else
496338fd1498Szrj new_stmt = SSA_NAME_DEF_STMT (vop0);
496438fd1498Szrj
496538fd1498Szrj if (slp_node)
496638fd1498Szrj SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
496738fd1498Szrj else
496838fd1498Szrj {
496938fd1498Szrj if (!prev_stmt_info)
497038fd1498Szrj STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
497138fd1498Szrj else
497238fd1498Szrj STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
497338fd1498Szrj prev_stmt_info = vinfo_for_stmt (new_stmt);
497438fd1498Szrj }
497538fd1498Szrj }
497638fd1498Szrj }
497738fd1498Szrj
497838fd1498Szrj *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
497938fd1498Szrj break;
498038fd1498Szrj
498138fd1498Szrj case NARROW:
498238fd1498Szrj /* In case the vectorization factor (VF) is bigger than the number
498338fd1498Szrj of elements that we can fit in a vectype (nunits), we have to
498438fd1498Szrj generate more than one vector stmt - i.e - we need to "unroll"
498538fd1498Szrj the vector stmt by a factor VF/nunits. */
498638fd1498Szrj for (j = 0; j < ncopies; j++)
498738fd1498Szrj {
498838fd1498Szrj /* Handle uses. */
498938fd1498Szrj if (slp_node)
499038fd1498Szrj vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
499138fd1498Szrj slp_node);
499238fd1498Szrj else
499338fd1498Szrj {
499438fd1498Szrj vec_oprnds0.truncate (0);
499538fd1498Szrj vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
499638fd1498Szrj vect_pow2 (multi_step_cvt) - 1);
499738fd1498Szrj }
499838fd1498Szrj
499938fd1498Szrj /* Arguments are ready. Create the new vector stmts. */
500038fd1498Szrj if (cvt_type)
500138fd1498Szrj FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
500238fd1498Szrj {
500338fd1498Szrj if (codecvt1 == CALL_EXPR)
500438fd1498Szrj {
500538fd1498Szrj new_stmt = gimple_build_call (decl1, 1, vop0);
500638fd1498Szrj new_temp = make_ssa_name (vec_dest, new_stmt);
500738fd1498Szrj gimple_call_set_lhs (new_stmt, new_temp);
500838fd1498Szrj }
500938fd1498Szrj else
501038fd1498Szrj {
501138fd1498Szrj gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
501238fd1498Szrj new_temp = make_ssa_name (vec_dest);
501338fd1498Szrj new_stmt = gimple_build_assign (new_temp, codecvt1,
501438fd1498Szrj vop0);
501538fd1498Szrj }
501638fd1498Szrj
501738fd1498Szrj vect_finish_stmt_generation (stmt, new_stmt, gsi);
501838fd1498Szrj vec_oprnds0[i] = new_temp;
501938fd1498Szrj }
502038fd1498Szrj
502138fd1498Szrj vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
502238fd1498Szrj stmt, vec_dsts, gsi,
502338fd1498Szrj slp_node, code1,
502438fd1498Szrj &prev_stmt_info);
502538fd1498Szrj }
502638fd1498Szrj
502738fd1498Szrj *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
502838fd1498Szrj break;
502938fd1498Szrj }
503038fd1498Szrj
503138fd1498Szrj vec_oprnds0.release ();
503238fd1498Szrj vec_oprnds1.release ();
503338fd1498Szrj interm_types.release ();
503438fd1498Szrj
503538fd1498Szrj return true;
503638fd1498Szrj }
503738fd1498Szrj
503838fd1498Szrj
/* Function vectorizable_assignment.

   Check if STMT performs an assignment (copy) that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.

   Handled forms are plain SSA copies plus PAREN_EXPR, NOP/CONVERT and
   VIEW_CONVERT_EXPR conversions that change neither the number of vector
   elements nor the vector size, so the transform is a bitwise-identity
   copy (emitted through a VIEW_CONVERT_EXPR when the codes differ).  */

static bool
vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi,
			 gimple **vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  tree new_temp;
  gimple *def_stmt;
  /* Single-operand stmt: one def-type slot for the RHS.  */
  enum vect_def_type dt[1] = {vect_unknown_def_type};
  int ndts = 1;
  int ncopies;
  int i, j;
  vec<tree> vec_oprnds = vNULL;
  tree vop;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec_info *vinfo = stmt_info->vinfo;
  gimple *new_stmt = NULL;
  stmt_vec_info prev_stmt_info = NULL;
  enum tree_code code;
  tree vectype_in;

  /* In loop vectorization only relevant stmts are vectorized; in basic
     block (SLP) vectorization every candidate stmt is considered.  */
  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && ! vec_stmt)
    return false;

  /* Is vectorizable assignment?  */
  if (!is_gimple_assign (stmt))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  if (TREE_CODE (scalar_dest) != SSA_NAME)
    return false;

  /* Accept plain copies and the conversion codes listed above; anything
     else is not an assignment for our purposes.  */
  code = gimple_assign_rhs_code (stmt);
  if (gimple_assign_single_p (stmt)
      || code == PAREN_EXPR
      || CONVERT_EXPR_CODE_P (code))
    op = gimple_assign_rhs1 (stmt);
  else
    return false;

  /* For a VIEW_CONVERT_EXPR analyze the converted operand itself.  */
  if (code == VIEW_CONVERT_EXPR)
    op = TREE_OPERAND (op, 0);

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node)
    ncopies = 1;
  else
    ncopies = vect_get_num_copies (loop_vinfo, vectype);

  gcc_assert (ncopies >= 1);

  if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[0], &vectype_in))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "use not simple.\n");
      return false;
    }

  /* We can handle NOP_EXPR conversions that do not change the number
     of elements or the vector size.  */
  if ((CONVERT_EXPR_CODE_P (code)
       || code == VIEW_CONVERT_EXPR)
      && (!vectype_in
	  || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
	  || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
		       GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
    return false;

  /* We do not handle bit-precision changes.  */
  if ((CONVERT_EXPR_CODE_P (code)
       || code == VIEW_CONVERT_EXPR)
      && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
      && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
	  || !type_has_mode_precision_p (TREE_TYPE (op)))
      /* But a conversion that does not change the bit-pattern is ok.  */
      && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
	    > TYPE_PRECISION (TREE_TYPE (op)))
	   && TYPE_UNSIGNED (TREE_TYPE (op)))
      /* Conversion between boolean types of different sizes is
	 a simple assignment in case their vectypes are same
	 boolean vectors.  */
      && (!VECTOR_BOOLEAN_TYPE_P (vectype)
	  || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "type conversion to/from bit-precision "
                         "unsupported.\n");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "=== vectorizable_assignment ===\n");
      /* SLP costing is done once for the whole node elsewhere.  */
      if (!slp_node)
	vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
      return true;
    }

  /* Transform.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle use.  Outer loop: one iteration per vector copy (NCOPIES);
     inner loop: one vectorized stmt per operand def the SLP node needs.  */
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
        vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
      else
        vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);

      /* Arguments are ready. create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
       {
	 /* Conversions were checked above to be bitwise no-ops, so a
	    VIEW_CONVERT_EXPR to the destination vectype suffices.  */
	 if (CONVERT_EXPR_CODE_P (code)
	     || code == VIEW_CONVERT_EXPR)
	   vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
         new_stmt = gimple_build_assign (vec_dest, vop);
         new_temp = make_ssa_name (vec_dest, new_stmt);
         gimple_assign_set_lhs (new_stmt, new_temp);
         vect_finish_stmt_generation (stmt, new_stmt, gsi);
         if (slp_node)
           SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
       }

      if (slp_node)
        continue;

      /* Chain the copies: the first one is recorded on STMT_INFO, later
	 ones hang off the previous copy via RELATED_STMT.  */
      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds.release ();
  return true;
}
520538fd1498Szrj
520638fd1498Szrj
520738fd1498Szrj /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
520838fd1498Szrj either as shift by a scalar or by a vector. */
520938fd1498Szrj
521038fd1498Szrj bool
vect_supportable_shift(enum tree_code code,tree scalar_type)521138fd1498Szrj vect_supportable_shift (enum tree_code code, tree scalar_type)
521238fd1498Szrj {
521338fd1498Szrj
521438fd1498Szrj machine_mode vec_mode;
521538fd1498Szrj optab optab;
521638fd1498Szrj int icode;
521738fd1498Szrj tree vectype;
521838fd1498Szrj
521938fd1498Szrj vectype = get_vectype_for_scalar_type (scalar_type);
522038fd1498Szrj if (!vectype)
522138fd1498Szrj return false;
522238fd1498Szrj
522338fd1498Szrj optab = optab_for_tree_code (code, vectype, optab_scalar);
522438fd1498Szrj if (!optab
522538fd1498Szrj || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
522638fd1498Szrj {
522738fd1498Szrj optab = optab_for_tree_code (code, vectype, optab_vector);
522838fd1498Szrj if (!optab
522938fd1498Szrj || (optab_handler (optab, TYPE_MODE (vectype))
523038fd1498Szrj == CODE_FOR_nothing))
523138fd1498Szrj return false;
523238fd1498Szrj }
523338fd1498Szrj
523438fd1498Szrj vec_mode = TYPE_MODE (vectype);
523538fd1498Szrj icode = (int) optab_handler (optab, vec_mode);
523638fd1498Szrj if (icode == CODE_FOR_nothing)
523738fd1498Szrj return false;
523838fd1498Szrj
523938fd1498Szrj return true;
524038fd1498Szrj }
524138fd1498Szrj
524238fd1498Szrj
524338fd1498Szrj /* Function vectorizable_shift.
524438fd1498Szrj
524538fd1498Szrj Check if STMT performs a shift operation that can be vectorized.
524638fd1498Szrj If VEC_STMT is also passed, vectorize the STMT: create a vectorized
524738fd1498Szrj stmt to replace it, put it in VEC_STMT, and insert it at BSI.
524838fd1498Szrj Return FALSE if not a vectorizable STMT, TRUE otherwise. */
524938fd1498Szrj
525038fd1498Szrj static bool
vectorizable_shift(gimple * stmt,gimple_stmt_iterator * gsi,gimple ** vec_stmt,slp_tree slp_node)525138fd1498Szrj vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi,
525238fd1498Szrj gimple **vec_stmt, slp_tree slp_node)
525338fd1498Szrj {
525438fd1498Szrj tree vec_dest;
525538fd1498Szrj tree scalar_dest;
525638fd1498Szrj tree op0, op1 = NULL;
525738fd1498Szrj tree vec_oprnd1 = NULL_TREE;
525838fd1498Szrj stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
525938fd1498Szrj tree vectype;
526038fd1498Szrj loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
526138fd1498Szrj enum tree_code code;
526238fd1498Szrj machine_mode vec_mode;
526338fd1498Szrj tree new_temp;
526438fd1498Szrj optab optab;
526538fd1498Szrj int icode;
526638fd1498Szrj machine_mode optab_op2_mode;
526738fd1498Szrj gimple *def_stmt;
526838fd1498Szrj enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
526938fd1498Szrj int ndts = 2;
527038fd1498Szrj gimple *new_stmt = NULL;
527138fd1498Szrj stmt_vec_info prev_stmt_info;
527238fd1498Szrj poly_uint64 nunits_in;
527338fd1498Szrj poly_uint64 nunits_out;
527438fd1498Szrj tree vectype_out;
527538fd1498Szrj tree op1_vectype;
527638fd1498Szrj int ncopies;
527738fd1498Szrj int j, i;
527838fd1498Szrj vec<tree> vec_oprnds0 = vNULL;
527938fd1498Szrj vec<tree> vec_oprnds1 = vNULL;
528038fd1498Szrj tree vop0, vop1;
528138fd1498Szrj unsigned int k;
528238fd1498Szrj bool scalar_shift_arg = true;
528338fd1498Szrj bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
528438fd1498Szrj vec_info *vinfo = stmt_info->vinfo;
528538fd1498Szrj
528638fd1498Szrj if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
528738fd1498Szrj return false;
528838fd1498Szrj
528938fd1498Szrj if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
529038fd1498Szrj && ! vec_stmt)
529138fd1498Szrj return false;
529238fd1498Szrj
529338fd1498Szrj /* Is STMT a vectorizable binary/unary operation? */
529438fd1498Szrj if (!is_gimple_assign (stmt))
529538fd1498Szrj return false;
529638fd1498Szrj
529738fd1498Szrj if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
529838fd1498Szrj return false;
529938fd1498Szrj
530038fd1498Szrj code = gimple_assign_rhs_code (stmt);
530138fd1498Szrj
530238fd1498Szrj if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
530338fd1498Szrj || code == RROTATE_EXPR))
530438fd1498Szrj return false;
530538fd1498Szrj
530638fd1498Szrj scalar_dest = gimple_assign_lhs (stmt);
530738fd1498Szrj vectype_out = STMT_VINFO_VECTYPE (stmt_info);
530838fd1498Szrj if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
530938fd1498Szrj {
531038fd1498Szrj if (dump_enabled_p ())
531138fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
531238fd1498Szrj "bit-precision shifts not supported.\n");
531338fd1498Szrj return false;
531438fd1498Szrj }
531538fd1498Szrj
531638fd1498Szrj op0 = gimple_assign_rhs1 (stmt);
531738fd1498Szrj if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
531838fd1498Szrj {
531938fd1498Szrj if (dump_enabled_p ())
532038fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
532138fd1498Szrj "use not simple.\n");
532238fd1498Szrj return false;
532338fd1498Szrj }
532438fd1498Szrj /* If op0 is an external or constant def use a vector type with
532538fd1498Szrj the same size as the output vector type. */
532638fd1498Szrj if (!vectype)
532738fd1498Szrj vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
532838fd1498Szrj if (vec_stmt)
532938fd1498Szrj gcc_assert (vectype);
533038fd1498Szrj if (!vectype)
533138fd1498Szrj {
533238fd1498Szrj if (dump_enabled_p ())
533338fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
533438fd1498Szrj "no vectype for scalar type\n");
533538fd1498Szrj return false;
533638fd1498Szrj }
533738fd1498Szrj
533838fd1498Szrj nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
533938fd1498Szrj nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
534038fd1498Szrj if (maybe_ne (nunits_out, nunits_in))
534138fd1498Szrj return false;
534238fd1498Szrj
534338fd1498Szrj op1 = gimple_assign_rhs2 (stmt);
534438fd1498Szrj if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &op1_vectype))
534538fd1498Szrj {
534638fd1498Szrj if (dump_enabled_p ())
534738fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
534838fd1498Szrj "use not simple.\n");
534938fd1498Szrj return false;
535038fd1498Szrj }
535138fd1498Szrj
535238fd1498Szrj /* Multiple types in SLP are handled by creating the appropriate number of
535338fd1498Szrj vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
535438fd1498Szrj case of SLP. */
535538fd1498Szrj if (slp_node)
535638fd1498Szrj ncopies = 1;
535738fd1498Szrj else
535838fd1498Szrj ncopies = vect_get_num_copies (loop_vinfo, vectype);
535938fd1498Szrj
536038fd1498Szrj gcc_assert (ncopies >= 1);
536138fd1498Szrj
536238fd1498Szrj /* Determine whether the shift amount is a vector, or scalar. If the
536338fd1498Szrj shift/rotate amount is a vector, use the vector/vector shift optabs. */
536438fd1498Szrj
536538fd1498Szrj if ((dt[1] == vect_internal_def
536638fd1498Szrj || dt[1] == vect_induction_def)
536738fd1498Szrj && !slp_node)
536838fd1498Szrj scalar_shift_arg = false;
536938fd1498Szrj else if (dt[1] == vect_constant_def
537038fd1498Szrj || dt[1] == vect_external_def
537138fd1498Szrj || dt[1] == vect_internal_def)
537238fd1498Szrj {
537338fd1498Szrj /* In SLP, need to check whether the shift count is the same,
537438fd1498Szrj in loops if it is a constant or invariant, it is always
537538fd1498Szrj a scalar shift. */
537638fd1498Szrj if (slp_node)
537738fd1498Szrj {
537838fd1498Szrj vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
537938fd1498Szrj gimple *slpstmt;
538038fd1498Szrj
538138fd1498Szrj FOR_EACH_VEC_ELT (stmts, k, slpstmt)
538238fd1498Szrj if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
538338fd1498Szrj scalar_shift_arg = false;
5384*58e805e6Szrj
5385*58e805e6Szrj /* For internal SLP defs we have to make sure we see scalar stmts
5386*58e805e6Szrj for all vector elements.
5387*58e805e6Szrj ??? For different vectors we could resort to a different
5388*58e805e6Szrj scalar shift operand but code-generation below simply always
5389*58e805e6Szrj takes the first. */
5390*58e805e6Szrj if (dt[1] == vect_internal_def
5391*58e805e6Szrj && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node), stmts.length ()))
5392*58e805e6Szrj scalar_shift_arg = false;
539338fd1498Szrj }
539438fd1498Szrj
539538fd1498Szrj /* If the shift amount is computed by a pattern stmt we cannot
539638fd1498Szrj use the scalar amount directly thus give up and use a vector
539738fd1498Szrj shift. */
539838fd1498Szrj if (dt[1] == vect_internal_def)
539938fd1498Szrj {
540038fd1498Szrj gimple *def = SSA_NAME_DEF_STMT (op1);
540138fd1498Szrj if (is_pattern_stmt_p (vinfo_for_stmt (def)))
540238fd1498Szrj scalar_shift_arg = false;
540338fd1498Szrj }
540438fd1498Szrj }
540538fd1498Szrj else
540638fd1498Szrj {
540738fd1498Szrj if (dump_enabled_p ())
540838fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
540938fd1498Szrj "operand mode requires invariant argument.\n");
541038fd1498Szrj return false;
541138fd1498Szrj }
541238fd1498Szrj
541338fd1498Szrj /* Vector shifted by vector. */
541438fd1498Szrj if (!scalar_shift_arg)
541538fd1498Szrj {
541638fd1498Szrj optab = optab_for_tree_code (code, vectype, optab_vector);
541738fd1498Szrj if (dump_enabled_p ())
541838fd1498Szrj dump_printf_loc (MSG_NOTE, vect_location,
541938fd1498Szrj "vector/vector shift/rotate found.\n");
542038fd1498Szrj
542138fd1498Szrj if (!op1_vectype)
542238fd1498Szrj op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
542338fd1498Szrj if (op1_vectype == NULL_TREE
542438fd1498Szrj || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
542538fd1498Szrj {
542638fd1498Szrj if (dump_enabled_p ())
542738fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
542838fd1498Szrj "unusable type for last operand in"
542938fd1498Szrj " vector/vector shift/rotate.\n");
543038fd1498Szrj return false;
543138fd1498Szrj }
543238fd1498Szrj }
543338fd1498Szrj /* See if the machine has a vector shifted by scalar insn and if not
543438fd1498Szrj then see if it has a vector shifted by vector insn. */
543538fd1498Szrj else
543638fd1498Szrj {
543738fd1498Szrj optab = optab_for_tree_code (code, vectype, optab_scalar);
543838fd1498Szrj if (optab
543938fd1498Szrj && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
544038fd1498Szrj {
544138fd1498Szrj if (dump_enabled_p ())
544238fd1498Szrj dump_printf_loc (MSG_NOTE, vect_location,
544338fd1498Szrj "vector/scalar shift/rotate found.\n");
544438fd1498Szrj }
544538fd1498Szrj else
544638fd1498Szrj {
544738fd1498Szrj optab = optab_for_tree_code (code, vectype, optab_vector);
544838fd1498Szrj if (optab
544938fd1498Szrj && (optab_handler (optab, TYPE_MODE (vectype))
545038fd1498Szrj != CODE_FOR_nothing))
545138fd1498Szrj {
545238fd1498Szrj scalar_shift_arg = false;
545338fd1498Szrj
545438fd1498Szrj if (dump_enabled_p ())
545538fd1498Szrj dump_printf_loc (MSG_NOTE, vect_location,
545638fd1498Szrj "vector/vector shift/rotate found.\n");
545738fd1498Szrj
545838fd1498Szrj /* Unlike the other binary operators, shifts/rotates have
545938fd1498Szrj the rhs being int, instead of the same type as the lhs,
546038fd1498Szrj so make sure the scalar is the right type if we are
546138fd1498Szrj dealing with vectors of long long/long/short/char. */
546238fd1498Szrj if (dt[1] == vect_constant_def)
546338fd1498Szrj op1 = fold_convert (TREE_TYPE (vectype), op1);
546438fd1498Szrj else if (!useless_type_conversion_p (TREE_TYPE (vectype),
546538fd1498Szrj TREE_TYPE (op1)))
546638fd1498Szrj {
546738fd1498Szrj if (slp_node
546838fd1498Szrj && TYPE_MODE (TREE_TYPE (vectype))
546938fd1498Szrj != TYPE_MODE (TREE_TYPE (op1)))
547038fd1498Szrj {
547138fd1498Szrj if (dump_enabled_p ())
547238fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
547338fd1498Szrj "unusable type for last operand in"
547438fd1498Szrj " vector/vector shift/rotate.\n");
547538fd1498Szrj return false;
547638fd1498Szrj }
547738fd1498Szrj if (vec_stmt && !slp_node)
547838fd1498Szrj {
547938fd1498Szrj op1 = fold_convert (TREE_TYPE (vectype), op1);
548038fd1498Szrj op1 = vect_init_vector (stmt, op1,
548138fd1498Szrj TREE_TYPE (vectype), NULL);
548238fd1498Szrj }
548338fd1498Szrj }
548438fd1498Szrj }
548538fd1498Szrj }
548638fd1498Szrj }
548738fd1498Szrj
548838fd1498Szrj /* Supportable by target? */
548938fd1498Szrj if (!optab)
549038fd1498Szrj {
549138fd1498Szrj if (dump_enabled_p ())
549238fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
549338fd1498Szrj "no optab.\n");
549438fd1498Szrj return false;
549538fd1498Szrj }
549638fd1498Szrj vec_mode = TYPE_MODE (vectype);
549738fd1498Szrj icode = (int) optab_handler (optab, vec_mode);
549838fd1498Szrj if (icode == CODE_FOR_nothing)
549938fd1498Szrj {
550038fd1498Szrj if (dump_enabled_p ())
550138fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
550238fd1498Szrj "op not supported by target.\n");
550338fd1498Szrj /* Check only during analysis. */
550438fd1498Szrj if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
550538fd1498Szrj || (!vec_stmt
550638fd1498Szrj && !vect_worthwhile_without_simd_p (vinfo, code)))
550738fd1498Szrj return false;
550838fd1498Szrj if (dump_enabled_p ())
550938fd1498Szrj dump_printf_loc (MSG_NOTE, vect_location,
551038fd1498Szrj "proceeding using word mode.\n");
551138fd1498Szrj }
551238fd1498Szrj
551338fd1498Szrj /* Worthwhile without SIMD support? Check only during analysis. */
551438fd1498Szrj if (!vec_stmt
551538fd1498Szrj && !VECTOR_MODE_P (TYPE_MODE (vectype))
551638fd1498Szrj && !vect_worthwhile_without_simd_p (vinfo, code))
551738fd1498Szrj {
551838fd1498Szrj if (dump_enabled_p ())
551938fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
552038fd1498Szrj "not worthwhile without SIMD support.\n");
552138fd1498Szrj return false;
552238fd1498Szrj }
552338fd1498Szrj
552438fd1498Szrj if (!vec_stmt) /* transformation not required. */
552538fd1498Szrj {
552638fd1498Szrj STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
552738fd1498Szrj if (dump_enabled_p ())
552838fd1498Szrj dump_printf_loc (MSG_NOTE, vect_location,
552938fd1498Szrj "=== vectorizable_shift ===\n");
553038fd1498Szrj if (!slp_node)
553138fd1498Szrj vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
553238fd1498Szrj return true;
553338fd1498Szrj }
553438fd1498Szrj
553538fd1498Szrj /* Transform. */
553638fd1498Szrj
553738fd1498Szrj if (dump_enabled_p ())
553838fd1498Szrj dump_printf_loc (MSG_NOTE, vect_location,
553938fd1498Szrj "transform binary/unary operation.\n");
554038fd1498Szrj
554138fd1498Szrj /* Handle def. */
554238fd1498Szrj vec_dest = vect_create_destination_var (scalar_dest, vectype);
554338fd1498Szrj
554438fd1498Szrj prev_stmt_info = NULL;
554538fd1498Szrj for (j = 0; j < ncopies; j++)
554638fd1498Szrj {
554738fd1498Szrj /* Handle uses. */
554838fd1498Szrj if (j == 0)
554938fd1498Szrj {
555038fd1498Szrj if (scalar_shift_arg)
555138fd1498Szrj {
555238fd1498Szrj /* Vector shl and shr insn patterns can be defined with scalar
555338fd1498Szrj operand 2 (shift operand). In this case, use constant or loop
555438fd1498Szrj invariant op1 directly, without extending it to vector mode
555538fd1498Szrj first. */
555638fd1498Szrj optab_op2_mode = insn_data[icode].operand[2].mode;
555738fd1498Szrj if (!VECTOR_MODE_P (optab_op2_mode))
555838fd1498Szrj {
555938fd1498Szrj if (dump_enabled_p ())
556038fd1498Szrj dump_printf_loc (MSG_NOTE, vect_location,
556138fd1498Szrj "operand 1 using scalar mode.\n");
556238fd1498Szrj vec_oprnd1 = op1;
556338fd1498Szrj vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
556438fd1498Szrj vec_oprnds1.quick_push (vec_oprnd1);
556538fd1498Szrj if (slp_node)
556638fd1498Szrj {
556738fd1498Szrj /* Store vec_oprnd1 for every vector stmt to be created
556838fd1498Szrj for SLP_NODE. We check during the analysis that all
556938fd1498Szrj the shift arguments are the same.
557038fd1498Szrj TODO: Allow different constants for different vector
557138fd1498Szrj stmts generated for an SLP instance. */
557238fd1498Szrj for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
557338fd1498Szrj vec_oprnds1.quick_push (vec_oprnd1);
557438fd1498Szrj }
557538fd1498Szrj }
557638fd1498Szrj }
557738fd1498Szrj
557838fd1498Szrj /* vec_oprnd1 is available if operand 1 should be of a scalar-type
557938fd1498Szrj (a special case for certain kind of vector shifts); otherwise,
558038fd1498Szrj operand 1 should be of a vector type (the usual case). */
558138fd1498Szrj if (vec_oprnd1)
558238fd1498Szrj vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
558338fd1498Szrj slp_node);
558438fd1498Szrj else
558538fd1498Szrj vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
558638fd1498Szrj slp_node);
558738fd1498Szrj }
558838fd1498Szrj else
558938fd1498Szrj vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
559038fd1498Szrj
559138fd1498Szrj /* Arguments are ready. Create the new vector stmt. */
559238fd1498Szrj FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
559338fd1498Szrj {
559438fd1498Szrj vop1 = vec_oprnds1[i];
559538fd1498Szrj new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
559638fd1498Szrj new_temp = make_ssa_name (vec_dest, new_stmt);
559738fd1498Szrj gimple_assign_set_lhs (new_stmt, new_temp);
559838fd1498Szrj vect_finish_stmt_generation (stmt, new_stmt, gsi);
559938fd1498Szrj if (slp_node)
560038fd1498Szrj SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
560138fd1498Szrj }
560238fd1498Szrj
560338fd1498Szrj if (slp_node)
560438fd1498Szrj continue;
560538fd1498Szrj
560638fd1498Szrj if (j == 0)
560738fd1498Szrj STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
560838fd1498Szrj else
560938fd1498Szrj STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
561038fd1498Szrj prev_stmt_info = vinfo_for_stmt (new_stmt);
561138fd1498Szrj }
561238fd1498Szrj
561338fd1498Szrj vec_oprnds0.release ();
561438fd1498Szrj vec_oprnds1.release ();
561538fd1498Szrj
561638fd1498Szrj return true;
561738fd1498Szrj }
561838fd1498Szrj
561938fd1498Szrj
/* Function vectorizable_operation.

   Check if STMT performs a binary, unary or ternary operation that can
   be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
			gimple **vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code, orig_code;
  machine_mode vec_mode;
  tree new_temp;
  int op_type;
  optab optab;
  bool target_support_p;
  gimple *def_stmt;
  /* Def types of the (up to three) scalar operands, filled in by
     vect_is_simple_use and later consumed by the cost model and by
     vect_get_vec_defs_for_stmt_copy.  */
  enum vect_def_type dt[3]
    = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
  int ndts = 3;
  gimple *new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  poly_uint64 nunits_in;
  poly_uint64 nunits_out;
  tree vectype_out;
  int ncopies;
  int j, i;
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  vec<tree> vec_oprnds2 = vNULL;
  tree vop0, vop1, vop2;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec_info *vinfo = stmt_info->vinfo;

  /* Only stmts marked relevant by the analysis phase are vectorized in
     loop vectorization; for basic-block (SLP) vectorization all stmts
     are considered.  */
  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && ! vec_stmt)
    return false;

  /* Is STMT a vectorizable binary/unary operation?   */
  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  /* Remember the original code: POINTER_DIFF_EXPR needs a final
     VIEW_CONVERT_EXPR of the result (see vec_cvt_dest below).  */
  orig_code = code = gimple_assign_rhs_code (stmt);

  /* For pointer addition and subtraction, we should use the normal
     plus and minus for the vector operation.  */
  if (code == POINTER_PLUS_EXPR)
    code = PLUS_EXPR;
  if (code == POINTER_DIFF_EXPR)
    code = MINUS_EXPR;

  /* Support only unary or binary operations.  */
  op_type = TREE_CODE_LENGTH (code);
  if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "num. args = %d (not unary/binary/ternary op).\n",
                         op_type);
      return false;
    }

  scalar_dest = gimple_assign_lhs (stmt);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Most operations cannot handle bit-precision types without extra
     truncations.  */
  if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
      && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
      /* Exception are bitwise binary operations.  */
      && code != BIT_IOR_EXPR
      && code != BIT_XOR_EXPR
      && code != BIT_AND_EXPR)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "bit-precision arithmetic not supported.\n");
      return false;
    }

  op0 = gimple_assign_rhs1 (stmt);
  if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "use not simple.\n");
      return false;
    }
  /* If op0 is an external or constant def use a vector type with
     the same size as the output vector type.  */
  if (!vectype)
    {
      /* For boolean type we cannot determine vectype by
	 invariant value (don't know whether it is a vector
	 of booleans or vector of integers).  We use output
	 vectype because operations on boolean don't change
	 type.  */
      if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
	{
	  if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "not supported operation on bool value.\n");
	      return false;
	    }
	  vectype = vectype_out;
	}
      else
	vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
    }
  /* During the transform phase a vectype must already have been
     determined by the preceding analysis phase.  */
  if (vec_stmt)
    gcc_assert (vectype);
  if (!vectype)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "no vectype for scalar type ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                             TREE_TYPE (op0));
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }

      return false;
    }

  /* Input and output vectors must have the same number of elements.  */
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
  if (maybe_ne (nunits_out, nunits_in))
    return false;

  if (op_type == binary_op || op_type == ternary_op)
    {
      op1 = gimple_assign_rhs2 (stmt);
      if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.\n");
	  return false;
	}
    }
  if (op_type == ternary_op)
    {
      op2 = gimple_assign_rhs3 (stmt);
      if (!vect_is_simple_use (op2, vinfo, &def_stmt, &dt[2]))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.\n");
	  return false;
	}
    }

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node)
    ncopies = 1;
  else
    ncopies = vect_get_num_copies (loop_vinfo, vectype);

  gcc_assert (ncopies >= 1);

  /* Shifts are handled in vectorizable_shift ().  */
  if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
      || code == RROTATE_EXPR)
   return false;

  /* Supportable by target?  */

  vec_mode = TYPE_MODE (vectype);
  if (code == MULT_HIGHPART_EXPR)
    target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
  else
    {
      optab = optab_for_tree_code (code, vectype, optab_default);
      if (!optab)
	{
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "no optab.\n");
	  return false;
	}
      target_support_p = (optab_handler (optab, vec_mode)
			  != CODE_FOR_nothing);
    }

  if (!target_support_p)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "op not supported by target.\n");
      /* Check only during analysis.  */
      /* Word-mode fallback: only possible when the vector fits in a
	 word and the cost model says it is worthwhile without SIMD.  */
      if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
	  || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
        return false;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
                         "proceeding using word mode.\n");
    }

  /* Worthwhile without SIMD support?  Check only during analysis.  */
  if (!VECTOR_MODE_P (vec_mode)
      && !vec_stmt
      && !vect_worthwhile_without_simd_p (vinfo, code))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not worthwhile without SIMD support.\n");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "=== vectorizable_operation ===\n");
      /* SLP costs are accounted during SLP analysis, not here.  */
      if (!slp_node)
	vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
      return true;
    }

  /* Transform.  */

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "transform binary/unary operation.\n");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
     vectors with unsigned elements, but the result is signed.  So, we
     need to compute the MINUS_EXPR into vectype temporary and
     VIEW_CONVERT_EXPR it into the final vectype_out result.  */
  tree vec_cvt_dest = NULL_TREE;
  if (orig_code == POINTER_DIFF_EXPR)
    vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
     we need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1 (done in vectorizable_load. See more details
             there):
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -

     step2: vectorize stmt S2 (done here):
        To vectorize stmt S2 we first need to find the relevant vector
        def for the first operand 'x'.  This is, as usual, obtained from
        the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
        that defines 'x' (S1).  This way we find the stmt VS1_0, and the
        relevant vector def 'vx0'.  Having found 'vx0' we can generate
        the vector stmt VS2_0, and as usual, record it in the
        STMT_VINFO_VEC_STMT of stmt S2.
        When creating the second copy (VS2_1), we obtain the relevant vector
        def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
        stmt VS1_0.  This way we find the stmt VS1_1 and the relevant
        vector def 'vx1'.  Using 'vx1' we create stmt VS2_1 and record a
        pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
        Similarly when creating stmts VS2_2 and VS2_3.  This is the resulting
        chain of stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        VS2_0:  vz0 = vx0 + v1  VS2_1           -
        VS2_1:  vz1 = vx1 + v1  VS2_2           -
        VS2_2:  vz2 = vx2 + v1  VS2_3           -
        VS2_3:  vz3 = vx3 + v1  -               -
        S2:     z = x + 1       -               VS2_0  */

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
	{
	  if (op_type == binary_op)
	    vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
			       slp_node);
	  else if (op_type == ternary_op)
	    {
	      if (slp_node)
		{
		  /* vect_get_vec_defs only handles two operands, so
		     gather the defs of all three SLP operands here.  */
		  auto_vec<tree> ops(3);
		  ops.quick_push (op0);
		  ops.quick_push (op1);
		  ops.quick_push (op2);
		  auto_vec<vec<tree> > vec_defs(3);
		  vect_get_slp_defs (ops, slp_node, &vec_defs);
		  vec_oprnds0 = vec_defs[0];
		  vec_oprnds1 = vec_defs[1];
		  vec_oprnds2 = vec_defs[2];
		}
	      else
		{
		  vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
				     NULL);
		  vect_get_vec_defs (op2, NULL_TREE, stmt, &vec_oprnds2, NULL,
				     NULL);
		}
	    }
	  else
	    vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
			       slp_node);
	}
      else
	{
	  /* Subsequent copies: chain from the defs of the previous copy.  */
	  vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
	  if (op_type == ternary_op)
	    {
	      tree vec_oprnd = vec_oprnds2.pop ();
	      vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
							           vec_oprnd));
	    }
	}

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
        {
	  /* vop1/vop2 stay NULL_TREE for operations with fewer operands.  */
	  vop1 = ((op_type == binary_op || op_type == ternary_op)
		  ? vec_oprnds1[i] : NULL_TREE);
	  vop2 = ((op_type == ternary_op)
		  ? vec_oprnds2[i] : NULL_TREE);
	  new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  gimple_assign_set_lhs (new_stmt, new_temp);
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
	  if (vec_cvt_dest)
	    {
	      /* POINTER_DIFF_EXPR: convert the unsigned MINUS_EXPR result
		 to the signed output vectype.  */
	      new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
	      new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
					      new_temp);
	      new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
	      gimple_assign_set_lhs (new_stmt, new_temp);
	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
	    }
          if (slp_node)
	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
        }

      if (slp_node)
        continue;

      /* Link the copies via STMT_VINFO_RELATED_STMT (see the big comment
	 above).  */
      if (j == 0)
	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();
  vec_oprnds2.release ();

  return true;
}
601538fd1498Szrj
601638fd1498Szrj /* A helper function to ensure data reference DR's base alignment. */
601738fd1498Szrj
601838fd1498Szrj static void
ensure_base_align(struct data_reference * dr)601938fd1498Szrj ensure_base_align (struct data_reference *dr)
602038fd1498Szrj {
602138fd1498Szrj if (!dr->aux)
602238fd1498Szrj return;
602338fd1498Szrj
602438fd1498Szrj if (DR_VECT_AUX (dr)->base_misaligned)
602538fd1498Szrj {
602638fd1498Szrj tree base_decl = DR_VECT_AUX (dr)->base_decl;
602738fd1498Szrj
602838fd1498Szrj unsigned int align_base_to = DR_TARGET_ALIGNMENT (dr) * BITS_PER_UNIT;
602938fd1498Szrj
603038fd1498Szrj if (decl_in_symtab_p (base_decl))
603138fd1498Szrj symtab_node::get (base_decl)->increase_alignment (align_base_to);
603238fd1498Szrj else
603338fd1498Szrj {
603438fd1498Szrj SET_DECL_ALIGN (base_decl, align_base_to);
603538fd1498Szrj DECL_USER_ALIGN (base_decl) = 1;
603638fd1498Szrj }
603738fd1498Szrj DR_VECT_AUX (dr)->base_misaligned = false;
603838fd1498Szrj }
603938fd1498Szrj }
604038fd1498Szrj
604138fd1498Szrj
604238fd1498Szrj /* Function get_group_alias_ptr_type.
604338fd1498Szrj
604438fd1498Szrj Return the alias type for the group starting at FIRST_STMT. */
604538fd1498Szrj
604638fd1498Szrj static tree
get_group_alias_ptr_type(gimple * first_stmt)604738fd1498Szrj get_group_alias_ptr_type (gimple *first_stmt)
604838fd1498Szrj {
604938fd1498Szrj struct data_reference *first_dr, *next_dr;
605038fd1498Szrj gimple *next_stmt;
605138fd1498Szrj
605238fd1498Szrj first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
605338fd1498Szrj next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt));
605438fd1498Szrj while (next_stmt)
605538fd1498Szrj {
605638fd1498Szrj next_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (next_stmt));
605738fd1498Szrj if (get_alias_set (DR_REF (first_dr))
605838fd1498Szrj != get_alias_set (DR_REF (next_dr)))
605938fd1498Szrj {
606038fd1498Szrj if (dump_enabled_p ())
606138fd1498Szrj dump_printf_loc (MSG_NOTE, vect_location,
606238fd1498Szrj "conflicting alias set types.\n");
606338fd1498Szrj return ptr_type_node;
606438fd1498Szrj }
606538fd1498Szrj next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
606638fd1498Szrj }
606738fd1498Szrj return reference_alias_ptr_type (DR_REF (first_dr));
606838fd1498Szrj }
606938fd1498Szrj
607038fd1498Szrj
607138fd1498Szrj /* Function vectorizable_store.
607238fd1498Szrj
607338fd1498Szrj Check if STMT defines a non scalar data-ref (array/pointer/structure) that
607438fd1498Szrj can be vectorized.
607538fd1498Szrj If VEC_STMT is also passed, vectorize the STMT: create a vectorized
607638fd1498Szrj stmt to replace it, put it in VEC_STMT, and insert it at BSI.
607738fd1498Szrj Return FALSE if not a vectorizable STMT, TRUE otherwise. */
607838fd1498Szrj
607938fd1498Szrj static bool
vectorizable_store(gimple * stmt,gimple_stmt_iterator * gsi,gimple ** vec_stmt,slp_tree slp_node)608038fd1498Szrj vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
608138fd1498Szrj slp_tree slp_node)
608238fd1498Szrj {
608338fd1498Szrj tree data_ref;
608438fd1498Szrj tree op;
608538fd1498Szrj tree vec_oprnd = NULL_TREE;
608638fd1498Szrj stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
608738fd1498Szrj struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
608838fd1498Szrj tree elem_type;
608938fd1498Szrj loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
609038fd1498Szrj struct loop *loop = NULL;
609138fd1498Szrj machine_mode vec_mode;
609238fd1498Szrj tree dummy;
609338fd1498Szrj enum dr_alignment_support alignment_support_scheme;
609438fd1498Szrj gimple *def_stmt;
609538fd1498Szrj enum vect_def_type rhs_dt = vect_unknown_def_type;
609638fd1498Szrj enum vect_def_type mask_dt = vect_unknown_def_type;
609738fd1498Szrj stmt_vec_info prev_stmt_info = NULL;
609838fd1498Szrj tree dataref_ptr = NULL_TREE;
609938fd1498Szrj tree dataref_offset = NULL_TREE;
610038fd1498Szrj gimple *ptr_incr = NULL;
610138fd1498Szrj int ncopies;
610238fd1498Szrj int j;
610338fd1498Szrj gimple *next_stmt, *first_stmt;
610438fd1498Szrj bool grouped_store;
610538fd1498Szrj unsigned int group_size, i;
610638fd1498Szrj vec<tree> oprnds = vNULL;
610738fd1498Szrj vec<tree> result_chain = vNULL;
610838fd1498Szrj bool inv_p;
610938fd1498Szrj tree offset = NULL_TREE;
611038fd1498Szrj vec<tree> vec_oprnds = vNULL;
611138fd1498Szrj bool slp = (slp_node != NULL);
611238fd1498Szrj unsigned int vec_num;
611338fd1498Szrj bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
611438fd1498Szrj vec_info *vinfo = stmt_info->vinfo;
611538fd1498Szrj tree aggr_type;
611638fd1498Szrj gather_scatter_info gs_info;
611738fd1498Szrj gimple *new_stmt;
611838fd1498Szrj poly_uint64 vf;
611938fd1498Szrj vec_load_store_type vls_type;
612038fd1498Szrj tree ref_type;
612138fd1498Szrj
612238fd1498Szrj if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
612338fd1498Szrj return false;
612438fd1498Szrj
612538fd1498Szrj if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
612638fd1498Szrj && ! vec_stmt)
612738fd1498Szrj return false;
612838fd1498Szrj
612938fd1498Szrj /* Is vectorizable store? */
613038fd1498Szrj
613138fd1498Szrj tree mask = NULL_TREE, mask_vectype = NULL_TREE;
613238fd1498Szrj if (is_gimple_assign (stmt))
613338fd1498Szrj {
613438fd1498Szrj tree scalar_dest = gimple_assign_lhs (stmt);
613538fd1498Szrj if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
613638fd1498Szrj && is_pattern_stmt_p (stmt_info))
613738fd1498Szrj scalar_dest = TREE_OPERAND (scalar_dest, 0);
613838fd1498Szrj if (TREE_CODE (scalar_dest) != ARRAY_REF
613938fd1498Szrj && TREE_CODE (scalar_dest) != BIT_FIELD_REF
614038fd1498Szrj && TREE_CODE (scalar_dest) != INDIRECT_REF
614138fd1498Szrj && TREE_CODE (scalar_dest) != COMPONENT_REF
614238fd1498Szrj && TREE_CODE (scalar_dest) != IMAGPART_EXPR
614338fd1498Szrj && TREE_CODE (scalar_dest) != REALPART_EXPR
614438fd1498Szrj && TREE_CODE (scalar_dest) != MEM_REF)
614538fd1498Szrj return false;
614638fd1498Szrj }
614738fd1498Szrj else
614838fd1498Szrj {
614938fd1498Szrj gcall *call = dyn_cast <gcall *> (stmt);
615038fd1498Szrj if (!call || !gimple_call_internal_p (call))
615138fd1498Szrj return false;
615238fd1498Szrj
615338fd1498Szrj internal_fn ifn = gimple_call_internal_fn (call);
615438fd1498Szrj if (!internal_store_fn_p (ifn))
615538fd1498Szrj return false;
615638fd1498Szrj
615738fd1498Szrj if (slp_node != NULL)
615838fd1498Szrj {
615938fd1498Szrj if (dump_enabled_p ())
616038fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
616138fd1498Szrj "SLP of masked stores not supported.\n");
616238fd1498Szrj return false;
616338fd1498Szrj }
616438fd1498Szrj
616538fd1498Szrj int mask_index = internal_fn_mask_index (ifn);
616638fd1498Szrj if (mask_index >= 0)
616738fd1498Szrj {
616838fd1498Szrj mask = gimple_call_arg (call, mask_index);
616938fd1498Szrj if (!vect_check_load_store_mask (stmt, mask, &mask_dt,
617038fd1498Szrj &mask_vectype))
617138fd1498Szrj return false;
617238fd1498Szrj }
617338fd1498Szrj }
617438fd1498Szrj
617538fd1498Szrj op = vect_get_store_rhs (stmt);
617638fd1498Szrj
617738fd1498Szrj /* Cannot have hybrid store SLP -- that would mean storing to the
617838fd1498Szrj same location twice. */
617938fd1498Szrj gcc_assert (slp == PURE_SLP_STMT (stmt_info));
618038fd1498Szrj
618138fd1498Szrj tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
618238fd1498Szrj poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
618338fd1498Szrj
618438fd1498Szrj if (loop_vinfo)
618538fd1498Szrj {
618638fd1498Szrj loop = LOOP_VINFO_LOOP (loop_vinfo);
618738fd1498Szrj vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
618838fd1498Szrj }
618938fd1498Szrj else
619038fd1498Szrj vf = 1;
619138fd1498Szrj
619238fd1498Szrj /* Multiple types in SLP are handled by creating the appropriate number of
619338fd1498Szrj vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
619438fd1498Szrj case of SLP. */
619538fd1498Szrj if (slp)
619638fd1498Szrj ncopies = 1;
619738fd1498Szrj else
619838fd1498Szrj ncopies = vect_get_num_copies (loop_vinfo, vectype);
619938fd1498Szrj
620038fd1498Szrj gcc_assert (ncopies >= 1);
620138fd1498Szrj
620238fd1498Szrj /* FORNOW. This restriction should be relaxed. */
620338fd1498Szrj if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
620438fd1498Szrj {
620538fd1498Szrj if (dump_enabled_p ())
620638fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
620738fd1498Szrj "multiple types in nested loop.\n");
620838fd1498Szrj return false;
620938fd1498Szrj }
621038fd1498Szrj
621138fd1498Szrj if (!vect_check_store_rhs (stmt, op, &rhs_dt, &rhs_vectype, &vls_type))
621238fd1498Szrj return false;
621338fd1498Szrj
621438fd1498Szrj elem_type = TREE_TYPE (vectype);
621538fd1498Szrj vec_mode = TYPE_MODE (vectype);
621638fd1498Szrj
621738fd1498Szrj if (!STMT_VINFO_DATA_REF (stmt_info))
621838fd1498Szrj return false;
621938fd1498Szrj
622038fd1498Szrj vect_memory_access_type memory_access_type;
622138fd1498Szrj if (!get_load_store_type (stmt, vectype, slp, mask, vls_type, ncopies,
622238fd1498Szrj &memory_access_type, &gs_info))
622338fd1498Szrj return false;
622438fd1498Szrj
622538fd1498Szrj if (mask)
622638fd1498Szrj {
622738fd1498Szrj if (memory_access_type == VMAT_CONTIGUOUS)
622838fd1498Szrj {
622938fd1498Szrj if (!VECTOR_MODE_P (vec_mode)
623038fd1498Szrj || !can_vec_mask_load_store_p (vec_mode,
623138fd1498Szrj TYPE_MODE (mask_vectype), false))
623238fd1498Szrj return false;
623338fd1498Szrj }
623438fd1498Szrj else if (memory_access_type != VMAT_LOAD_STORE_LANES
623538fd1498Szrj && (memory_access_type != VMAT_GATHER_SCATTER || gs_info.decl))
623638fd1498Szrj {
623738fd1498Szrj if (dump_enabled_p ())
623838fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
623938fd1498Szrj "unsupported access type for masked store.\n");
624038fd1498Szrj return false;
624138fd1498Szrj }
624238fd1498Szrj }
624338fd1498Szrj else
624438fd1498Szrj {
624538fd1498Szrj /* FORNOW. In some cases can vectorize even if data-type not supported
624638fd1498Szrj (e.g. - array initialization with 0). */
624738fd1498Szrj if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
624838fd1498Szrj return false;
624938fd1498Szrj }
625038fd1498Szrj
625138fd1498Szrj grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
625238fd1498Szrj && memory_access_type != VMAT_GATHER_SCATTER
625338fd1498Szrj && (slp || memory_access_type != VMAT_CONTIGUOUS));
625438fd1498Szrj if (grouped_store)
625538fd1498Szrj {
625638fd1498Szrj first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
625738fd1498Szrj first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
625838fd1498Szrj group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
625938fd1498Szrj }
626038fd1498Szrj else
626138fd1498Szrj {
626238fd1498Szrj first_stmt = stmt;
626338fd1498Szrj first_dr = dr;
626438fd1498Szrj group_size = vec_num = 1;
626538fd1498Szrj }
626638fd1498Szrj
626738fd1498Szrj if (!vec_stmt) /* transformation not required. */
626838fd1498Szrj {
626938fd1498Szrj STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
627038fd1498Szrj
627138fd1498Szrj if (loop_vinfo
627238fd1498Szrj && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
627338fd1498Szrj check_load_store_masking (loop_vinfo, vectype, vls_type, group_size,
627438fd1498Szrj memory_access_type, &gs_info);
627538fd1498Szrj
627638fd1498Szrj STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
627738fd1498Szrj /* The SLP costs are calculated during SLP analysis. */
627838fd1498Szrj if (!slp_node)
627938fd1498Szrj vect_model_store_cost (stmt_info, ncopies, memory_access_type,
628038fd1498Szrj vls_type, NULL, NULL, NULL);
628138fd1498Szrj return true;
628238fd1498Szrj }
628338fd1498Szrj gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
628438fd1498Szrj
628538fd1498Szrj /* Transform. */
628638fd1498Szrj
628738fd1498Szrj ensure_base_align (dr);
628838fd1498Szrj
628938fd1498Szrj if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
629038fd1498Szrj {
629138fd1498Szrj tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
629238fd1498Szrj tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
629338fd1498Szrj tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
629438fd1498Szrj tree ptr, mask, var, scale, perm_mask = NULL_TREE;
629538fd1498Szrj edge pe = loop_preheader_edge (loop);
629638fd1498Szrj gimple_seq seq;
629738fd1498Szrj basic_block new_bb;
629838fd1498Szrj enum { NARROW, NONE, WIDEN } modifier;
629938fd1498Szrj poly_uint64 scatter_off_nunits
630038fd1498Szrj = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
630138fd1498Szrj
630238fd1498Szrj if (known_eq (nunits, scatter_off_nunits))
630338fd1498Szrj modifier = NONE;
630438fd1498Szrj else if (known_eq (nunits * 2, scatter_off_nunits))
630538fd1498Szrj {
630638fd1498Szrj modifier = WIDEN;
630738fd1498Szrj
630838fd1498Szrj /* Currently gathers and scatters are only supported for
630938fd1498Szrj fixed-length vectors. */
631038fd1498Szrj unsigned int count = scatter_off_nunits.to_constant ();
631138fd1498Szrj vec_perm_builder sel (count, count, 1);
631238fd1498Szrj for (i = 0; i < (unsigned int) count; ++i)
631338fd1498Szrj sel.quick_push (i | (count / 2));
631438fd1498Szrj
631538fd1498Szrj vec_perm_indices indices (sel, 1, count);
631638fd1498Szrj perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
631738fd1498Szrj indices);
631838fd1498Szrj gcc_assert (perm_mask != NULL_TREE);
631938fd1498Szrj }
632038fd1498Szrj else if (known_eq (nunits, scatter_off_nunits * 2))
632138fd1498Szrj {
632238fd1498Szrj modifier = NARROW;
632338fd1498Szrj
632438fd1498Szrj /* Currently gathers and scatters are only supported for
632538fd1498Szrj fixed-length vectors. */
632638fd1498Szrj unsigned int count = nunits.to_constant ();
632738fd1498Szrj vec_perm_builder sel (count, count, 1);
632838fd1498Szrj for (i = 0; i < (unsigned int) count; ++i)
632938fd1498Szrj sel.quick_push (i | (count / 2));
633038fd1498Szrj
633138fd1498Szrj vec_perm_indices indices (sel, 2, count);
633238fd1498Szrj perm_mask = vect_gen_perm_mask_checked (vectype, indices);
633338fd1498Szrj gcc_assert (perm_mask != NULL_TREE);
633438fd1498Szrj ncopies *= 2;
633538fd1498Szrj }
633638fd1498Szrj else
633738fd1498Szrj gcc_unreachable ();
633838fd1498Szrj
633938fd1498Szrj rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
634038fd1498Szrj ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
634138fd1498Szrj masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
634238fd1498Szrj idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
634338fd1498Szrj srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
634438fd1498Szrj scaletype = TREE_VALUE (arglist);
634538fd1498Szrj
634638fd1498Szrj gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
634738fd1498Szrj && TREE_CODE (rettype) == VOID_TYPE);
634838fd1498Szrj
634938fd1498Szrj ptr = fold_convert (ptrtype, gs_info.base);
635038fd1498Szrj if (!is_gimple_min_invariant (ptr))
635138fd1498Szrj {
635238fd1498Szrj ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
635338fd1498Szrj new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
635438fd1498Szrj gcc_assert (!new_bb);
635538fd1498Szrj }
635638fd1498Szrj
635738fd1498Szrj /* Currently we support only unconditional scatter stores,
635838fd1498Szrj so mask should be all ones. */
635938fd1498Szrj mask = build_int_cst (masktype, -1);
636038fd1498Szrj mask = vect_init_vector (stmt, mask, masktype, NULL);
636138fd1498Szrj
636238fd1498Szrj scale = build_int_cst (scaletype, gs_info.scale);
636338fd1498Szrj
636438fd1498Szrj prev_stmt_info = NULL;
636538fd1498Szrj for (j = 0; j < ncopies; ++j)
636638fd1498Szrj {
636738fd1498Szrj if (j == 0)
636838fd1498Szrj {
636938fd1498Szrj src = vec_oprnd1
637038fd1498Szrj = vect_get_vec_def_for_operand (op, stmt);
637138fd1498Szrj op = vec_oprnd0
637238fd1498Szrj = vect_get_vec_def_for_operand (gs_info.offset, stmt);
637338fd1498Szrj }
637438fd1498Szrj else if (modifier != NONE && (j & 1))
637538fd1498Szrj {
637638fd1498Szrj if (modifier == WIDEN)
637738fd1498Szrj {
637838fd1498Szrj src = vec_oprnd1
637938fd1498Szrj = vect_get_vec_def_for_stmt_copy (rhs_dt, vec_oprnd1);
638038fd1498Szrj op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
638138fd1498Szrj stmt, gsi);
638238fd1498Szrj }
638338fd1498Szrj else if (modifier == NARROW)
638438fd1498Szrj {
638538fd1498Szrj src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
638638fd1498Szrj stmt, gsi);
638738fd1498Szrj op = vec_oprnd0
638838fd1498Szrj = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
638938fd1498Szrj vec_oprnd0);
639038fd1498Szrj }
639138fd1498Szrj else
639238fd1498Szrj gcc_unreachable ();
639338fd1498Szrj }
639438fd1498Szrj else
639538fd1498Szrj {
639638fd1498Szrj src = vec_oprnd1
639738fd1498Szrj = vect_get_vec_def_for_stmt_copy (rhs_dt, vec_oprnd1);
639838fd1498Szrj op = vec_oprnd0
639938fd1498Szrj = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
640038fd1498Szrj vec_oprnd0);
640138fd1498Szrj }
640238fd1498Szrj
640338fd1498Szrj if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
640438fd1498Szrj {
640538fd1498Szrj gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
640638fd1498Szrj TYPE_VECTOR_SUBPARTS (srctype)));
640738fd1498Szrj var = vect_get_new_ssa_name (srctype, vect_simple_var);
640838fd1498Szrj src = build1 (VIEW_CONVERT_EXPR, srctype, src);
640938fd1498Szrj new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
641038fd1498Szrj vect_finish_stmt_generation (stmt, new_stmt, gsi);
641138fd1498Szrj src = var;
641238fd1498Szrj }
641338fd1498Szrj
641438fd1498Szrj if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
641538fd1498Szrj {
641638fd1498Szrj gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
641738fd1498Szrj TYPE_VECTOR_SUBPARTS (idxtype)));
641838fd1498Szrj var = vect_get_new_ssa_name (idxtype, vect_simple_var);
641938fd1498Szrj op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
642038fd1498Szrj new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
642138fd1498Szrj vect_finish_stmt_generation (stmt, new_stmt, gsi);
642238fd1498Szrj op = var;
642338fd1498Szrj }
642438fd1498Szrj
642538fd1498Szrj new_stmt
642638fd1498Szrj = gimple_build_call (gs_info.decl, 5, ptr, mask, op, src, scale);
642738fd1498Szrj
642838fd1498Szrj vect_finish_stmt_generation (stmt, new_stmt, gsi);
642938fd1498Szrj
643038fd1498Szrj if (prev_stmt_info == NULL)
643138fd1498Szrj STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
643238fd1498Szrj else
643338fd1498Szrj STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
643438fd1498Szrj prev_stmt_info = vinfo_for_stmt (new_stmt);
643538fd1498Szrj }
643638fd1498Szrj return true;
643738fd1498Szrj }
643838fd1498Szrj
643938fd1498Szrj if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
644038fd1498Szrj {
644138fd1498Szrj gimple *group_stmt = GROUP_FIRST_ELEMENT (stmt_info);
644238fd1498Szrj GROUP_STORE_COUNT (vinfo_for_stmt (group_stmt))++;
644338fd1498Szrj }
644438fd1498Szrj
644538fd1498Szrj if (grouped_store)
644638fd1498Szrj {
644738fd1498Szrj /* FORNOW */
644838fd1498Szrj gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
644938fd1498Szrj
645038fd1498Szrj /* We vectorize all the stmts of the interleaving group when we
645138fd1498Szrj reach the last stmt in the group. */
645238fd1498Szrj if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
645338fd1498Szrj < GROUP_SIZE (vinfo_for_stmt (first_stmt))
645438fd1498Szrj && !slp)
645538fd1498Szrj {
645638fd1498Szrj *vec_stmt = NULL;
645738fd1498Szrj return true;
645838fd1498Szrj }
645938fd1498Szrj
646038fd1498Szrj if (slp)
646138fd1498Szrj {
646238fd1498Szrj grouped_store = false;
646338fd1498Szrj /* VEC_NUM is the number of vect stmts to be created for this
646438fd1498Szrj group. */
646538fd1498Szrj vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
646638fd1498Szrj first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
646738fd1498Szrj gcc_assert (GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt)) == first_stmt);
646838fd1498Szrj first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
646938fd1498Szrj op = vect_get_store_rhs (first_stmt);
647038fd1498Szrj }
647138fd1498Szrj else
647238fd1498Szrj /* VEC_NUM is the number of vect stmts to be created for this
647338fd1498Szrj group. */
647438fd1498Szrj vec_num = group_size;
647538fd1498Szrj
647638fd1498Szrj ref_type = get_group_alias_ptr_type (first_stmt);
647738fd1498Szrj }
647838fd1498Szrj else
647938fd1498Szrj ref_type = reference_alias_ptr_type (DR_REF (first_dr));
648038fd1498Szrj
648138fd1498Szrj if (dump_enabled_p ())
648238fd1498Szrj dump_printf_loc (MSG_NOTE, vect_location,
648338fd1498Szrj "transform store. ncopies = %d\n", ncopies);
648438fd1498Szrj
648538fd1498Szrj if (memory_access_type == VMAT_ELEMENTWISE
648638fd1498Szrj || memory_access_type == VMAT_STRIDED_SLP)
648738fd1498Szrj {
648838fd1498Szrj gimple_stmt_iterator incr_gsi;
648938fd1498Szrj bool insert_after;
649038fd1498Szrj gimple *incr;
649138fd1498Szrj tree offvar;
649238fd1498Szrj tree ivstep;
649338fd1498Szrj tree running_off;
649438fd1498Szrj tree stride_base, stride_step, alias_off;
649538fd1498Szrj tree vec_oprnd;
649638fd1498Szrj unsigned int g;
649738fd1498Szrj /* Checked by get_load_store_type. */
649838fd1498Szrj unsigned int const_nunits = nunits.to_constant ();
649938fd1498Szrj
650038fd1498Szrj gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
650138fd1498Szrj gcc_assert (!nested_in_vect_loop_p (loop, stmt));
650238fd1498Szrj
650338fd1498Szrj stride_base
650438fd1498Szrj = fold_build_pointer_plus
650538fd1498Szrj (DR_BASE_ADDRESS (first_dr),
650638fd1498Szrj size_binop (PLUS_EXPR,
650738fd1498Szrj convert_to_ptrofftype (DR_OFFSET (first_dr)),
650838fd1498Szrj convert_to_ptrofftype (DR_INIT (first_dr))));
650938fd1498Szrj stride_step = fold_convert (sizetype, DR_STEP (first_dr));
651038fd1498Szrj
651138fd1498Szrj /* For a store with loop-invariant (but other than power-of-2)
651238fd1498Szrj stride (i.e. not a grouped access) like so:
651338fd1498Szrj
651438fd1498Szrj for (i = 0; i < n; i += stride)
651538fd1498Szrj array[i] = ...;
651638fd1498Szrj
651738fd1498Szrj we generate a new induction variable and new stores from
651838fd1498Szrj the components of the (vectorized) rhs:
651938fd1498Szrj
652038fd1498Szrj for (j = 0; ; j += VF*stride)
652138fd1498Szrj vectemp = ...;
652238fd1498Szrj tmp1 = vectemp[0];
652338fd1498Szrj array[j] = tmp1;
652438fd1498Szrj tmp2 = vectemp[1];
652538fd1498Szrj array[j + stride] = tmp2;
652638fd1498Szrj ...
652738fd1498Szrj */
652838fd1498Szrj
652938fd1498Szrj unsigned nstores = const_nunits;
653038fd1498Szrj unsigned lnel = 1;
653138fd1498Szrj tree ltype = elem_type;
653238fd1498Szrj tree lvectype = vectype;
653338fd1498Szrj if (slp)
653438fd1498Szrj {
653538fd1498Szrj if (group_size < const_nunits
653638fd1498Szrj && const_nunits % group_size == 0)
653738fd1498Szrj {
653838fd1498Szrj nstores = const_nunits / group_size;
653938fd1498Szrj lnel = group_size;
654038fd1498Szrj ltype = build_vector_type (elem_type, group_size);
654138fd1498Szrj lvectype = vectype;
654238fd1498Szrj
654338fd1498Szrj /* First check if vec_extract optab doesn't support extraction
654438fd1498Szrj of vector elts directly. */
654538fd1498Szrj scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
654638fd1498Szrj machine_mode vmode;
654738fd1498Szrj if (!mode_for_vector (elmode, group_size).exists (&vmode)
654838fd1498Szrj || !VECTOR_MODE_P (vmode)
654938fd1498Szrj || !targetm.vector_mode_supported_p (vmode)
655038fd1498Szrj || (convert_optab_handler (vec_extract_optab,
655138fd1498Szrj TYPE_MODE (vectype), vmode)
655238fd1498Szrj == CODE_FOR_nothing))
655338fd1498Szrj {
655438fd1498Szrj /* Try to avoid emitting an extract of vector elements
655538fd1498Szrj by performing the extracts using an integer type of the
655638fd1498Szrj same size, extracting from a vector of those and then
655738fd1498Szrj re-interpreting it as the original vector type if
655838fd1498Szrj supported. */
655938fd1498Szrj unsigned lsize
656038fd1498Szrj = group_size * GET_MODE_BITSIZE (elmode);
656138fd1498Szrj elmode = int_mode_for_size (lsize, 0).require ();
656238fd1498Szrj unsigned int lnunits = const_nunits / group_size;
656338fd1498Szrj /* If we can't construct such a vector fall back to
656438fd1498Szrj element extracts from the original vector type and
656538fd1498Szrj element size stores. */
656638fd1498Szrj if (mode_for_vector (elmode, lnunits).exists (&vmode)
656738fd1498Szrj && VECTOR_MODE_P (vmode)
656838fd1498Szrj && targetm.vector_mode_supported_p (vmode)
656938fd1498Szrj && (convert_optab_handler (vec_extract_optab,
657038fd1498Szrj vmode, elmode)
657138fd1498Szrj != CODE_FOR_nothing))
657238fd1498Szrj {
657338fd1498Szrj nstores = lnunits;
657438fd1498Szrj lnel = group_size;
657538fd1498Szrj ltype = build_nonstandard_integer_type (lsize, 1);
657638fd1498Szrj lvectype = build_vector_type (ltype, nstores);
657738fd1498Szrj }
657838fd1498Szrj /* Else fall back to vector extraction anyway.
657938fd1498Szrj Fewer stores are more important than avoiding spilling
658038fd1498Szrj of the vector we extract from. Compared to the
658138fd1498Szrj construction case in vectorizable_load no store-forwarding
658238fd1498Szrj issue exists here for reasonable archs. */
658338fd1498Szrj }
658438fd1498Szrj }
658538fd1498Szrj else if (group_size >= const_nunits
658638fd1498Szrj && group_size % const_nunits == 0)
658738fd1498Szrj {
658838fd1498Szrj nstores = 1;
658938fd1498Szrj lnel = const_nunits;
659038fd1498Szrj ltype = vectype;
659138fd1498Szrj lvectype = vectype;
659238fd1498Szrj }
659338fd1498Szrj ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
659438fd1498Szrj ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
659538fd1498Szrj }
659638fd1498Szrj
659738fd1498Szrj ivstep = stride_step;
659838fd1498Szrj ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
659938fd1498Szrj build_int_cst (TREE_TYPE (ivstep), vf));
660038fd1498Szrj
660138fd1498Szrj standard_iv_increment_position (loop, &incr_gsi, &insert_after);
660238fd1498Szrj
660338fd1498Szrj stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
660438fd1498Szrj ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
660538fd1498Szrj create_iv (stride_base, ivstep, NULL,
660638fd1498Szrj loop, &incr_gsi, insert_after,
660738fd1498Szrj &offvar, NULL);
660838fd1498Szrj incr = gsi_stmt (incr_gsi);
660938fd1498Szrj set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
661038fd1498Szrj
661138fd1498Szrj stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
661238fd1498Szrj
661338fd1498Szrj prev_stmt_info = NULL;
661438fd1498Szrj alias_off = build_int_cst (ref_type, 0);
661538fd1498Szrj next_stmt = first_stmt;
661638fd1498Szrj for (g = 0; g < group_size; g++)
661738fd1498Szrj {
661838fd1498Szrj running_off = offvar;
661938fd1498Szrj if (g)
662038fd1498Szrj {
662138fd1498Szrj tree size = TYPE_SIZE_UNIT (ltype);
662238fd1498Szrj tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
662338fd1498Szrj size);
662438fd1498Szrj tree newoff = copy_ssa_name (running_off, NULL);
662538fd1498Szrj incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
662638fd1498Szrj running_off, pos);
662738fd1498Szrj vect_finish_stmt_generation (stmt, incr, gsi);
662838fd1498Szrj running_off = newoff;
662938fd1498Szrj }
663038fd1498Szrj unsigned int group_el = 0;
663138fd1498Szrj unsigned HOST_WIDE_INT
663238fd1498Szrj elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
663338fd1498Szrj for (j = 0; j < ncopies; j++)
663438fd1498Szrj {
663538fd1498Szrj /* We've set op and dt above, from vect_get_store_rhs,
663638fd1498Szrj and first_stmt == stmt. */
663738fd1498Szrj if (j == 0)
663838fd1498Szrj {
663938fd1498Szrj if (slp)
664038fd1498Szrj {
664138fd1498Szrj vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
664238fd1498Szrj slp_node);
664338fd1498Szrj vec_oprnd = vec_oprnds[0];
664438fd1498Szrj }
664538fd1498Szrj else
664638fd1498Szrj {
664738fd1498Szrj op = vect_get_store_rhs (next_stmt);
664838fd1498Szrj vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
664938fd1498Szrj }
665038fd1498Szrj }
665138fd1498Szrj else
665238fd1498Szrj {
665338fd1498Szrj if (slp)
665438fd1498Szrj vec_oprnd = vec_oprnds[j];
665538fd1498Szrj else
665638fd1498Szrj {
665738fd1498Szrj vect_is_simple_use (op, vinfo, &def_stmt, &rhs_dt);
665838fd1498Szrj vec_oprnd = vect_get_vec_def_for_stmt_copy (rhs_dt,
665938fd1498Szrj vec_oprnd);
666038fd1498Szrj }
666138fd1498Szrj }
666238fd1498Szrj /* Pun the vector to extract from if necessary. */
666338fd1498Szrj if (lvectype != vectype)
666438fd1498Szrj {
666538fd1498Szrj tree tem = make_ssa_name (lvectype);
666638fd1498Szrj gimple *pun
666738fd1498Szrj = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
666838fd1498Szrj lvectype, vec_oprnd));
666938fd1498Szrj vect_finish_stmt_generation (stmt, pun, gsi);
667038fd1498Szrj vec_oprnd = tem;
667138fd1498Szrj }
667238fd1498Szrj for (i = 0; i < nstores; i++)
667338fd1498Szrj {
667438fd1498Szrj tree newref, newoff;
667538fd1498Szrj gimple *incr, *assign;
667638fd1498Szrj tree size = TYPE_SIZE (ltype);
667738fd1498Szrj /* Extract the i'th component. */
667838fd1498Szrj tree pos = fold_build2 (MULT_EXPR, bitsizetype,
667938fd1498Szrj bitsize_int (i), size);
668038fd1498Szrj tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
668138fd1498Szrj size, pos);
668238fd1498Szrj
668338fd1498Szrj elem = force_gimple_operand_gsi (gsi, elem, true,
668438fd1498Szrj NULL_TREE, true,
668538fd1498Szrj GSI_SAME_STMT);
668638fd1498Szrj
668738fd1498Szrj tree this_off = build_int_cst (TREE_TYPE (alias_off),
668838fd1498Szrj group_el * elsz);
668938fd1498Szrj newref = build2 (MEM_REF, ltype,
669038fd1498Szrj running_off, this_off);
669138fd1498Szrj vect_copy_ref_info (newref, DR_REF (first_dr));
669238fd1498Szrj
669338fd1498Szrj /* And store it to *running_off. */
669438fd1498Szrj assign = gimple_build_assign (newref, elem);
669538fd1498Szrj vect_finish_stmt_generation (stmt, assign, gsi);
669638fd1498Szrj
669738fd1498Szrj group_el += lnel;
669838fd1498Szrj if (! slp
669938fd1498Szrj || group_el == group_size)
670038fd1498Szrj {
670138fd1498Szrj newoff = copy_ssa_name (running_off, NULL);
670238fd1498Szrj incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
670338fd1498Szrj running_off, stride_step);
670438fd1498Szrj vect_finish_stmt_generation (stmt, incr, gsi);
670538fd1498Szrj
670638fd1498Szrj running_off = newoff;
670738fd1498Szrj group_el = 0;
670838fd1498Szrj }
670938fd1498Szrj if (g == group_size - 1
671038fd1498Szrj && !slp)
671138fd1498Szrj {
671238fd1498Szrj if (j == 0 && i == 0)
671338fd1498Szrj STMT_VINFO_VEC_STMT (stmt_info)
671438fd1498Szrj = *vec_stmt = assign;
671538fd1498Szrj else
671638fd1498Szrj STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
671738fd1498Szrj prev_stmt_info = vinfo_for_stmt (assign);
671838fd1498Szrj }
671938fd1498Szrj }
672038fd1498Szrj }
672138fd1498Szrj next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
672238fd1498Szrj if (slp)
672338fd1498Szrj break;
672438fd1498Szrj }
672538fd1498Szrj
672638fd1498Szrj vec_oprnds.release ();
672738fd1498Szrj return true;
672838fd1498Szrj }
672938fd1498Szrj
673038fd1498Szrj auto_vec<tree> dr_chain (group_size);
673138fd1498Szrj oprnds.create (group_size);
673238fd1498Szrj
673338fd1498Szrj alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
673438fd1498Szrj gcc_assert (alignment_support_scheme);
673538fd1498Szrj vec_loop_masks *loop_masks
673638fd1498Szrj = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
673738fd1498Szrj ? &LOOP_VINFO_MASKS (loop_vinfo)
673838fd1498Szrj : NULL);
673938fd1498Szrj /* Targets with store-lane instructions must not require explicit
674038fd1498Szrj realignment. vect_supportable_dr_alignment always returns either
674138fd1498Szrj dr_aligned or dr_unaligned_supported for masked operations. */
674238fd1498Szrj gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
674338fd1498Szrj && !mask
674438fd1498Szrj && !loop_masks)
674538fd1498Szrj || alignment_support_scheme == dr_aligned
674638fd1498Szrj || alignment_support_scheme == dr_unaligned_supported);
674738fd1498Szrj
674838fd1498Szrj if (memory_access_type == VMAT_CONTIGUOUS_DOWN
674938fd1498Szrj || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
675038fd1498Szrj offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
675138fd1498Szrj
675238fd1498Szrj tree bump;
675338fd1498Szrj tree vec_offset = NULL_TREE;
675438fd1498Szrj if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
675538fd1498Szrj {
675638fd1498Szrj aggr_type = NULL_TREE;
675738fd1498Szrj bump = NULL_TREE;
675838fd1498Szrj }
675938fd1498Szrj else if (memory_access_type == VMAT_GATHER_SCATTER)
676038fd1498Szrj {
676138fd1498Szrj aggr_type = elem_type;
676238fd1498Szrj vect_get_strided_load_store_ops (stmt, loop_vinfo, &gs_info,
676338fd1498Szrj &bump, &vec_offset);
676438fd1498Szrj }
676538fd1498Szrj else
676638fd1498Szrj {
676738fd1498Szrj if (memory_access_type == VMAT_LOAD_STORE_LANES)
676838fd1498Szrj aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
676938fd1498Szrj else
677038fd1498Szrj aggr_type = vectype;
677138fd1498Szrj bump = vect_get_data_ptr_increment (dr, aggr_type, memory_access_type);
677238fd1498Szrj }
677338fd1498Szrj
677438fd1498Szrj if (mask)
677538fd1498Szrj LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
677638fd1498Szrj
677738fd1498Szrj /* In case the vectorization factor (VF) is bigger than the number
677838fd1498Szrj of elements that we can fit in a vectype (nunits), we have to generate
677938fd1498Szrj more than one vector stmt - i.e - we need to "unroll" the
678038fd1498Szrj vector stmt by a factor VF/nunits. For more details see documentation in
678138fd1498Szrj vect_get_vec_def_for_copy_stmt. */
678238fd1498Szrj
678338fd1498Szrj /* In case of interleaving (non-unit grouped access):
678438fd1498Szrj
678538fd1498Szrj S1: &base + 2 = x2
678638fd1498Szrj S2: &base = x0
678738fd1498Szrj S3: &base + 1 = x1
678838fd1498Szrj S4: &base + 3 = x3
678938fd1498Szrj
679038fd1498Szrj We create vectorized stores starting from base address (the access of the
679138fd1498Szrj first stmt in the chain (S2 in the above example), when the last store stmt
679238fd1498Szrj of the chain (S4) is reached:
679338fd1498Szrj
679438fd1498Szrj VS1: &base = vx2
679538fd1498Szrj VS2: &base + vec_size*1 = vx0
679638fd1498Szrj VS3: &base + vec_size*2 = vx1
679738fd1498Szrj VS4: &base + vec_size*3 = vx3
679838fd1498Szrj
679938fd1498Szrj Then permutation statements are generated:
680038fd1498Szrj
680138fd1498Szrj VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
680238fd1498Szrj VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
680338fd1498Szrj ...
680438fd1498Szrj
680538fd1498Szrj And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
680638fd1498Szrj (the order of the data-refs in the output of vect_permute_store_chain
680738fd1498Szrj corresponds to the order of scalar stmts in the interleaving chain - see
680838fd1498Szrj the documentation of vect_permute_store_chain()).
680938fd1498Szrj
681038fd1498Szrj In case of both multiple types and interleaving, above vector stores and
681138fd1498Szrj permutation stmts are created for every copy. The result vector stmts are
681238fd1498Szrj put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
681338fd1498Szrj STMT_VINFO_RELATED_STMT for the next copies.
681438fd1498Szrj */
681538fd1498Szrj
681638fd1498Szrj prev_stmt_info = NULL;
681738fd1498Szrj tree vec_mask = NULL_TREE;
681838fd1498Szrj for (j = 0; j < ncopies; j++)
681938fd1498Szrj {
682038fd1498Szrj
682138fd1498Szrj if (j == 0)
682238fd1498Szrj {
682338fd1498Szrj if (slp)
682438fd1498Szrj {
682538fd1498Szrj /* Get vectorized arguments for SLP_NODE. */
682638fd1498Szrj vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
682738fd1498Szrj NULL, slp_node);
682838fd1498Szrj
682938fd1498Szrj vec_oprnd = vec_oprnds[0];
683038fd1498Szrj }
683138fd1498Szrj else
683238fd1498Szrj {
683338fd1498Szrj /* For interleaved stores we collect vectorized defs for all the
683438fd1498Szrj stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
683538fd1498Szrj used as an input to vect_permute_store_chain(), and OPRNDS as
683638fd1498Szrj an input to vect_get_vec_def_for_stmt_copy() for the next copy.
683738fd1498Szrj
683838fd1498Szrj If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
683938fd1498Szrj OPRNDS are of size 1. */
684038fd1498Szrj next_stmt = first_stmt;
684138fd1498Szrj for (i = 0; i < group_size; i++)
684238fd1498Szrj {
684338fd1498Szrj /* Since gaps are not supported for interleaved stores,
684438fd1498Szrj GROUP_SIZE is the exact number of stmts in the chain.
684538fd1498Szrj Therefore, NEXT_STMT can't be NULL_TREE. In case that
684638fd1498Szrj there is no interleaving, GROUP_SIZE is 1, and only one
684738fd1498Szrj iteration of the loop will be executed. */
684838fd1498Szrj op = vect_get_store_rhs (next_stmt);
684938fd1498Szrj vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
685038fd1498Szrj dr_chain.quick_push (vec_oprnd);
685138fd1498Szrj oprnds.quick_push (vec_oprnd);
685238fd1498Szrj next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
685338fd1498Szrj }
685438fd1498Szrj if (mask)
685538fd1498Szrj vec_mask = vect_get_vec_def_for_operand (mask, stmt,
685638fd1498Szrj mask_vectype);
685738fd1498Szrj }
685838fd1498Szrj
685938fd1498Szrj /* We should have caught mismatched types earlier. */
686038fd1498Szrj gcc_assert (useless_type_conversion_p (vectype,
686138fd1498Szrj TREE_TYPE (vec_oprnd)));
686238fd1498Szrj bool simd_lane_access_p
686338fd1498Szrj = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
686438fd1498Szrj if (simd_lane_access_p
686538fd1498Szrj && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
686638fd1498Szrj && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
686738fd1498Szrj && integer_zerop (DR_OFFSET (first_dr))
686838fd1498Szrj && integer_zerop (DR_INIT (first_dr))
686938fd1498Szrj && alias_sets_conflict_p (get_alias_set (aggr_type),
687038fd1498Szrj get_alias_set (TREE_TYPE (ref_type))))
687138fd1498Szrj {
687238fd1498Szrj dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
687338fd1498Szrj dataref_offset = build_int_cst (ref_type, 0);
687438fd1498Szrj inv_p = false;
687538fd1498Szrj }
687638fd1498Szrj else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
687738fd1498Szrj {
687838fd1498Szrj vect_get_gather_scatter_ops (loop, stmt, &gs_info,
687938fd1498Szrj &dataref_ptr, &vec_offset);
688038fd1498Szrj inv_p = false;
688138fd1498Szrj }
688238fd1498Szrj else
688338fd1498Szrj dataref_ptr
688438fd1498Szrj = vect_create_data_ref_ptr (first_stmt, aggr_type,
688538fd1498Szrj simd_lane_access_p ? loop : NULL,
688638fd1498Szrj offset, &dummy, gsi, &ptr_incr,
688738fd1498Szrj simd_lane_access_p, &inv_p,
688838fd1498Szrj NULL_TREE, bump);
688938fd1498Szrj gcc_assert (bb_vinfo || !inv_p);
689038fd1498Szrj }
689138fd1498Szrj else
689238fd1498Szrj {
689338fd1498Szrj /* For interleaved stores we created vectorized defs for all the
689438fd1498Szrj defs stored in OPRNDS in the previous iteration (previous copy).
689538fd1498Szrj DR_CHAIN is then used as an input to vect_permute_store_chain(),
689638fd1498Szrj and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
689738fd1498Szrj next copy.
689838fd1498Szrj If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
689938fd1498Szrj OPRNDS are of size 1. */
690038fd1498Szrj for (i = 0; i < group_size; i++)
690138fd1498Szrj {
690238fd1498Szrj op = oprnds[i];
690338fd1498Szrj vect_is_simple_use (op, vinfo, &def_stmt, &rhs_dt);
690438fd1498Szrj vec_oprnd = vect_get_vec_def_for_stmt_copy (rhs_dt, op);
690538fd1498Szrj dr_chain[i] = vec_oprnd;
690638fd1498Szrj oprnds[i] = vec_oprnd;
690738fd1498Szrj }
690838fd1498Szrj if (mask)
690938fd1498Szrj vec_mask = vect_get_vec_def_for_stmt_copy (mask_dt, vec_mask);
691038fd1498Szrj if (dataref_offset)
691138fd1498Szrj dataref_offset
691238fd1498Szrj = int_const_binop (PLUS_EXPR, dataref_offset, bump);
691338fd1498Szrj else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
691438fd1498Szrj vec_offset = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
691538fd1498Szrj vec_offset);
691638fd1498Szrj else
691738fd1498Szrj dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
691838fd1498Szrj bump);
691938fd1498Szrj }
692038fd1498Szrj
692138fd1498Szrj if (memory_access_type == VMAT_LOAD_STORE_LANES)
692238fd1498Szrj {
692338fd1498Szrj tree vec_array;
692438fd1498Szrj
692538fd1498Szrj /* Combine all the vectors into an array. */
692638fd1498Szrj vec_array = create_vector_array (vectype, vec_num);
692738fd1498Szrj for (i = 0; i < vec_num; i++)
692838fd1498Szrj {
692938fd1498Szrj vec_oprnd = dr_chain[i];
693038fd1498Szrj write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
693138fd1498Szrj }
693238fd1498Szrj
693338fd1498Szrj tree final_mask = NULL;
693438fd1498Szrj if (loop_masks)
693538fd1498Szrj final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
693638fd1498Szrj vectype, j);
693738fd1498Szrj if (vec_mask)
693838fd1498Szrj final_mask = prepare_load_store_mask (mask_vectype, final_mask,
693938fd1498Szrj vec_mask, gsi);
694038fd1498Szrj
694138fd1498Szrj gcall *call;
694238fd1498Szrj if (final_mask)
694338fd1498Szrj {
694438fd1498Szrj /* Emit:
694538fd1498Szrj MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
694638fd1498Szrj VEC_ARRAY). */
694738fd1498Szrj unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
694838fd1498Szrj tree alias_ptr = build_int_cst (ref_type, align);
694938fd1498Szrj call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
695038fd1498Szrj dataref_ptr, alias_ptr,
695138fd1498Szrj final_mask, vec_array);
695238fd1498Szrj }
695338fd1498Szrj else
695438fd1498Szrj {
695538fd1498Szrj /* Emit:
695638fd1498Szrj MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
695738fd1498Szrj data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
695838fd1498Szrj call = gimple_build_call_internal (IFN_STORE_LANES, 1,
695938fd1498Szrj vec_array);
696038fd1498Szrj gimple_call_set_lhs (call, data_ref);
696138fd1498Szrj }
696238fd1498Szrj gimple_call_set_nothrow (call, true);
696338fd1498Szrj new_stmt = call;
696438fd1498Szrj vect_finish_stmt_generation (stmt, new_stmt, gsi);
696538fd1498Szrj }
696638fd1498Szrj else
696738fd1498Szrj {
696838fd1498Szrj new_stmt = NULL;
696938fd1498Szrj if (grouped_store)
697038fd1498Szrj {
697138fd1498Szrj if (j == 0)
697238fd1498Szrj result_chain.create (group_size);
697338fd1498Szrj /* Permute. */
697438fd1498Szrj vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
697538fd1498Szrj &result_chain);
697638fd1498Szrj }
697738fd1498Szrj
697838fd1498Szrj next_stmt = first_stmt;
697938fd1498Szrj for (i = 0; i < vec_num; i++)
698038fd1498Szrj {
698138fd1498Szrj unsigned align, misalign;
698238fd1498Szrj
698338fd1498Szrj tree final_mask = NULL_TREE;
698438fd1498Szrj if (loop_masks)
698538fd1498Szrj final_mask = vect_get_loop_mask (gsi, loop_masks,
698638fd1498Szrj vec_num * ncopies,
698738fd1498Szrj vectype, vec_num * j + i);
698838fd1498Szrj if (vec_mask)
698938fd1498Szrj final_mask = prepare_load_store_mask (mask_vectype, final_mask,
699038fd1498Szrj vec_mask, gsi);
699138fd1498Szrj
699238fd1498Szrj if (memory_access_type == VMAT_GATHER_SCATTER)
699338fd1498Szrj {
699438fd1498Szrj tree scale = size_int (gs_info.scale);
699538fd1498Szrj gcall *call;
699638fd1498Szrj if (loop_masks)
699738fd1498Szrj call = gimple_build_call_internal
699838fd1498Szrj (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
699938fd1498Szrj scale, vec_oprnd, final_mask);
700038fd1498Szrj else
700138fd1498Szrj call = gimple_build_call_internal
700238fd1498Szrj (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
700338fd1498Szrj scale, vec_oprnd);
700438fd1498Szrj gimple_call_set_nothrow (call, true);
700538fd1498Szrj new_stmt = call;
700638fd1498Szrj vect_finish_stmt_generation (stmt, new_stmt, gsi);
700738fd1498Szrj break;
700838fd1498Szrj }
700938fd1498Szrj
701038fd1498Szrj if (i > 0)
701138fd1498Szrj /* Bump the vector pointer. */
701238fd1498Szrj dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
701338fd1498Szrj stmt, bump);
701438fd1498Szrj
701538fd1498Szrj if (slp)
701638fd1498Szrj vec_oprnd = vec_oprnds[i];
701738fd1498Szrj else if (grouped_store)
701838fd1498Szrj /* For grouped stores vectorized defs are interleaved in
701938fd1498Szrj vect_permute_store_chain(). */
702038fd1498Szrj vec_oprnd = result_chain[i];
702138fd1498Szrj
702238fd1498Szrj align = DR_TARGET_ALIGNMENT (first_dr);
702338fd1498Szrj if (aligned_access_p (first_dr))
702438fd1498Szrj misalign = 0;
702538fd1498Szrj else if (DR_MISALIGNMENT (first_dr) == -1)
702638fd1498Szrj {
702738fd1498Szrj align = dr_alignment (vect_dr_behavior (first_dr));
702838fd1498Szrj misalign = 0;
702938fd1498Szrj }
703038fd1498Szrj else
703138fd1498Szrj misalign = DR_MISALIGNMENT (first_dr);
703238fd1498Szrj if (dataref_offset == NULL_TREE
703338fd1498Szrj && TREE_CODE (dataref_ptr) == SSA_NAME)
703438fd1498Szrj set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
703538fd1498Szrj misalign);
703638fd1498Szrj
703738fd1498Szrj if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
703838fd1498Szrj {
703938fd1498Szrj tree perm_mask = perm_mask_for_reverse (vectype);
704038fd1498Szrj tree perm_dest
704138fd1498Szrj = vect_create_destination_var (vect_get_store_rhs (stmt),
704238fd1498Szrj vectype);
704338fd1498Szrj tree new_temp = make_ssa_name (perm_dest);
704438fd1498Szrj
704538fd1498Szrj /* Generate the permute statement. */
704638fd1498Szrj gimple *perm_stmt
704738fd1498Szrj = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
704838fd1498Szrj vec_oprnd, perm_mask);
704938fd1498Szrj vect_finish_stmt_generation (stmt, perm_stmt, gsi);
705038fd1498Szrj
705138fd1498Szrj perm_stmt = SSA_NAME_DEF_STMT (new_temp);
705238fd1498Szrj vec_oprnd = new_temp;
705338fd1498Szrj }
705438fd1498Szrj
705538fd1498Szrj /* Arguments are ready. Create the new vector stmt. */
705638fd1498Szrj if (final_mask)
705738fd1498Szrj {
705838fd1498Szrj align = least_bit_hwi (misalign | align);
705938fd1498Szrj tree ptr = build_int_cst (ref_type, align);
706038fd1498Szrj gcall *call
706138fd1498Szrj = gimple_build_call_internal (IFN_MASK_STORE, 4,
706238fd1498Szrj dataref_ptr, ptr,
706338fd1498Szrj final_mask, vec_oprnd);
706438fd1498Szrj gimple_call_set_nothrow (call, true);
706538fd1498Szrj new_stmt = call;
706638fd1498Szrj }
706738fd1498Szrj else
706838fd1498Szrj {
706938fd1498Szrj data_ref = fold_build2 (MEM_REF, vectype,
707038fd1498Szrj dataref_ptr,
707138fd1498Szrj dataref_offset
707238fd1498Szrj ? dataref_offset
707338fd1498Szrj : build_int_cst (ref_type, 0));
707438fd1498Szrj if (aligned_access_p (first_dr))
707538fd1498Szrj ;
707638fd1498Szrj else if (DR_MISALIGNMENT (first_dr) == -1)
707738fd1498Szrj TREE_TYPE (data_ref)
707838fd1498Szrj = build_aligned_type (TREE_TYPE (data_ref),
707938fd1498Szrj align * BITS_PER_UNIT);
708038fd1498Szrj else
708138fd1498Szrj TREE_TYPE (data_ref)
708238fd1498Szrj = build_aligned_type (TREE_TYPE (data_ref),
708338fd1498Szrj TYPE_ALIGN (elem_type));
708438fd1498Szrj vect_copy_ref_info (data_ref, DR_REF (first_dr));
708538fd1498Szrj new_stmt = gimple_build_assign (data_ref, vec_oprnd);
708638fd1498Szrj }
708738fd1498Szrj vect_finish_stmt_generation (stmt, new_stmt, gsi);
708838fd1498Szrj
708938fd1498Szrj if (slp)
709038fd1498Szrj continue;
709138fd1498Szrj
709238fd1498Szrj next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
709338fd1498Szrj if (!next_stmt)
709438fd1498Szrj break;
709538fd1498Szrj }
709638fd1498Szrj }
709738fd1498Szrj if (!slp)
709838fd1498Szrj {
709938fd1498Szrj if (j == 0)
710038fd1498Szrj STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
710138fd1498Szrj else
710238fd1498Szrj STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
710338fd1498Szrj prev_stmt_info = vinfo_for_stmt (new_stmt);
710438fd1498Szrj }
710538fd1498Szrj }
710638fd1498Szrj
710738fd1498Szrj oprnds.release ();
710838fd1498Szrj result_chain.release ();
710938fd1498Szrj vec_oprnds.release ();
711038fd1498Szrj
711138fd1498Szrj return true;
711238fd1498Szrj }
711338fd1498Szrj
711438fd1498Szrj /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
711538fd1498Szrj VECTOR_CST mask. No checks are made that the target platform supports the
711638fd1498Szrj mask, so callers may wish to test can_vec_perm_const_p separately, or use
711738fd1498Szrj vect_gen_perm_mask_checked. */
711838fd1498Szrj
711938fd1498Szrj tree
vect_gen_perm_mask_any(tree vectype,const vec_perm_indices & sel)712038fd1498Szrj vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
712138fd1498Szrj {
712238fd1498Szrj tree mask_type;
712338fd1498Szrj
712438fd1498Szrj poly_uint64 nunits = sel.length ();
712538fd1498Szrj gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
712638fd1498Szrj
712738fd1498Szrj mask_type = build_vector_type (ssizetype, nunits);
712838fd1498Szrj return vec_perm_indices_to_tree (mask_type, sel);
712938fd1498Szrj }
713038fd1498Szrj
713138fd1498Szrj /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
713238fd1498Szrj i.e. that the target supports the pattern _for arbitrary input vectors_. */
713338fd1498Szrj
713438fd1498Szrj tree
vect_gen_perm_mask_checked(tree vectype,const vec_perm_indices & sel)713538fd1498Szrj vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
713638fd1498Szrj {
713738fd1498Szrj gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
713838fd1498Szrj return vect_gen_perm_mask_any (vectype, sel);
713938fd1498Szrj }
714038fd1498Szrj
714138fd1498Szrj /* Given a vector variable X and Y, that was generated for the scalar
714238fd1498Szrj STMT, generate instructions to permute the vector elements of X and Y
714338fd1498Szrj using permutation mask MASK_VEC, insert them at *GSI and return the
714438fd1498Szrj permuted vector variable. */
714538fd1498Szrj
714638fd1498Szrj static tree
permute_vec_elements(tree x,tree y,tree mask_vec,gimple * stmt,gimple_stmt_iterator * gsi)714738fd1498Szrj permute_vec_elements (tree x, tree y, tree mask_vec, gimple *stmt,
714838fd1498Szrj gimple_stmt_iterator *gsi)
714938fd1498Szrj {
715038fd1498Szrj tree vectype = TREE_TYPE (x);
715138fd1498Szrj tree perm_dest, data_ref;
715238fd1498Szrj gimple *perm_stmt;
715338fd1498Szrj
715438fd1498Szrj tree scalar_dest = gimple_get_lhs (stmt);
715538fd1498Szrj if (TREE_CODE (scalar_dest) == SSA_NAME)
715638fd1498Szrj perm_dest = vect_create_destination_var (scalar_dest, vectype);
715738fd1498Szrj else
715838fd1498Szrj perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
715938fd1498Szrj data_ref = make_ssa_name (perm_dest);
716038fd1498Szrj
716138fd1498Szrj /* Generate the permute statement. */
716238fd1498Szrj perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
716338fd1498Szrj vect_finish_stmt_generation (stmt, perm_stmt, gsi);
716438fd1498Szrj
716538fd1498Szrj return data_ref;
716638fd1498Szrj }
716738fd1498Szrj
716838fd1498Szrj /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
716938fd1498Szrj inserting them on the loops preheader edge. Returns true if we
717038fd1498Szrj were successful in doing so (and thus STMT can be moved then),
717138fd1498Szrj otherwise returns false. */
717238fd1498Szrj
717338fd1498Szrj static bool
hoist_defs_of_uses(gimple * stmt,struct loop * loop)717438fd1498Szrj hoist_defs_of_uses (gimple *stmt, struct loop *loop)
717538fd1498Szrj {
717638fd1498Szrj ssa_op_iter i;
717738fd1498Szrj tree op;
717838fd1498Szrj bool any = false;
717938fd1498Szrj
718038fd1498Szrj FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
718138fd1498Szrj {
718238fd1498Szrj gimple *def_stmt = SSA_NAME_DEF_STMT (op);
718338fd1498Szrj if (!gimple_nop_p (def_stmt)
718438fd1498Szrj && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
718538fd1498Szrj {
718638fd1498Szrj /* Make sure we don't need to recurse. While we could do
718738fd1498Szrj so in simple cases when there are more complex use webs
718838fd1498Szrj we don't have an easy way to preserve stmt order to fulfil
718938fd1498Szrj dependencies within them. */
719038fd1498Szrj tree op2;
719138fd1498Szrj ssa_op_iter i2;
719238fd1498Szrj if (gimple_code (def_stmt) == GIMPLE_PHI)
719338fd1498Szrj return false;
719438fd1498Szrj FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
719538fd1498Szrj {
719638fd1498Szrj gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
719738fd1498Szrj if (!gimple_nop_p (def_stmt2)
719838fd1498Szrj && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
719938fd1498Szrj return false;
720038fd1498Szrj }
720138fd1498Szrj any = true;
720238fd1498Szrj }
720338fd1498Szrj }
720438fd1498Szrj
720538fd1498Szrj if (!any)
720638fd1498Szrj return true;
720738fd1498Szrj
720838fd1498Szrj FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
720938fd1498Szrj {
721038fd1498Szrj gimple *def_stmt = SSA_NAME_DEF_STMT (op);
721138fd1498Szrj if (!gimple_nop_p (def_stmt)
721238fd1498Szrj && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
721338fd1498Szrj {
721438fd1498Szrj gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
721538fd1498Szrj gsi_remove (&gsi, false);
721638fd1498Szrj gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
721738fd1498Szrj }
721838fd1498Szrj }
721938fd1498Szrj
722038fd1498Szrj return true;
722138fd1498Szrj }
722238fd1498Szrj
722338fd1498Szrj /* vectorizable_load.
722438fd1498Szrj
722538fd1498Szrj Check if STMT reads a non scalar data-ref (array/pointer/structure) that
722638fd1498Szrj can be vectorized.
722738fd1498Szrj If VEC_STMT is also passed, vectorize the STMT: create a vectorized
722838fd1498Szrj stmt to replace it, put it in VEC_STMT, and insert it at BSI.
722938fd1498Szrj Return FALSE if not a vectorizable STMT, TRUE otherwise. */
723038fd1498Szrj
723138fd1498Szrj static bool
vectorizable_load(gimple * stmt,gimple_stmt_iterator * gsi,gimple ** vec_stmt,slp_tree slp_node,slp_instance slp_node_instance)723238fd1498Szrj vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
723338fd1498Szrj slp_tree slp_node, slp_instance slp_node_instance)
723438fd1498Szrj {
723538fd1498Szrj tree scalar_dest;
723638fd1498Szrj tree vec_dest = NULL;
723738fd1498Szrj tree data_ref = NULL;
723838fd1498Szrj stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
723938fd1498Szrj stmt_vec_info prev_stmt_info;
724038fd1498Szrj loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
724138fd1498Szrj struct loop *loop = NULL;
724238fd1498Szrj struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
724338fd1498Szrj bool nested_in_vect_loop = false;
724438fd1498Szrj struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
724538fd1498Szrj tree elem_type;
724638fd1498Szrj tree new_temp;
724738fd1498Szrj machine_mode mode;
724838fd1498Szrj gimple *new_stmt = NULL;
724938fd1498Szrj tree dummy;
725038fd1498Szrj enum dr_alignment_support alignment_support_scheme;
725138fd1498Szrj tree dataref_ptr = NULL_TREE;
725238fd1498Szrj tree dataref_offset = NULL_TREE;
725338fd1498Szrj gimple *ptr_incr = NULL;
725438fd1498Szrj int ncopies;
725538fd1498Szrj int i, j;
725638fd1498Szrj unsigned int group_size;
725738fd1498Szrj poly_uint64 group_gap_adj;
725838fd1498Szrj tree msq = NULL_TREE, lsq;
725938fd1498Szrj tree offset = NULL_TREE;
726038fd1498Szrj tree byte_offset = NULL_TREE;
726138fd1498Szrj tree realignment_token = NULL_TREE;
726238fd1498Szrj gphi *phi = NULL;
726338fd1498Szrj vec<tree> dr_chain = vNULL;
726438fd1498Szrj bool grouped_load = false;
726538fd1498Szrj gimple *first_stmt;
726638fd1498Szrj gimple *first_stmt_for_drptr = NULL;
726738fd1498Szrj bool inv_p;
726838fd1498Szrj bool compute_in_loop = false;
726938fd1498Szrj struct loop *at_loop;
727038fd1498Szrj int vec_num;
727138fd1498Szrj bool slp = (slp_node != NULL);
727238fd1498Szrj bool slp_perm = false;
727338fd1498Szrj bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
727438fd1498Szrj poly_uint64 vf;
727538fd1498Szrj tree aggr_type;
727638fd1498Szrj gather_scatter_info gs_info;
727738fd1498Szrj vec_info *vinfo = stmt_info->vinfo;
727838fd1498Szrj tree ref_type;
727938fd1498Szrj enum vect_def_type mask_dt = vect_unknown_def_type;
728038fd1498Szrj
728138fd1498Szrj if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
728238fd1498Szrj return false;
728338fd1498Szrj
728438fd1498Szrj if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
728538fd1498Szrj && ! vec_stmt)
728638fd1498Szrj return false;
728738fd1498Szrj
728838fd1498Szrj tree mask = NULL_TREE, mask_vectype = NULL_TREE;
728938fd1498Szrj if (is_gimple_assign (stmt))
729038fd1498Szrj {
729138fd1498Szrj scalar_dest = gimple_assign_lhs (stmt);
729238fd1498Szrj if (TREE_CODE (scalar_dest) != SSA_NAME)
729338fd1498Szrj return false;
729438fd1498Szrj
729538fd1498Szrj tree_code code = gimple_assign_rhs_code (stmt);
729638fd1498Szrj if (code != ARRAY_REF
729738fd1498Szrj && code != BIT_FIELD_REF
729838fd1498Szrj && code != INDIRECT_REF
729938fd1498Szrj && code != COMPONENT_REF
730038fd1498Szrj && code != IMAGPART_EXPR
730138fd1498Szrj && code != REALPART_EXPR
730238fd1498Szrj && code != MEM_REF
730338fd1498Szrj && TREE_CODE_CLASS (code) != tcc_declaration)
730438fd1498Szrj return false;
730538fd1498Szrj }
730638fd1498Szrj else
730738fd1498Szrj {
730838fd1498Szrj gcall *call = dyn_cast <gcall *> (stmt);
730938fd1498Szrj if (!call || !gimple_call_internal_p (call))
731038fd1498Szrj return false;
731138fd1498Szrj
731238fd1498Szrj internal_fn ifn = gimple_call_internal_fn (call);
731338fd1498Szrj if (!internal_load_fn_p (ifn))
731438fd1498Szrj return false;
731538fd1498Szrj
731638fd1498Szrj scalar_dest = gimple_call_lhs (call);
731738fd1498Szrj if (!scalar_dest)
731838fd1498Szrj return false;
731938fd1498Szrj
732038fd1498Szrj if (slp_node != NULL)
732138fd1498Szrj {
732238fd1498Szrj if (dump_enabled_p ())
732338fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
732438fd1498Szrj "SLP of masked loads not supported.\n");
732538fd1498Szrj return false;
732638fd1498Szrj }
732738fd1498Szrj
732838fd1498Szrj int mask_index = internal_fn_mask_index (ifn);
732938fd1498Szrj if (mask_index >= 0)
733038fd1498Szrj {
733138fd1498Szrj mask = gimple_call_arg (call, mask_index);
733238fd1498Szrj if (!vect_check_load_store_mask (stmt, mask, &mask_dt,
733338fd1498Szrj &mask_vectype))
733438fd1498Szrj return false;
733538fd1498Szrj }
733638fd1498Szrj }
733738fd1498Szrj
733838fd1498Szrj if (!STMT_VINFO_DATA_REF (stmt_info))
733938fd1498Szrj return false;
734038fd1498Szrj
734138fd1498Szrj tree vectype = STMT_VINFO_VECTYPE (stmt_info);
734238fd1498Szrj poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
734338fd1498Szrj
734438fd1498Szrj if (loop_vinfo)
734538fd1498Szrj {
734638fd1498Szrj loop = LOOP_VINFO_LOOP (loop_vinfo);
734738fd1498Szrj nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
734838fd1498Szrj vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
734938fd1498Szrj }
735038fd1498Szrj else
735138fd1498Szrj vf = 1;
735238fd1498Szrj
735338fd1498Szrj /* Multiple types in SLP are handled by creating the appropriate number of
735438fd1498Szrj vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
735538fd1498Szrj case of SLP. */
735638fd1498Szrj if (slp)
735738fd1498Szrj ncopies = 1;
735838fd1498Szrj else
735938fd1498Szrj ncopies = vect_get_num_copies (loop_vinfo, vectype);
736038fd1498Szrj
736138fd1498Szrj gcc_assert (ncopies >= 1);
736238fd1498Szrj
736338fd1498Szrj /* FORNOW. This restriction should be relaxed. */
736438fd1498Szrj if (nested_in_vect_loop && ncopies > 1)
736538fd1498Szrj {
736638fd1498Szrj if (dump_enabled_p ())
736738fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
736838fd1498Szrj "multiple types in nested loop.\n");
736938fd1498Szrj return false;
737038fd1498Szrj }
737138fd1498Szrj
737238fd1498Szrj /* Invalidate assumptions made by dependence analysis when vectorization
737338fd1498Szrj on the unrolled body effectively re-orders stmts. */
737438fd1498Szrj if (ncopies > 1
737538fd1498Szrj && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
737638fd1498Szrj && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
737738fd1498Szrj STMT_VINFO_MIN_NEG_DIST (stmt_info)))
737838fd1498Szrj {
737938fd1498Szrj if (dump_enabled_p ())
738038fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
738138fd1498Szrj "cannot perform implicit CSE when unrolling "
738238fd1498Szrj "with negative dependence distance\n");
738338fd1498Szrj return false;
738438fd1498Szrj }
738538fd1498Szrj
738638fd1498Szrj elem_type = TREE_TYPE (vectype);
738738fd1498Szrj mode = TYPE_MODE (vectype);
738838fd1498Szrj
738938fd1498Szrj /* FORNOW. In some cases can vectorize even if data-type not supported
739038fd1498Szrj (e.g. - data copies). */
739138fd1498Szrj if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
739238fd1498Szrj {
739338fd1498Szrj if (dump_enabled_p ())
739438fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
739538fd1498Szrj "Aligned load, but unsupported type.\n");
739638fd1498Szrj return false;
739738fd1498Szrj }
739838fd1498Szrj
739938fd1498Szrj /* Check if the load is a part of an interleaving chain. */
740038fd1498Szrj if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
740138fd1498Szrj {
740238fd1498Szrj grouped_load = true;
740338fd1498Szrj /* FORNOW */
740438fd1498Szrj gcc_assert (!nested_in_vect_loop);
740538fd1498Szrj gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
740638fd1498Szrj
740738fd1498Szrj first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
740838fd1498Szrj group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
740938fd1498Szrj
741038fd1498Szrj if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
741138fd1498Szrj slp_perm = true;
741238fd1498Szrj
741338fd1498Szrj /* Invalidate assumptions made by dependence analysis when vectorization
741438fd1498Szrj on the unrolled body effectively re-orders stmts. */
741538fd1498Szrj if (!PURE_SLP_STMT (stmt_info)
741638fd1498Szrj && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
741738fd1498Szrj && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
741838fd1498Szrj STMT_VINFO_MIN_NEG_DIST (stmt_info)))
741938fd1498Szrj {
742038fd1498Szrj if (dump_enabled_p ())
742138fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
742238fd1498Szrj "cannot perform implicit CSE when performing "
742338fd1498Szrj "group loads with negative dependence distance\n");
742438fd1498Szrj return false;
742538fd1498Szrj }
742638fd1498Szrj
742738fd1498Szrj /* Similarly when the stmt is a load that is both part of a SLP
742838fd1498Szrj instance and a loop vectorized stmt via the same-dr mechanism
742938fd1498Szrj we have to give up. */
743038fd1498Szrj if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)
743138fd1498Szrj && (STMT_SLP_TYPE (stmt_info)
743238fd1498Szrj != STMT_SLP_TYPE (vinfo_for_stmt
743338fd1498Szrj (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)))))
743438fd1498Szrj {
743538fd1498Szrj if (dump_enabled_p ())
743638fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
743738fd1498Szrj "conflicting SLP types for CSEd load\n");
743838fd1498Szrj return false;
743938fd1498Szrj }
744038fd1498Szrj }
744138fd1498Szrj else
744238fd1498Szrj group_size = 1;
744338fd1498Szrj
744438fd1498Szrj vect_memory_access_type memory_access_type;
744538fd1498Szrj if (!get_load_store_type (stmt, vectype, slp, mask, VLS_LOAD, ncopies,
744638fd1498Szrj &memory_access_type, &gs_info))
744738fd1498Szrj return false;
744838fd1498Szrj
744938fd1498Szrj if (mask)
745038fd1498Szrj {
745138fd1498Szrj if (memory_access_type == VMAT_CONTIGUOUS)
745238fd1498Szrj {
745338fd1498Szrj machine_mode vec_mode = TYPE_MODE (vectype);
745438fd1498Szrj if (!VECTOR_MODE_P (vec_mode)
745538fd1498Szrj || !can_vec_mask_load_store_p (vec_mode,
745638fd1498Szrj TYPE_MODE (mask_vectype), true))
745738fd1498Szrj return false;
745838fd1498Szrj }
745938fd1498Szrj else if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
746038fd1498Szrj {
746138fd1498Szrj tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
746238fd1498Szrj tree masktype
746338fd1498Szrj = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
746438fd1498Szrj if (TREE_CODE (masktype) == INTEGER_TYPE)
746538fd1498Szrj {
746638fd1498Szrj if (dump_enabled_p ())
746738fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
746838fd1498Szrj "masked gather with integer mask not"
746938fd1498Szrj " supported.");
747038fd1498Szrj return false;
747138fd1498Szrj }
747238fd1498Szrj }
747338fd1498Szrj else if (memory_access_type != VMAT_LOAD_STORE_LANES
747438fd1498Szrj && memory_access_type != VMAT_GATHER_SCATTER)
747538fd1498Szrj {
747638fd1498Szrj if (dump_enabled_p ())
747738fd1498Szrj dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
747838fd1498Szrj "unsupported access type for masked load.\n");
747938fd1498Szrj return false;
748038fd1498Szrj }
748138fd1498Szrj }
748238fd1498Szrj
748338fd1498Szrj if (!vec_stmt) /* transformation not required. */
748438fd1498Szrj {
748538fd1498Szrj if (!slp)
748638fd1498Szrj STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
748738fd1498Szrj
748838fd1498Szrj if (loop_vinfo
748938fd1498Szrj && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
749038fd1498Szrj check_load_store_masking (loop_vinfo, vectype, VLS_LOAD, group_size,
749138fd1498Szrj memory_access_type, &gs_info);
749238fd1498Szrj
749338fd1498Szrj STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
749438fd1498Szrj /* The SLP costs are calculated during SLP analysis. */
749538fd1498Szrj if (! slp_node)
749638fd1498Szrj vect_model_load_cost (stmt_info, ncopies, memory_access_type,
749738fd1498Szrj NULL, NULL, NULL);
749838fd1498Szrj return true;
749938fd1498Szrj }
750038fd1498Szrj
750138fd1498Szrj if (!slp)
750238fd1498Szrj gcc_assert (memory_access_type
750338fd1498Szrj == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
750438fd1498Szrj
750538fd1498Szrj if (dump_enabled_p ())
750638fd1498Szrj dump_printf_loc (MSG_NOTE, vect_location,
750738fd1498Szrj "transform load. ncopies = %d\n", ncopies);
750838fd1498Szrj
750938fd1498Szrj /* Transform. */
751038fd1498Szrj
751138fd1498Szrj ensure_base_align (dr);
751238fd1498Szrj
751338fd1498Szrj if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
751438fd1498Szrj {
751538fd1498Szrj vect_build_gather_load_calls (stmt, gsi, vec_stmt, &gs_info, mask,
751638fd1498Szrj mask_dt);
751738fd1498Szrj return true;
751838fd1498Szrj }
751938fd1498Szrj
752038fd1498Szrj if (memory_access_type == VMAT_ELEMENTWISE
752138fd1498Szrj || memory_access_type == VMAT_STRIDED_SLP)
752238fd1498Szrj {
752338fd1498Szrj gimple_stmt_iterator incr_gsi;
752438fd1498Szrj bool insert_after;
752538fd1498Szrj gimple *incr;
752638fd1498Szrj tree offvar;
752738fd1498Szrj tree ivstep;
752838fd1498Szrj tree running_off;
752938fd1498Szrj vec<constructor_elt, va_gc> *v = NULL;
753038fd1498Szrj tree stride_base, stride_step, alias_off;
753138fd1498Szrj /* Checked by get_load_store_type. */
753238fd1498Szrj unsigned int const_nunits = nunits.to_constant ();
753338fd1498Szrj unsigned HOST_WIDE_INT cst_offset = 0;
753438fd1498Szrj
753538fd1498Szrj gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
753638fd1498Szrj gcc_assert (!nested_in_vect_loop);
753738fd1498Szrj
753838fd1498Szrj if (grouped_load)
753938fd1498Szrj {
754038fd1498Szrj first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
754138fd1498Szrj first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
754238fd1498Szrj }
754338fd1498Szrj else
754438fd1498Szrj {
754538fd1498Szrj first_stmt = stmt;
754638fd1498Szrj first_dr = dr;
754738fd1498Szrj }
754838fd1498Szrj if (slp && grouped_load)
754938fd1498Szrj {
755038fd1498Szrj group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
755138fd1498Szrj ref_type = get_group_alias_ptr_type (first_stmt);
755238fd1498Szrj }
755338fd1498Szrj else
755438fd1498Szrj {
755538fd1498Szrj if (grouped_load)
755638fd1498Szrj cst_offset
755738fd1498Szrj = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
755838fd1498Szrj * vect_get_place_in_interleaving_chain (stmt, first_stmt));
755938fd1498Szrj group_size = 1;
756038fd1498Szrj ref_type = reference_alias_ptr_type (DR_REF (dr));
756138fd1498Szrj }
756238fd1498Szrj
756338fd1498Szrj stride_base
756438fd1498Szrj = fold_build_pointer_plus
756538fd1498Szrj (DR_BASE_ADDRESS (first_dr),
756638fd1498Szrj size_binop (PLUS_EXPR,
756738fd1498Szrj convert_to_ptrofftype (DR_OFFSET (first_dr)),
756838fd1498Szrj convert_to_ptrofftype (DR_INIT (first_dr))));
756938fd1498Szrj stride_step = fold_convert (sizetype, DR_STEP (first_dr));
757038fd1498Szrj
757138fd1498Szrj /* For a load with loop-invariant (but other than power-of-2)
757238fd1498Szrj stride (i.e. not a grouped access) like so:
757338fd1498Szrj
757438fd1498Szrj for (i = 0; i < n; i += stride)
757538fd1498Szrj ... = array[i];
757638fd1498Szrj
757738fd1498Szrj we generate a new induction variable and new accesses to
757838fd1498Szrj form a new vector (or vectors, depending on ncopies):
757938fd1498Szrj
758038fd1498Szrj for (j = 0; ; j += VF*stride)
758138fd1498Szrj tmp1 = array[j];
758238fd1498Szrj tmp2 = array[j + stride];
758338fd1498Szrj ...
758438fd1498Szrj vectemp = {tmp1, tmp2, ...}
758538fd1498Szrj */
758638fd1498Szrj
758738fd1498Szrj ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
758838fd1498Szrj build_int_cst (TREE_TYPE (stride_step), vf));
758938fd1498Szrj
759038fd1498Szrj standard_iv_increment_position (loop, &incr_gsi, &insert_after);
759138fd1498Szrj
759238fd1498Szrj stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
759338fd1498Szrj ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
759438fd1498Szrj create_iv (stride_base, ivstep, NULL,
759538fd1498Szrj loop, &incr_gsi, insert_after,
759638fd1498Szrj &offvar, NULL);
759738fd1498Szrj incr = gsi_stmt (incr_gsi);
759838fd1498Szrj set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
759938fd1498Szrj
760038fd1498Szrj stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
760138fd1498Szrj
760238fd1498Szrj prev_stmt_info = NULL;
760338fd1498Szrj running_off = offvar;
760438fd1498Szrj alias_off = build_int_cst (ref_type, 0);
760538fd1498Szrj int nloads = const_nunits;
760638fd1498Szrj int lnel = 1;
760738fd1498Szrj tree ltype = TREE_TYPE (vectype);
760838fd1498Szrj tree lvectype = vectype;
760938fd1498Szrj auto_vec<tree> dr_chain;
761038fd1498Szrj if (memory_access_type == VMAT_STRIDED_SLP)
761138fd1498Szrj {
761238fd1498Szrj if (group_size < const_nunits)
761338fd1498Szrj {
761438fd1498Szrj /* First check if vec_init optab supports construction from
761538fd1498Szrj vector elts directly. */
761638fd1498Szrj scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
761738fd1498Szrj machine_mode vmode;
761838fd1498Szrj if (mode_for_vector (elmode, group_size).exists (&vmode)
761938fd1498Szrj && VECTOR_MODE_P (vmode)
762038fd1498Szrj && targetm.vector_mode_supported_p (vmode)
762138fd1498Szrj && (convert_optab_handler (vec_init_optab,
762238fd1498Szrj TYPE_MODE (vectype), vmode)
762338fd1498Szrj != CODE_FOR_nothing))
762438fd1498Szrj {
762538fd1498Szrj nloads = const_nunits / group_size;
762638fd1498Szrj lnel = group_size;
762738fd1498Szrj ltype = build_vector_type (TREE_TYPE (vectype), group_size);
762838fd1498Szrj }
762938fd1498Szrj else
763038fd1498Szrj {
763138fd1498Szrj /* Otherwise avoid emitting a constructor of vector elements
763238fd1498Szrj by performing the loads using an integer type of the same
763338fd1498Szrj size, constructing a vector of those and then
763438fd1498Szrj re-interpreting it as the original vector type.
763538fd1498Szrj This avoids a huge runtime penalty due to the general
763638fd1498Szrj inability to perform store forwarding from smaller stores
763738fd1498Szrj to a larger load. */
763838fd1498Szrj unsigned lsize
763938fd1498Szrj = group_size * TYPE_PRECISION (TREE_TYPE (vectype));
764038fd1498Szrj elmode = int_mode_for_size (lsize, 0).require ();
764138fd1498Szrj unsigned int lnunits = const_nunits / group_size;
764238fd1498Szrj /* If we can't construct such a vector fall back to
764338fd1498Szrj element loads of the original vector type. */
764438fd1498Szrj if (mode_for_vector (elmode, lnunits).exists (&vmode)
764538fd1498Szrj && VECTOR_MODE_P (vmode)
764638fd1498Szrj && targetm.vector_mode_supported_p (vmode)
764738fd1498Szrj && (convert_optab_handler (vec_init_optab, vmode, elmode)
764838fd1498Szrj != CODE_FOR_nothing))
764938fd1498Szrj {
765038fd1498Szrj nloads = lnunits;
765138fd1498Szrj lnel = group_size;
765238fd1498Szrj ltype = build_nonstandard_integer_type (lsize, 1);
765338fd1498Szrj lvectype = build_vector_type (ltype, nloads);
765438fd1498Szrj }
765538fd1498Szrj }
765638fd1498Szrj }
765738fd1498Szrj else
765838fd1498Szrj {
765938fd1498Szrj nloads = 1;
766038fd1498Szrj lnel = const_nunits;
766138fd1498Szrj ltype = vectype;
766238fd1498Szrj }
766338fd1498Szrj ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
766438fd1498Szrj }
7665*58e805e6Szrj /* Load vector(1) scalar_type if it's 1 element-wise vectype. */
7666*58e805e6Szrj else if (nloads == 1)
7667*58e805e6Szrj ltype = vectype;
7668*58e805e6Szrj
766938fd1498Szrj if (slp)
767038fd1498Szrj {
767138fd1498Szrj /* For SLP permutation support we need to load the whole group,
767238fd1498Szrj not only the number of vector stmts the permutation result
767338fd1498Szrj fits in. */
767438fd1498Szrj if (slp_perm)
767538fd1498Szrj {
767638fd1498Szrj /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
767738fd1498Szrj variable VF. */
767838fd1498Szrj unsigned int const_vf = vf.to_constant ();
767938fd1498Szrj ncopies = CEIL (group_size * const_vf, const_nunits);
768038fd1498Szrj dr_chain.create (ncopies);
768138fd1498Szrj }
768238fd1498Szrj else
768338fd1498Szrj ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
768438fd1498Szrj }
768538fd1498Szrj unsigned int group_el = 0;
768638fd1498Szrj unsigned HOST_WIDE_INT
768738fd1498Szrj elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
768838fd1498Szrj for (j = 0; j < ncopies; j++)
768938fd1498Szrj {
769038fd1498Szrj if (nloads > 1)
769138fd1498Szrj vec_alloc (v, nloads);
769238fd1498Szrj for (i = 0; i < nloads; i++)
769338fd1498Szrj {
769438fd1498Szrj tree this_off = build_int_cst (TREE_TYPE (alias_off),
769538fd1498Szrj group_el * elsz + cst_offset);
769638fd1498Szrj tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
769738fd1498Szrj vect_copy_ref_info (data_ref, DR_REF (first_dr));
769838fd1498Szrj new_stmt = gimple_build_assign (make_ssa_name (ltype), data_ref);
769938fd1498Szrj vect_finish_stmt_generation (stmt, new_stmt, gsi);
770038fd1498Szrj if (nloads > 1)
770138fd1498Szrj CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
770238fd1498Szrj gimple_assign_lhs (new_stmt));
770338fd1498Szrj
770438fd1498Szrj group_el += lnel;
770538fd1498Szrj if (! slp
770638fd1498Szrj || group_el == group_size)
770738fd1498Szrj {
770838fd1498Szrj tree newoff = copy_ssa_name (running_off);
770938fd1498Szrj gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
771038fd1498Szrj running_off, stride_step);
771138fd1498Szrj vect_finish_stmt_generation (stmt, incr, gsi);
771238fd1498Szrj
771338fd1498Szrj running_off = newoff;
771438fd1498Szrj group_el = 0;
771538fd1498Szrj }
771638fd1498Szrj }
771738fd1498Szrj if (nloads > 1)
771838fd1498Szrj {
771938fd1498Szrj tree vec_inv = build_constructor (lvectype, v);
772038fd1498Szrj new_temp = vect_init_vector (stmt, vec_inv, lvectype, gsi);
772138fd1498Szrj new_stmt = SSA_NAME_DEF_STMT (new_temp);
772238fd1498Szrj if (lvectype != vectype)
772338fd1498Szrj {
772438fd1498Szrj new_stmt = gimple_build_assign (make_ssa_name (vectype),
772538fd1498Szrj VIEW_CONVERT_EXPR,
772638fd1498Szrj build1 (VIEW_CONVERT_EXPR,
772738fd1498Szrj vectype, new_temp));
772838fd1498Szrj vect_finish_stmt_generation (stmt, new_stmt, gsi);
772938fd1498Szrj }
773038fd1498Szrj }
773138fd1498Szrj
773238fd1498Szrj if (slp)
773338fd1498Szrj {
773438fd1498Szrj if (slp_perm)
773538fd1498Szrj dr_chain.quick_push (gimple_assign_lhs (new_stmt));
773638fd1498Szrj else
773738fd1498Szrj SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
773838fd1498Szrj }
773938fd1498Szrj else
774038fd1498Szrj {
774138fd1498Szrj if (j == 0)
774238fd1498Szrj STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
774338fd1498Szrj else
774438fd1498Szrj STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
774538fd1498Szrj prev_stmt_info = vinfo_for_stmt (new_stmt);
774638fd1498Szrj }
774738fd1498Szrj }
774838fd1498Szrj if (slp_perm)
774938fd1498Szrj {
775038fd1498Szrj unsigned n_perms;
775138fd1498Szrj vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
775238fd1498Szrj slp_node_instance, false, &n_perms);
775338fd1498Szrj }
775438fd1498Szrj return true;
775538fd1498Szrj }
775638fd1498Szrj
775738fd1498Szrj if (memory_access_type == VMAT_GATHER_SCATTER
775838fd1498Szrj || (!slp && memory_access_type == VMAT_CONTIGUOUS))
775938fd1498Szrj grouped_load = false;
776038fd1498Szrj
776138fd1498Szrj if (grouped_load)
776238fd1498Szrj {
776338fd1498Szrj first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
776438fd1498Szrj group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
776538fd1498Szrj /* For SLP vectorization we directly vectorize a subchain
776638fd1498Szrj without permutation. */
776738fd1498Szrj if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
776838fd1498Szrj first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
776938fd1498Szrj /* For BB vectorization always use the first stmt to base
777038fd1498Szrj the data ref pointer on. */
777138fd1498Szrj if (bb_vinfo)
777238fd1498Szrj first_stmt_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
777338fd1498Szrj
777438fd1498Szrj /* Check if the chain of loads is already vectorized. */
777538fd1498Szrj if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
777638fd1498Szrj /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
777738fd1498Szrj ??? But we can only do so if there is exactly one
777838fd1498Szrj as we have no way to get at the rest. Leave the CSE
777938fd1498Szrj opportunity alone.
778038fd1498Szrj ??? With the group load eventually participating
778138fd1498Szrj in multiple different permutations (having multiple
778238fd1498Szrj slp nodes which refer to the same group) the CSE
778338fd1498Szrj is even wrong code. See PR56270. */
778438fd1498Szrj && !slp)
778538fd1498Szrj {
778638fd1498Szrj *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
778738fd1498Szrj return true;
778838fd1498Szrj }
778938fd1498Szrj first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
779038fd1498Szrj group_gap_adj = 0;
779138fd1498Szrj
779238fd1498Szrj /* VEC_NUM is the number of vect stmts to be created for this group. */
779338fd1498Szrj if (slp)
779438fd1498Szrj {
779538fd1498Szrj grouped_load = false;
779638fd1498Szrj /* For SLP permutation support we need to load the whole group,
779738fd1498Szrj not only the number of vector stmts the permutation result
779838fd1498Szrj fits in. */
779938fd1498Szrj if (slp_perm)
780038fd1498Szrj {
780138fd1498Szrj /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
780238fd1498Szrj variable VF. */
780338fd1498Szrj unsigned int const_vf = vf.to_constant ();
780438fd1498Szrj unsigned int const_nunits = nunits.to_constant ();
780538fd1498Szrj vec_num = CEIL (group_size * const_vf, const_nunits);
780638fd1498Szrj group_gap_adj = vf * group_size - nunits * vec_num;
780738fd1498Szrj }
780838fd1498Szrj else
780938fd1498Szrj {
781038fd1498Szrj vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
781138fd1498Szrj group_gap_adj
781238fd1498Szrj = group_size - SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
781338fd1498Szrj }
781438fd1498Szrj }
781538fd1498Szrj else
781638fd1498Szrj vec_num = group_size;
781738fd1498Szrj
781838fd1498Szrj ref_type = get_group_alias_ptr_type (first_stmt);
781938fd1498Szrj }
782038fd1498Szrj else
782138fd1498Szrj {
782238fd1498Szrj first_stmt = stmt;
782338fd1498Szrj first_dr = dr;
782438fd1498Szrj group_size = vec_num = 1;
782538fd1498Szrj group_gap_adj = 0;
782638fd1498Szrj ref_type = reference_alias_ptr_type (DR_REF (first_dr));
782738fd1498Szrj }
782838fd1498Szrj
782938fd1498Szrj alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
783038fd1498Szrj gcc_assert (alignment_support_scheme);
783138fd1498Szrj vec_loop_masks *loop_masks
783238fd1498Szrj = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
783338fd1498Szrj ? &LOOP_VINFO_MASKS (loop_vinfo)
783438fd1498Szrj : NULL);
783538fd1498Szrj /* Targets with store-lane instructions must not require explicit
783638fd1498Szrj realignment. vect_supportable_dr_alignment always returns either
783738fd1498Szrj dr_aligned or dr_unaligned_supported for masked operations. */
783838fd1498Szrj gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
783938fd1498Szrj && !mask
784038fd1498Szrj && !loop_masks)
784138fd1498Szrj || alignment_support_scheme == dr_aligned
784238fd1498Szrj || alignment_support_scheme == dr_unaligned_supported);
784338fd1498Szrj
784438fd1498Szrj /* In case the vectorization factor (VF) is bigger than the number
784538fd1498Szrj of elements that we can fit in a vectype (nunits), we have to generate
784638fd1498Szrj more than one vector stmt - i.e - we need to "unroll" the
784738fd1498Szrj vector stmt by a factor VF/nunits. In doing so, we record a pointer
784838fd1498Szrj from one copy of the vector stmt to the next, in the field
784938fd1498Szrj STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
785038fd1498Szrj stages to find the correct vector defs to be used when vectorizing
785138fd1498Szrj stmts that use the defs of the current stmt. The example below
785238fd1498Szrj illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
785338fd1498Szrj need to create 4 vectorized stmts):
785438fd1498Szrj
785538fd1498Szrj before vectorization:
785638fd1498Szrj RELATED_STMT VEC_STMT
785738fd1498Szrj S1: x = memref - -
785838fd1498Szrj S2: z = x + 1 - -
785938fd1498Szrj
786038fd1498Szrj step 1: vectorize stmt S1:
786138fd1498Szrj We first create the vector stmt VS1_0, and, as usual, record a
786238fd1498Szrj pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
786338fd1498Szrj Next, we create the vector stmt VS1_1, and record a pointer to
786438fd1498Szrj it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
786538fd1498Szrj Similarly, for VS1_2 and VS1_3. This is the resulting chain of
786638fd1498Szrj stmts and pointers:
786738fd1498Szrj RELATED_STMT VEC_STMT
786838fd1498Szrj VS1_0: vx0 = memref0 VS1_1 -
786938fd1498Szrj VS1_1: vx1 = memref1 VS1_2 -
787038fd1498Szrj VS1_2: vx2 = memref2 VS1_3 -
787138fd1498Szrj VS1_3: vx3 = memref3 - -
787238fd1498Szrj S1: x = load - VS1_0
787338fd1498Szrj S2: z = x + 1 - -
787438fd1498Szrj
787538fd1498Szrj      See the documentation of vect_get_vec_def_for_stmt_copy for how the
787638fd1498Szrj      information we recorded in the RELATED_STMT field is used to vectorize
787738fd1498Szrj      stmt S2.  */
787838fd1498Szrj
787938fd1498Szrj /* In case of interleaving (non-unit grouped access):
788038fd1498Szrj
788138fd1498Szrj S1: x2 = &base + 2
788238fd1498Szrj S2: x0 = &base
788338fd1498Szrj S3: x1 = &base + 1
788438fd1498Szrj S4: x3 = &base + 3
788538fd1498Szrj
788638fd1498Szrj Vectorized loads are created in the order of memory accesses
788738fd1498Szrj starting from the access of the first stmt of the chain:
788838fd1498Szrj
788938fd1498Szrj VS1: vx0 = &base
789038fd1498Szrj VS2: vx1 = &base + vec_size*1
789138fd1498Szrj VS3: vx3 = &base + vec_size*2
789238fd1498Szrj VS4: vx4 = &base + vec_size*3
789338fd1498Szrj
789438fd1498Szrj Then permutation statements are generated:
789538fd1498Szrj
789638fd1498Szrj VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
789738fd1498Szrj VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
789838fd1498Szrj ...
789938fd1498Szrj
790038fd1498Szrj And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
790138fd1498Szrj (the order of the data-refs in the output of vect_permute_load_chain
790238fd1498Szrj corresponds to the order of scalar stmts in the interleaving chain - see
790338fd1498Szrj the documentation of vect_permute_load_chain()).
790438fd1498Szrj The generation of permutation stmts and recording them in
790538fd1498Szrj STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
790638fd1498Szrj
790738fd1498Szrj In case of both multiple types and interleaving, the vector loads and
790838fd1498Szrj permutation stmts above are created for every copy. The result vector
790938fd1498Szrj stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
791038fd1498Szrj corresponding STMT_VINFO_RELATED_STMT for the next copies. */
791138fd1498Szrj
791238fd1498Szrj /* If the data reference is aligned (dr_aligned) or potentially unaligned
791338fd1498Szrj on a target that supports unaligned accesses (dr_unaligned_supported)
791438fd1498Szrj we generate the following code:
791538fd1498Szrj p = initial_addr;
791638fd1498Szrj indx = 0;
791738fd1498Szrj loop {
791838fd1498Szrj p = p + indx * vectype_size;
791938fd1498Szrj vec_dest = *(p);
792038fd1498Szrj indx = indx + 1;
792138fd1498Szrj }
792238fd1498Szrj
792338fd1498Szrj Otherwise, the data reference is potentially unaligned on a target that
792438fd1498Szrj does not support unaligned accesses (dr_explicit_realign_optimized) -
792538fd1498Szrj then generate the following code, in which the data in each iteration is
792638fd1498Szrj obtained by two vector loads, one from the previous iteration, and one
792738fd1498Szrj from the current iteration:
792838fd1498Szrj p1 = initial_addr;
792938fd1498Szrj msq_init = *(floor(p1))
793038fd1498Szrj p2 = initial_addr + VS - 1;
793138fd1498Szrj realignment_token = call target_builtin;
793238fd1498Szrj indx = 0;
793338fd1498Szrj loop {
793438fd1498Szrj p2 = p2 + indx * vectype_size
793538fd1498Szrj lsq = *(floor(p2))
793638fd1498Szrj vec_dest = realign_load (msq, lsq, realignment_token)
793738fd1498Szrj indx = indx + 1;
793838fd1498Szrj msq = lsq;
793938fd1498Szrj } */
794038fd1498Szrj
794138fd1498Szrj /* If the misalignment remains the same throughout the execution of the
794238fd1498Szrj loop, we can create the init_addr and permutation mask at the loop
794338fd1498Szrj preheader. Otherwise, it needs to be created inside the loop.
794438fd1498Szrj This can only occur when vectorizing memory accesses in the inner-loop
794538fd1498Szrj nested within an outer-loop that is being vectorized. */
794638fd1498Szrj
794738fd1498Szrj if (nested_in_vect_loop
794838fd1498Szrj && !multiple_p (DR_STEP_ALIGNMENT (dr),
794938fd1498Szrj GET_MODE_SIZE (TYPE_MODE (vectype))))
795038fd1498Szrj {
795138fd1498Szrj gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
795238fd1498Szrj compute_in_loop = true;
795338fd1498Szrj }
795438fd1498Szrj
795538fd1498Szrj if ((alignment_support_scheme == dr_explicit_realign_optimized
795638fd1498Szrj || alignment_support_scheme == dr_explicit_realign)
795738fd1498Szrj && !compute_in_loop)
795838fd1498Szrj {
795938fd1498Szrj msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
796038fd1498Szrj alignment_support_scheme, NULL_TREE,
796138fd1498Szrj &at_loop);
796238fd1498Szrj if (alignment_support_scheme == dr_explicit_realign_optimized)
796338fd1498Szrj {
796438fd1498Szrj phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
796538fd1498Szrj byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
796638fd1498Szrj size_one_node);
796738fd1498Szrj }
796838fd1498Szrj }
796938fd1498Szrj else
797038fd1498Szrj at_loop = loop;
797138fd1498Szrj
797238fd1498Szrj if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
797338fd1498Szrj offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
797438fd1498Szrj
797538fd1498Szrj tree bump;
797638fd1498Szrj tree vec_offset = NULL_TREE;
797738fd1498Szrj if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
797838fd1498Szrj {
797938fd1498Szrj aggr_type = NULL_TREE;
798038fd1498Szrj bump = NULL_TREE;
798138fd1498Szrj }
798238fd1498Szrj else if (memory_access_type == VMAT_GATHER_SCATTER)
798338fd1498Szrj {
798438fd1498Szrj aggr_type = elem_type;
798538fd1498Szrj vect_get_strided_load_store_ops (stmt, loop_vinfo, &gs_info,
798638fd1498Szrj &bump, &vec_offset);
798738fd1498Szrj }
798838fd1498Szrj else
798938fd1498Szrj {
799038fd1498Szrj if (memory_access_type == VMAT_LOAD_STORE_LANES)
799138fd1498Szrj aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
799238fd1498Szrj else
799338fd1498Szrj aggr_type = vectype;
799438fd1498Szrj bump = vect_get_data_ptr_increment (dr, aggr_type, memory_access_type);
799538fd1498Szrj }
799638fd1498Szrj
799738fd1498Szrj tree vec_mask = NULL_TREE;
799838fd1498Szrj prev_stmt_info = NULL;
799938fd1498Szrj poly_uint64 group_elt = 0;
800038fd1498Szrj for (j = 0; j < ncopies; j++)
800138fd1498Szrj {
800238fd1498Szrj /* 1. Create the vector or array pointer update chain. */
800338fd1498Szrj if (j == 0)
800438fd1498Szrj {
800538fd1498Szrj bool simd_lane_access_p
800638fd1498Szrj = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
800738fd1498Szrj if (simd_lane_access_p
800838fd1498Szrj && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
800938fd1498Szrj && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
801038fd1498Szrj && integer_zerop (DR_OFFSET (first_dr))
801138fd1498Szrj && integer_zerop (DR_INIT (first_dr))
801238fd1498Szrj && alias_sets_conflict_p (get_alias_set (aggr_type),
801338fd1498Szrj get_alias_set (TREE_TYPE (ref_type)))
801438fd1498Szrj && (alignment_support_scheme == dr_aligned
801538fd1498Szrj || alignment_support_scheme == dr_unaligned_supported))
801638fd1498Szrj {
801738fd1498Szrj dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
801838fd1498Szrj dataref_offset = build_int_cst (ref_type, 0);
801938fd1498Szrj inv_p = false;
802038fd1498Szrj }
802138fd1498Szrj else if (first_stmt_for_drptr
802238fd1498Szrj && first_stmt != first_stmt_for_drptr)
802338fd1498Szrj {
802438fd1498Szrj dataref_ptr
802538fd1498Szrj = vect_create_data_ref_ptr (first_stmt_for_drptr, aggr_type,
802638fd1498Szrj at_loop, offset, &dummy, gsi,
802738fd1498Szrj &ptr_incr, simd_lane_access_p,
802838fd1498Szrj &inv_p, byte_offset, bump);
802938fd1498Szrj /* Adjust the pointer by the difference to first_stmt. */
803038fd1498Szrj data_reference_p ptrdr
803138fd1498Szrj = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr));
803238fd1498Szrj tree diff = fold_convert (sizetype,
803338fd1498Szrj size_binop (MINUS_EXPR,
803438fd1498Szrj DR_INIT (first_dr),
803538fd1498Szrj DR_INIT (ptrdr)));
803638fd1498Szrj dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
803738fd1498Szrj stmt, diff);
803838fd1498Szrj }
803938fd1498Szrj else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
804038fd1498Szrj {
804138fd1498Szrj vect_get_gather_scatter_ops (loop, stmt, &gs_info,
804238fd1498Szrj &dataref_ptr, &vec_offset);
804338fd1498Szrj inv_p = false;
804438fd1498Szrj }
804538fd1498Szrj else
804638fd1498Szrj dataref_ptr
804738fd1498Szrj = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
804838fd1498Szrj offset, &dummy, gsi, &ptr_incr,
804938fd1498Szrj simd_lane_access_p, &inv_p,
805038fd1498Szrj byte_offset, bump);
805138fd1498Szrj if (mask)
805238fd1498Szrj vec_mask = vect_get_vec_def_for_operand (mask, stmt,
805338fd1498Szrj mask_vectype);
805438fd1498Szrj }
805538fd1498Szrj else
805638fd1498Szrj {
805738fd1498Szrj if (dataref_offset)
805838fd1498Szrj dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
805938fd1498Szrj bump);
806038fd1498Szrj else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
806138fd1498Szrj vec_offset = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
806238fd1498Szrj vec_offset);
806338fd1498Szrj else
806438fd1498Szrj dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
806538fd1498Szrj stmt, bump);
806638fd1498Szrj if (mask)
806738fd1498Szrj vec_mask = vect_get_vec_def_for_stmt_copy (mask_dt, vec_mask);
806838fd1498Szrj }
806938fd1498Szrj
807038fd1498Szrj if (grouped_load || slp_perm)
807138fd1498Szrj dr_chain.create (vec_num);
807238fd1498Szrj
807338fd1498Szrj if (memory_access_type == VMAT_LOAD_STORE_LANES)
807438fd1498Szrj {
807538fd1498Szrj tree vec_array;
807638fd1498Szrj
807738fd1498Szrj vec_array = create_vector_array (vectype, vec_num);
807838fd1498Szrj
807938fd1498Szrj tree final_mask = NULL_TREE;
808038fd1498Szrj if (loop_masks)
808138fd1498Szrj final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
808238fd1498Szrj vectype, j);
808338fd1498Szrj if (vec_mask)
808438fd1498Szrj final_mask = prepare_load_store_mask (mask_vectype, final_mask,
808538fd1498Szrj vec_mask, gsi);
808638fd1498Szrj
808738fd1498Szrj gcall *call;
808838fd1498Szrj if (final_mask)
808938fd1498Szrj {
809038fd1498Szrj /* Emit:
809138fd1498Szrj VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
809238fd1498Szrj VEC_MASK). */
809338fd1498Szrj unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
809438fd1498Szrj tree alias_ptr = build_int_cst (ref_type, align);
809538fd1498Szrj call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
809638fd1498Szrj dataref_ptr, alias_ptr,
809738fd1498Szrj final_mask);
809838fd1498Szrj }
809938fd1498Szrj else
810038fd1498Szrj {
810138fd1498Szrj /* Emit:
810238fd1498Szrj VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
810338fd1498Szrj data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
810438fd1498Szrj call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
810538fd1498Szrj }
810638fd1498Szrj gimple_call_set_lhs (call, vec_array);
810738fd1498Szrj gimple_call_set_nothrow (call, true);
810838fd1498Szrj new_stmt = call;
810938fd1498Szrj vect_finish_stmt_generation (stmt, new_stmt, gsi);
811038fd1498Szrj
811138fd1498Szrj /* Extract each vector into an SSA_NAME. */
811238fd1498Szrj for (i = 0; i < vec_num; i++)
811338fd1498Szrj {
811438fd1498Szrj new_temp = read_vector_array (stmt, gsi, scalar_dest,
811538fd1498Szrj vec_array, i);
811638fd1498Szrj dr_chain.quick_push (new_temp);
811738fd1498Szrj }
811838fd1498Szrj
811938fd1498Szrj /* Record the mapping between SSA_NAMEs and statements. */
812038fd1498Szrj vect_record_grouped_load_vectors (stmt, dr_chain);
812138fd1498Szrj }
812238fd1498Szrj else
812338fd1498Szrj {
812438fd1498Szrj for (i = 0; i < vec_num; i++)
812538fd1498Szrj {
812638fd1498Szrj tree final_mask = NULL_TREE;
812738fd1498Szrj if (loop_masks
812838fd1498Szrj && memory_access_type != VMAT_INVARIANT)
812938fd1498Szrj final_mask = vect_get_loop_mask (gsi, loop_masks,
813038fd1498Szrj vec_num * ncopies,
813138fd1498Szrj vectype, vec_num * j + i);
813238fd1498Szrj if (vec_mask)
813338fd1498Szrj final_mask = prepare_load_store_mask (mask_vectype, final_mask,
813438fd1498Szrj vec_mask, gsi);
813538fd1498Szrj
813638fd1498Szrj if (i > 0)
813738fd1498Szrj dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
813838fd1498Szrj stmt, bump);
813938fd1498Szrj
814038fd1498Szrj /* 2. Create the vector-load in the loop. */
814138fd1498Szrj switch (alignment_support_scheme)
814238fd1498Szrj {
814338fd1498Szrj case dr_aligned:
814438fd1498Szrj case dr_unaligned_supported:
814538fd1498Szrj {
814638fd1498Szrj unsigned int align, misalign;
814738fd1498Szrj
814838fd1498Szrj if (memory_access_type == VMAT_GATHER_SCATTER)
814938fd1498Szrj {
815038fd1498Szrj tree scale = size_int (gs_info.scale);
815138fd1498Szrj gcall *call;
815238fd1498Szrj if (loop_masks)
815338fd1498Szrj call = gimple_build_call_internal
815438fd1498Szrj (IFN_MASK_GATHER_LOAD, 4, dataref_ptr,
815538fd1498Szrj vec_offset, scale, final_mask);
815638fd1498Szrj else
815738fd1498Szrj call = gimple_build_call_internal
815838fd1498Szrj (IFN_GATHER_LOAD, 3, dataref_ptr,
815938fd1498Szrj vec_offset, scale);
816038fd1498Szrj gimple_call_set_nothrow (call, true);
816138fd1498Szrj new_stmt = call;
816238fd1498Szrj data_ref = NULL_TREE;
816338fd1498Szrj break;
816438fd1498Szrj }
816538fd1498Szrj
816638fd1498Szrj align = DR_TARGET_ALIGNMENT (dr);
816738fd1498Szrj if (alignment_support_scheme == dr_aligned)
816838fd1498Szrj {
816938fd1498Szrj gcc_assert (aligned_access_p (first_dr));
817038fd1498Szrj misalign = 0;
817138fd1498Szrj }
817238fd1498Szrj else if (DR_MISALIGNMENT (first_dr) == -1)
817338fd1498Szrj {
817438fd1498Szrj align = dr_alignment (vect_dr_behavior (first_dr));
817538fd1498Szrj misalign = 0;
817638fd1498Szrj }
817738fd1498Szrj else
817838fd1498Szrj misalign = DR_MISALIGNMENT (first_dr);
817938fd1498Szrj if (dataref_offset == NULL_TREE
818038fd1498Szrj && TREE_CODE (dataref_ptr) == SSA_NAME)
818138fd1498Szrj set_ptr_info_alignment (get_ptr_info (dataref_ptr),
818238fd1498Szrj align, misalign);
818338fd1498Szrj
818438fd1498Szrj if (final_mask)
818538fd1498Szrj {
818638fd1498Szrj align = least_bit_hwi (misalign | align);
818738fd1498Szrj tree ptr = build_int_cst (ref_type, align);
818838fd1498Szrj gcall *call
818938fd1498Szrj = gimple_build_call_internal (IFN_MASK_LOAD, 3,
819038fd1498Szrj dataref_ptr, ptr,
819138fd1498Szrj final_mask);
819238fd1498Szrj gimple_call_set_nothrow (call, true);
819338fd1498Szrj new_stmt = call;
819438fd1498Szrj data_ref = NULL_TREE;
819538fd1498Szrj }
819638fd1498Szrj else
819738fd1498Szrj {
819838fd1498Szrj data_ref
819938fd1498Szrj = fold_build2 (MEM_REF, vectype, dataref_ptr,
820038fd1498Szrj dataref_offset
820138fd1498Szrj ? dataref_offset
820238fd1498Szrj : build_int_cst (ref_type, 0));
820338fd1498Szrj if (alignment_support_scheme == dr_aligned)
820438fd1498Szrj ;
820538fd1498Szrj else if (DR_MISALIGNMENT (first_dr) == -1)
820638fd1498Szrj TREE_TYPE (data_ref)
820738fd1498Szrj = build_aligned_type (TREE_TYPE (data_ref),
820838fd1498Szrj align * BITS_PER_UNIT);
820938fd1498Szrj else
821038fd1498Szrj TREE_TYPE (data_ref)
821138fd1498Szrj = build_aligned_type (TREE_TYPE (data_ref),
821238fd1498Szrj TYPE_ALIGN (elem_type));
821338fd1498Szrj }
821438fd1498Szrj break;
821538fd1498Szrj }
821638fd1498Szrj case dr_explicit_realign:
821738fd1498Szrj {
821838fd1498Szrj tree ptr, bump;
821938fd1498Szrj
822038fd1498Szrj tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
822138fd1498Szrj
822238fd1498Szrj if (compute_in_loop)
822338fd1498Szrj msq = vect_setup_realignment (first_stmt, gsi,
822438fd1498Szrj &realignment_token,
822538fd1498Szrj dr_explicit_realign,
822638fd1498Szrj dataref_ptr, NULL);
822738fd1498Szrj
822838fd1498Szrj if (TREE_CODE (dataref_ptr) == SSA_NAME)
822938fd1498Szrj ptr = copy_ssa_name (dataref_ptr);
823038fd1498Szrj else
823138fd1498Szrj ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
823238fd1498Szrj unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
823338fd1498Szrj new_stmt = gimple_build_assign
823438fd1498Szrj (ptr, BIT_AND_EXPR, dataref_ptr,
823538fd1498Szrj build_int_cst
823638fd1498Szrj (TREE_TYPE (dataref_ptr),
823738fd1498Szrj -(HOST_WIDE_INT) align));
823838fd1498Szrj vect_finish_stmt_generation (stmt, new_stmt, gsi);
823938fd1498Szrj data_ref
824038fd1498Szrj = build2 (MEM_REF, vectype, ptr,
824138fd1498Szrj build_int_cst (ref_type, 0));
824238fd1498Szrj vect_copy_ref_info (data_ref, DR_REF (first_dr));
824338fd1498Szrj vec_dest = vect_create_destination_var (scalar_dest,
824438fd1498Szrj vectype);
824538fd1498Szrj new_stmt = gimple_build_assign (vec_dest, data_ref);
824638fd1498Szrj new_temp = make_ssa_name (vec_dest, new_stmt);
824738fd1498Szrj gimple_assign_set_lhs (new_stmt, new_temp);
824838fd1498Szrj gimple_set_vdef (new_stmt, gimple_vdef (stmt));
824938fd1498Szrj gimple_set_vuse (new_stmt, gimple_vuse (stmt));
825038fd1498Szrj vect_finish_stmt_generation (stmt, new_stmt, gsi);
825138fd1498Szrj msq = new_temp;
825238fd1498Szrj
825338fd1498Szrj bump = size_binop (MULT_EXPR, vs,
825438fd1498Szrj TYPE_SIZE_UNIT (elem_type));
825538fd1498Szrj bump = size_binop (MINUS_EXPR, bump, size_one_node);
825638fd1498Szrj ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
825738fd1498Szrj new_stmt = gimple_build_assign
825838fd1498Szrj (NULL_TREE, BIT_AND_EXPR, ptr,
825938fd1498Szrj build_int_cst
826038fd1498Szrj (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
826138fd1498Szrj ptr = copy_ssa_name (ptr, new_stmt);
826238fd1498Szrj gimple_assign_set_lhs (new_stmt, ptr);
826338fd1498Szrj vect_finish_stmt_generation (stmt, new_stmt, gsi);
826438fd1498Szrj data_ref
826538fd1498Szrj = build2 (MEM_REF, vectype, ptr,
826638fd1498Szrj build_int_cst (ref_type, 0));
826738fd1498Szrj break;
826838fd1498Szrj }
826938fd1498Szrj case dr_explicit_realign_optimized:
827038fd1498Szrj {
827138fd1498Szrj if (TREE_CODE (dataref_ptr) == SSA_NAME)
827238fd1498Szrj new_temp = copy_ssa_name (dataref_ptr);
827338fd1498Szrj else
827438fd1498Szrj new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
827538fd1498Szrj unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
827638fd1498Szrj new_stmt = gimple_build_assign
827738fd1498Szrj (new_temp, BIT_AND_EXPR, dataref_ptr,
827838fd1498Szrj build_int_cst (TREE_TYPE (dataref_ptr),
827938fd1498Szrj -(HOST_WIDE_INT) align));
828038fd1498Szrj vect_finish_stmt_generation (stmt, new_stmt, gsi);
828138fd1498Szrj data_ref
828238fd1498Szrj = build2 (MEM_REF, vectype, new_temp,
828338fd1498Szrj build_int_cst (ref_type, 0));
828438fd1498Szrj break;
828538fd1498Szrj }
828638fd1498Szrj default:
828738fd1498Szrj gcc_unreachable ();
828838fd1498Szrj }
828938fd1498Szrj vec_dest = vect_create_destination_var (scalar_dest, vectype);
829038fd1498Szrj /* DATA_REF is null if we've already built the statement. */
829138fd1498Szrj if (data_ref)
829238fd1498Szrj {
829338fd1498Szrj vect_copy_ref_info (data_ref, DR_REF (first_dr));
829438fd1498Szrj new_stmt = gimple_build_assign (vec_dest, data_ref);
829538fd1498Szrj }
829638fd1498Szrj new_temp = make_ssa_name (vec_dest, new_stmt);
829738fd1498Szrj gimple_set_lhs (new_stmt, new_temp);
829838fd1498Szrj vect_finish_stmt_generation (stmt, new_stmt, gsi);
829938fd1498Szrj
830038fd1498Szrj /* 3. Handle explicit realignment if necessary/supported.
830138fd1498Szrj Create in loop:
830238fd1498Szrj vec_dest = realign_load (msq, lsq, realignment_token) */
830338fd1498Szrj if (alignment_support_scheme == dr_explicit_realign_optimized
830438fd1498Szrj || alignment_support_scheme == dr_explicit_realign)
830538fd1498Szrj {
830638fd1498Szrj lsq = gimple_assign_lhs (new_stmt);
830738fd1498Szrj if (!realignment_token)
830838fd1498Szrj realignment_token = dataref_ptr;
830938fd1498Szrj vec_dest = vect_create_destination_var (scalar_dest, vectype);
831038fd1498Szrj new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
831138fd1498Szrj msq, lsq, realignment_token);
831238fd1498Szrj new_temp = make_ssa_name (vec_dest, new_stmt);
831338fd1498Szrj gimple_assign_set_lhs (new_stmt, new_temp);
831438fd1498Szrj vect_finish_stmt_generation (stmt, new_stmt, gsi);
831538fd1498Szrj
831638fd1498Szrj if (alignment_support_scheme == dr_explicit_realign_optimized)
831738fd1498Szrj {
831838fd1498Szrj gcc_assert (phi);
831938fd1498Szrj if (i == vec_num - 1 && j == ncopies - 1)
832038fd1498Szrj add_phi_arg (phi, lsq,
832138fd1498Szrj loop_latch_edge (containing_loop),
832238fd1498Szrj UNKNOWN_LOCATION);
832338fd1498Szrj msq = lsq;
832438fd1498Szrj }
832538fd1498Szrj }
832638fd1498Szrj
832738fd1498Szrj /* 4. Handle invariant-load. */
832838fd1498Szrj if (inv_p && !bb_vinfo)
832938fd1498Szrj {
833038fd1498Szrj gcc_assert (!grouped_load);
833138fd1498Szrj /* If we have versioned for aliasing or the loop doesn't
833238fd1498Szrj have any data dependencies that would preclude this,
833338fd1498Szrj then we are sure this is a loop invariant load and
833438fd1498Szrj thus we can insert it on the preheader edge. */
833538fd1498Szrj if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
833638fd1498Szrj && !nested_in_vect_loop
833738fd1498Szrj && hoist_defs_of_uses (stmt, loop))
833838fd1498Szrj {
833938fd1498Szrj if (dump_enabled_p ())
834038fd1498Szrj {
834138fd1498Szrj dump_printf_loc (MSG_NOTE, vect_location,
834238fd1498Szrj "hoisting out of the vectorized "
834338fd1498Szrj "loop: ");
834438fd1498Szrj dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
834538fd1498Szrj }
834638fd1498Szrj tree tem = copy_ssa_name (scalar_dest);
834738fd1498Szrj gsi_insert_on_edge_immediate
834838fd1498Szrj (loop_preheader_edge (loop),
834938fd1498Szrj gimple_build_assign (tem,
835038fd1498Szrj unshare_expr
835138fd1498Szrj (gimple_assign_rhs1 (stmt))));
835238fd1498Szrj new_temp = vect_init_vector (stmt, tem, vectype, NULL);
835338fd1498Szrj new_stmt = SSA_NAME_DEF_STMT (new_temp);
835438fd1498Szrj set_vinfo_for_stmt (new_stmt,
835538fd1498Szrj new_stmt_vec_info (new_stmt, vinfo));
835638fd1498Szrj }
835738fd1498Szrj else
835838fd1498Szrj {
835938fd1498Szrj gimple_stmt_iterator gsi2 = *gsi;
836038fd1498Szrj gsi_next (&gsi2);
836138fd1498Szrj new_temp = vect_init_vector (stmt, scalar_dest,
836238fd1498Szrj vectype, &gsi2);
836338fd1498Szrj new_stmt = SSA_NAME_DEF_STMT (new_temp);
836438fd1498Szrj }
836538fd1498Szrj }
836638fd1498Szrj
836738fd1498Szrj if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
836838fd1498Szrj {
836938fd1498Szrj tree perm_mask = perm_mask_for_reverse (vectype);
837038fd1498Szrj new_temp = permute_vec_elements (new_temp, new_temp,
837138fd1498Szrj perm_mask, stmt, gsi);
837238fd1498Szrj new_stmt = SSA_NAME_DEF_STMT (new_temp);
837338fd1498Szrj }
837438fd1498Szrj
837538fd1498Szrj /* Collect vector loads and later create their permutation in
837638fd1498Szrj vect_transform_grouped_load (). */
837738fd1498Szrj if (grouped_load || slp_perm)
837838fd1498Szrj dr_chain.quick_push (new_temp);
837938fd1498Szrj
838038fd1498Szrj /* Store vector loads in the corresponding SLP_NODE. */
838138fd1498Szrj if (slp && !slp_perm)
838238fd1498Szrj SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
838338fd1498Szrj
838438fd1498Szrj /* With SLP permutation we load the gaps as well, without
838538fd1498Szrj we need to skip the gaps after we manage to fully load
838638fd1498Szrj all elements. group_gap_adj is GROUP_SIZE here. */
838738fd1498Szrj group_elt += nunits;
838838fd1498Szrj if (maybe_ne (group_gap_adj, 0U)
838938fd1498Szrj && !slp_perm
839038fd1498Szrj && known_eq (group_elt, group_size - group_gap_adj))
839138fd1498Szrj {
839238fd1498Szrj poly_wide_int bump_val
839338fd1498Szrj = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
839438fd1498Szrj * group_gap_adj);
839538fd1498Szrj tree bump = wide_int_to_tree (sizetype, bump_val);
839638fd1498Szrj dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
839738fd1498Szrj stmt, bump);
839838fd1498Szrj group_elt = 0;
839938fd1498Szrj }
840038fd1498Szrj }
840138fd1498Szrj /* Bump the vector pointer to account for a gap or for excess
840238fd1498Szrj elements loaded for a permuted SLP load. */
840338fd1498Szrj if (maybe_ne (group_gap_adj, 0U) && slp_perm)
840438fd1498Szrj {
840538fd1498Szrj poly_wide_int bump_val
840638fd1498Szrj = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
840738fd1498Szrj * group_gap_adj);
840838fd1498Szrj tree bump = wide_int_to_tree (sizetype, bump_val);
840938fd1498Szrj dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
841038fd1498Szrj stmt, bump);
841138fd1498Szrj }
841238fd1498Szrj }
841338fd1498Szrj
841438fd1498Szrj if (slp && !slp_perm)
841538fd1498Szrj continue;
841638fd1498Szrj
841738fd1498Szrj if (slp_perm)
841838fd1498Szrj {
841938fd1498Szrj unsigned n_perms;
842038fd1498Szrj if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
842138fd1498Szrj slp_node_instance, false,
842238fd1498Szrj &n_perms))
842338fd1498Szrj {
842438fd1498Szrj dr_chain.release ();
842538fd1498Szrj return false;
842638fd1498Szrj }
842738fd1498Szrj }
842838fd1498Szrj else
842938fd1498Szrj {
843038fd1498Szrj if (grouped_load)
843138fd1498Szrj {
843238fd1498Szrj if (memory_access_type != VMAT_LOAD_STORE_LANES)
843338fd1498Szrj vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
843438fd1498Szrj *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
843538fd1498Szrj }
843638fd1498Szrj else
843738fd1498Szrj {
843838fd1498Szrj if (j == 0)
843938fd1498Szrj STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
844038fd1498Szrj else
844138fd1498Szrj STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
844238fd1498Szrj prev_stmt_info = vinfo_for_stmt (new_stmt);
844338fd1498Szrj }
844438fd1498Szrj }
844538fd1498Szrj dr_chain.release ();
844638fd1498Szrj }
844738fd1498Szrj
844838fd1498Szrj return true;
844938fd1498Szrj }
845038fd1498Szrj
845138fd1498Szrj /* Function vect_is_simple_cond.
845238fd1498Szrj
   Input:
   VINFO - the vect info of the loop or basic block that is being vectorized.
   COND - Condition that is checked for simple use.

   Output:
   *COMP_VECTYPE - the vector type for the comparison.
   *DTS - The def types for the arguments of the comparison

   Returns whether a COND can be vectorized.  Checks whether
   condition operands are supportable using vect_is_simple_use.  */
846338fd1498Szrj
static bool
vect_is_simple_cond (tree cond, vec_info *vinfo,
		     tree *comp_vectype, enum vect_def_type *dts,
		     tree vectype)
{
  tree lhs, rhs;
  tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;

  /* Mask case: COND is already a scalar boolean SSA_NAME rather than a
     comparison tree.  Its vector def must itself be a boolean vector.  */
  if (TREE_CODE (cond) == SSA_NAME
      && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
    {
      gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (cond);
      if (!vect_is_simple_use (cond, vinfo, &lhs_def_stmt,
			       &dts[0], comp_vectype)
	  || !*comp_vectype
	  || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
	return false;
      return true;
    }

  /* Otherwise only a comparison of two operands is handled.  */
  if (!COMPARISON_CLASS_P (cond))
    return false;

  lhs = TREE_OPERAND (cond, 0);
  rhs = TREE_OPERAND (cond, 1);

  /* Each operand must be an SSA_NAME with a simple use, or an
     integer/real/fixed-point constant (recorded as vect_constant_def).  */
  if (TREE_CODE (lhs) == SSA_NAME)
    {
      gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
      if (!vect_is_simple_use (lhs, vinfo, &lhs_def_stmt, &dts[0], &vectype1))
	return false;
    }
  else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
	   || TREE_CODE (lhs) == FIXED_CST)
    dts[0] = vect_constant_def;
  else
    return false;

  if (TREE_CODE (rhs) == SSA_NAME)
    {
      gimple *rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
      if (!vect_is_simple_use (rhs, vinfo, &rhs_def_stmt, &dts[1], &vectype2))
	return false;
    }
  else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
	   || TREE_CODE (rhs) == FIXED_CST)
    dts[1] = vect_constant_def;
  else
    return false;

  /* If both operands have a vector type the element counts must agree.  */
  if (vectype1 && vectype2
      && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
		   TYPE_VECTOR_SUBPARTS (vectype2)))
    return false;

  *comp_vectype = vectype1 ? vectype1 : vectype2;
  /* Invariant comparison: neither operand had a vector def, so derive the
     comparison vector type from the scalar type of the operands.  */
  if (! *comp_vectype && vectype)
    {
      tree scalar_type = TREE_TYPE (lhs);
      /* If we can widen the comparison to match vectype do so.  */
      if (INTEGRAL_TYPE_P (scalar_type)
	  && tree_int_cst_lt (TYPE_SIZE (scalar_type),
			      TYPE_SIZE (TREE_TYPE (vectype))))
	scalar_type = build_nonstandard_integer_type
	  (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))),
	   TYPE_UNSIGNED (scalar_type));
      *comp_vectype = get_vectype_for_scalar_type (scalar_type);
    }

  return true;
}
853738fd1498Szrj
853838fd1498Szrj /* vectorizable_condition.
853938fd1498Szrj
854038fd1498Szrj Check if STMT is conditional modify expression that can be vectorized.
854138fd1498Szrj If VEC_STMT is also passed, vectorize the STMT: create a vectorized
854238fd1498Szrj stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
854338fd1498Szrj at GSI.
854438fd1498Szrj
854538fd1498Szrj When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
854638fd1498Szrj to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
854738fd1498Szrj else clause if it is 2).
854838fd1498Szrj
854938fd1498Szrj Return FALSE if not a vectorizable STMT, TRUE otherwise. */
855038fd1498Szrj
bool
vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
			gimple **vec_stmt, tree reduc_def, int reduc_index,
			slp_tree slp_node)
{
  tree scalar_dest = NULL_TREE;
  tree vec_dest = NULL_TREE;
  tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
  tree then_clause, else_clause;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree comp_vectype = NULL_TREE;
  tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
  tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
  tree vec_compare;
  tree new_temp;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  /* dts[0]/dts[1] are the def types of the comparison operands,
     dts[2] the THEN clause and dts[3] the ELSE clause.  */
  enum vect_def_type dts[4]
    = {vect_unknown_def_type, vect_unknown_def_type,
       vect_unknown_def_type, vect_unknown_def_type};
  int ndts = 4;
  int ncopies;
  enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
  stmt_vec_info prev_stmt_info = NULL;
  int i, j;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  vec<tree> vec_oprnds2 = vNULL;
  vec<tree> vec_oprnds3 = vNULL;
  tree vec_cmp_type;
  bool masked = false;

  /* Conditions inside a reduction are not handled when SLP is involved.  */
  if (reduc_index && STMT_SLP_TYPE (stmt_info))
    return false;

  vect_reduction_type reduction_type
    = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info);
  if (reduction_type == TREE_CODE_REDUCTION)
    {
      if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
	return false;

      /* Only internal defs, or a nested cycle with a reduction def,
	 are supported here.  */
      if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
	  && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
	       && reduc_def))
	return false;

      /* FORNOW: not yet supported.  */
      if (STMT_VINFO_LIVE_P (stmt_info))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "value used after loop.\n");
	  return false;
	}
    }

  /* Is vectorizable conditional operation?  */
  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);

  if (code != COND_EXPR)
    return false;

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;

  if (slp_node)
    ncopies = 1;
  else
    ncopies = vect_get_num_copies (loop_vinfo, vectype);

  gcc_assert (ncopies >= 1);
  if (reduc_index && ncopies > 1)
    return false; /* FORNOW */

  cond_expr = gimple_assign_rhs1 (stmt);
  then_clause = gimple_assign_rhs2 (stmt);
  else_clause = gimple_assign_rhs3 (stmt);

  /* Check the condition and compute the vector type for the comparison.  */
  if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo,
			    &comp_vectype, &dts[0], slp_node ? NULL : vectype)
      || !comp_vectype)
    return false;

  gimple *def_stmt;
  if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &def_stmt, &dts[2],
			   &vectype1))
    return false;
  if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &def_stmt, &dts[3],
			   &vectype2))
    return false;

  /* THEN/ELSE vector types must be compatible with the statement's
     vector type.  */
  if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
    return false;

  if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
    return false;

  /* A non-comparison condition is already a boolean mask.  */
  masked = !COMPARISON_CLASS_P (cond_expr);
  vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);

  if (vec_cmp_type == NULL_TREE)
    return false;

  cond_code = TREE_CODE (cond_expr);
  if (!masked)
    {
      cond_expr0 = TREE_OPERAND (cond_expr, 0);
      cond_expr1 = TREE_OPERAND (cond_expr, 1);
    }

  if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
    {
      /* Boolean values may have another representation in vectors
	 and therefore we prefer bit operations over comparison for
	 them (which also works for scalar masks).  We store opcodes
	 to use in bitop1 and bitop2.  Statement is vectorized as
	 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
	 depending on bitop1 and bitop2 arity.  */
      switch (cond_code)
	{
	case GT_EXPR:
	  bitop1 = BIT_NOT_EXPR;
	  bitop2 = BIT_AND_EXPR;
	  break;
	case GE_EXPR:
	  bitop1 = BIT_NOT_EXPR;
	  bitop2 = BIT_IOR_EXPR;
	  break;
	case LT_EXPR:
	  bitop1 = BIT_NOT_EXPR;
	  bitop2 = BIT_AND_EXPR;
	  std::swap (cond_expr0, cond_expr1);
	  break;
	case LE_EXPR:
	  bitop1 = BIT_NOT_EXPR;
	  bitop2 = BIT_IOR_EXPR;
	  std::swap (cond_expr0, cond_expr1);
	  break;
	case NE_EXPR:
	  bitop1 = BIT_XOR_EXPR;
	  break;
	case EQ_EXPR:
	  bitop1 = BIT_XOR_EXPR;
	  bitop2 = BIT_NOT_EXPR;
	  break;
	default:
	  return false;
	}
      /* The comparison has been replaced by bit operations; treat the
	 condition as a plain mask from here on.  */
      cond_code = SSA_NAME;
    }

  /* Analysis phase: record the stmt type, check that the needed optabs
     exist, and model the cost.  No code is generated here.  */
  if (!vec_stmt)
    {
      STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
      if (bitop1 != NOP_EXPR)
	{
	  machine_mode mode = TYPE_MODE (comp_vectype);
	  optab optab;

	  optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
	  if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
	    return false;

	  if (bitop2 != NOP_EXPR)
	    {
	      optab = optab_for_tree_code (bitop2, comp_vectype,
					   optab_default);
	      if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
		return false;
	    }
	}
      if (expand_vec_cond_expr_p (vectype, comp_vectype,
				  cond_code))
	{
	  if (!slp_node)
	    vect_model_simple_cost (stmt_info, ncopies, dts, ndts, NULL, NULL);
	  return true;
	}
      return false;
    }

  /* Transform.  */

  if (!slp_node)
    {
      vec_oprnds0.create (1);
      vec_oprnds1.create (1);
      vec_oprnds2.create (1);
      vec_oprnds3.create (1);
    }

  /* Handle def.  */
  scalar_dest = gimple_assign_lhs (stmt);
  if (reduction_type != EXTRACT_LAST_REDUCTION)
    vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle cond expr.  Generate NCOPIES vector statements.  */
  for (j = 0; j < ncopies; j++)
    {
      gimple *new_stmt = NULL;
      if (j == 0)
	{
	  if (slp_node)
	    {
	      /* Collect vector defs for all operands via the SLP tree.  */
	      auto_vec<tree, 4> ops;
	      auto_vec<vec<tree>, 4> vec_defs;

	      if (masked)
		ops.safe_push (cond_expr);
	      else
		{
		  ops.safe_push (cond_expr0);
		  ops.safe_push (cond_expr1);
		}
	      ops.safe_push (then_clause);
	      ops.safe_push (else_clause);
	      vect_get_slp_defs (ops, slp_node, &vec_defs);
	      /* vec_defs come back in push order; pop in reverse.  */
	      vec_oprnds3 = vec_defs.pop ();
	      vec_oprnds2 = vec_defs.pop ();
	      if (!masked)
		vec_oprnds1 = vec_defs.pop ();
	      vec_oprnds0 = vec_defs.pop ();
	    }
	  else
	    {
	      gimple *gtemp;
	      if (masked)
		{
		  vec_cond_lhs
		    = vect_get_vec_def_for_operand (cond_expr, stmt,
						    comp_vectype);
		  vect_is_simple_use (cond_expr, stmt_info->vinfo,
				      &gtemp, &dts[0]);
		}
	      else
		{
		  vec_cond_lhs
		    = vect_get_vec_def_for_operand (cond_expr0,
						    stmt, comp_vectype);
		  vect_is_simple_use (cond_expr0, loop_vinfo, &gtemp, &dts[0]);

		  vec_cond_rhs
		    = vect_get_vec_def_for_operand (cond_expr1,
						    stmt, comp_vectype);
		  vect_is_simple_use (cond_expr1, loop_vinfo, &gtemp, &dts[1]);
		}
	      /* For a nested-cycle reduction the reduction def replaces
		 the THEN (reduc_index == 1) or ELSE (== 2) clause.  */
	      if (reduc_index == 1)
		vec_then_clause = reduc_def;
	      else
		{
		  vec_then_clause = vect_get_vec_def_for_operand (then_clause,
								  stmt);
		  vect_is_simple_use (then_clause, loop_vinfo,
				      &gtemp, &dts[2]);
		}
	      if (reduc_index == 2)
		vec_else_clause = reduc_def;
	      else
		{
		  vec_else_clause = vect_get_vec_def_for_operand (else_clause,
								  stmt);
		  vect_is_simple_use (else_clause, loop_vinfo, &gtemp, &dts[3]);
		}
	    }
	}
      else
	{
	  /* j > 0: chain from the defs created for the previous copy.  */
	  vec_cond_lhs
	    = vect_get_vec_def_for_stmt_copy (dts[0],
					      vec_oprnds0.pop ());
	  if (!masked)
	    vec_cond_rhs
	      = vect_get_vec_def_for_stmt_copy (dts[1],
						vec_oprnds1.pop ());

	  vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
							    vec_oprnds2.pop ());
	  vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
							    vec_oprnds3.pop ());
	}

      if (!slp_node)
	{
	  vec_oprnds0.quick_push (vec_cond_lhs);
	  if (!masked)
	    vec_oprnds1.quick_push (vec_cond_rhs);
	  vec_oprnds2.quick_push (vec_then_clause);
	  vec_oprnds3.quick_push (vec_else_clause);
	}

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
	{
	  vec_then_clause = vec_oprnds2[i];
	  vec_else_clause = vec_oprnds3[i];

	  if (masked)
	    vec_compare = vec_cond_lhs;
	  else
	    {
	      vec_cond_rhs = vec_oprnds1[i];
	      if (bitop1 == NOP_EXPR)
		vec_compare = build2 (cond_code, vec_cmp_type,
				      vec_cond_lhs, vec_cond_rhs);
	      else
		{
		  /* Emit the first bit operation; a unary BIT_NOT_EXPR
		     applies to the (possibly swapped) rhs only.  */
		  new_temp = make_ssa_name (vec_cmp_type);
		  if (bitop1 == BIT_NOT_EXPR)
		    new_stmt = gimple_build_assign (new_temp, bitop1,
						    vec_cond_rhs);
		  else
		    new_stmt
		      = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
					     vec_cond_rhs);
		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
		  if (bitop2 == NOP_EXPR)
		    vec_compare = new_temp;
		  else if (bitop2 == BIT_NOT_EXPR)
		    {
		      /* Instead of doing ~x ? y : z do x ? z : y.  */
		      vec_compare = new_temp;
		      std::swap (vec_then_clause, vec_else_clause);
		    }
		  else
		    {
		      vec_compare = make_ssa_name (vec_cmp_type);
		      new_stmt
			= gimple_build_assign (vec_compare, bitop2,
					       vec_cond_lhs, new_temp);
		      vect_finish_stmt_generation (stmt, new_stmt, gsi);
		    }
		}
	    }
	  if (reduction_type == EXTRACT_LAST_REDUCTION)
	    {
	      /* Fold the condition into an IFN_FOLD_EXTRACT_LAST call
		 instead of emitting a VEC_COND_EXPR.  */
	      if (!is_gimple_val (vec_compare))
		{
		  tree vec_compare_name = make_ssa_name (vec_cmp_type);
		  new_stmt = gimple_build_assign (vec_compare_name,
						  vec_compare);
		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
		  vec_compare = vec_compare_name;
		}
	      gcc_assert (reduc_index == 2);
	      new_stmt = gimple_build_call_internal
		(IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
		 vec_then_clause);
	      gimple_call_set_lhs (new_stmt, scalar_dest);
	      SSA_NAME_DEF_STMT (scalar_dest) = new_stmt;
	      if (stmt == gsi_stmt (*gsi))
		vect_finish_replace_stmt (stmt, new_stmt);
	      else
		{
		  /* In this case we're moving the definition to later in the
		     block.  That doesn't matter because the only uses of the
		     lhs are in phi statements.  */
		  gimple_stmt_iterator old_gsi = gsi_for_stmt (stmt);
		  gsi_remove (&old_gsi, true);
		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
		}
	    }
	  else
	    {
	      new_temp = make_ssa_name (vec_dest);
	      new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR,
					      vec_compare, vec_then_clause,
					      vec_else_clause);
	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
	    }
	  if (slp_node)
	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	}

      if (slp_node)
	continue;

      /* Record the head of the vector stmt chain and link copies.  */
      if (j == 0)
	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();
  vec_oprnds2.release ();
  vec_oprnds3.release ();

  return true;
}
894638fd1498Szrj
894738fd1498Szrj /* vectorizable_comparison.
894838fd1498Szrj
894938fd1498Szrj Check if STMT is comparison expression that can be vectorized.
895038fd1498Szrj If VEC_STMT is also passed, vectorize the STMT: create a vectorized
895138fd1498Szrj comparison, put it in VEC_STMT, and insert it at GSI.
895238fd1498Szrj
895338fd1498Szrj Return FALSE if not a vectorizable STMT, TRUE otherwise. */
895438fd1498Szrj
static bool
vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
			 gimple **vec_stmt, tree reduc_def,
			 slp_tree slp_node)
{
  tree lhs, rhs1, rhs2;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
  tree new_temp;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  /* dts[0]/dts[1] are the def types of rhs1/rhs2.  */
  enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
  int ndts = 2;
  poly_uint64 nunits;
  int ncopies;
  enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
  stmt_vec_info prev_stmt_info = NULL;
  int i, j;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  gimple *def_stmt;
  tree mask_type;
  tree mask;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  /* The result of a vectorized comparison is a boolean mask vector.  */
  if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
    return false;

  mask_type = vectype;
  nunits = TYPE_VECTOR_SUBPARTS (vectype);

  if (slp_node)
    ncopies = 1;
  else
    ncopies = vect_get_num_copies (loop_vinfo, vectype);

  gcc_assert (ncopies >= 1);
  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
	   && reduc_def))
    return false;

  /* FORNOW: a mask live after the loop is not supported.  */
  if (STMT_VINFO_LIVE_P (stmt_info))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "value used after loop.\n");
      return false;
    }

  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);

  if (TREE_CODE_CLASS (code) != tcc_comparison)
    return false;

  rhs1 = gimple_assign_rhs1 (stmt);
  rhs2 = gimple_assign_rhs2 (stmt);

  if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &def_stmt,
			   &dts[0], &vectype1))
    return false;

  if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &def_stmt,
			   &dts[1], &vectype2))
    return false;

  /* Both operand vector types must agree on the number of elements.  */
  if (vectype1 && vectype2
      && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
		   TYPE_VECTOR_SUBPARTS (vectype2)))
    return false;

  vectype = vectype1 ? vectype1 : vectype2;

  /* Invariant comparison.  */
  if (!vectype)
    {
      vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
      if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
	return false;
    }
  else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
    return false;

  /* Can't compare mask and non-mask types.  */
  if (vectype1 && vectype2
      && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
    return false;

  /* Boolean values may have another representation in vectors
     and therefore we prefer bit operations over comparison for
     them (which also works for scalar masks).  We store opcodes
     to use in bitop1 and bitop2.  Statement is vectorized as
       BITOP2 (rhs1 BITOP1 rhs2) or
       rhs1 BITOP2 (BITOP1 rhs2)
     depending on bitop1 and bitop2 arity.  */
  if (VECTOR_BOOLEAN_TYPE_P (vectype))
    {
      if (code == GT_EXPR)
	{
	  bitop1 = BIT_NOT_EXPR;
	  bitop2 = BIT_AND_EXPR;
	}
      else if (code == GE_EXPR)
	{
	  bitop1 = BIT_NOT_EXPR;
	  bitop2 = BIT_IOR_EXPR;
	}
      else if (code == LT_EXPR)
	{
	  /* LT/LE are mapped to the swapped-operand GT/GE forms.  */
	  bitop1 = BIT_NOT_EXPR;
	  bitop2 = BIT_AND_EXPR;
	  std::swap (rhs1, rhs2);
	  std::swap (dts[0], dts[1]);
	}
      else if (code == LE_EXPR)
	{
	  bitop1 = BIT_NOT_EXPR;
	  bitop2 = BIT_IOR_EXPR;
	  std::swap (rhs1, rhs2);
	  std::swap (dts[0], dts[1]);
	}
      else
	{
	  bitop1 = BIT_XOR_EXPR;
	  if (code == EQ_EXPR)
	    bitop2 = BIT_NOT_EXPR;
	}
    }

  /* Analysis phase: record the stmt type, model the cost and verify the
     target supports the comparison or the chosen bit operations.  */
  if (!vec_stmt)
    {
      STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
      if (!slp_node)
	vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
				dts, ndts, NULL, NULL);
      if (bitop1 == NOP_EXPR)
	return expand_vec_cmp_expr_p (vectype, mask_type, code);
      else
	{
	  machine_mode mode = TYPE_MODE (vectype);
	  optab optab;

	  optab = optab_for_tree_code (bitop1, vectype, optab_default);
	  if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
	    return false;

	  if (bitop2 != NOP_EXPR)
	    {
	      optab = optab_for_tree_code (bitop2, vectype, optab_default);
	      if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
		return false;
	    }
	  return true;
	}
    }

  /* Transform.  */
  if (!slp_node)
    {
      vec_oprnds0.create (1);
      vec_oprnds1.create (1);
    }

  /* Handle def.  */
  lhs = gimple_assign_lhs (stmt);
  mask = vect_create_destination_var (lhs, mask_type);

  /* Handle cmp expr.  Generate NCOPIES vector comparisons.  */
  for (j = 0; j < ncopies; j++)
    {
      gassign *new_stmt = NULL;
      if (j == 0)
	{
	  if (slp_node)
	    {
	      /* Collect vector defs for both operands via the SLP tree.  */
	      auto_vec<tree, 2> ops;
	      auto_vec<vec<tree>, 2> vec_defs;

	      ops.safe_push (rhs1);
	      ops.safe_push (rhs2);
	      vect_get_slp_defs (ops, slp_node, &vec_defs);
	      vec_oprnds1 = vec_defs.pop ();
	      vec_oprnds0 = vec_defs.pop ();
	    }
	  else
	    {
	      vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt, vectype);
	      vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt, vectype);
	    }
	}
      else
	{
	  /* j > 0: chain from the defs created for the previous copy.  */
	  vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0],
						     vec_oprnds0.pop ());
	  vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1],
						     vec_oprnds1.pop ());
	}

      if (!slp_node)
	{
	  vec_oprnds0.quick_push (vec_rhs1);
	  vec_oprnds1.quick_push (vec_rhs2);
	}

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
	{
	  vec_rhs2 = vec_oprnds1[i];

	  new_temp = make_ssa_name (mask);
	  if (bitop1 == NOP_EXPR)
	    {
	      /* Direct vector comparison.  */
	      new_stmt = gimple_build_assign (new_temp, code,
					      vec_rhs1, vec_rhs2);
	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
	    }
	  else
	    {
	      /* Lowered to bit operations; a unary BIT_NOT_EXPR bitop1
		 applies to rhs2 only.  */
	      if (bitop1 == BIT_NOT_EXPR)
		new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
	      else
		new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
						vec_rhs2);
	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
	      if (bitop2 != NOP_EXPR)
		{
		  tree res = make_ssa_name (mask);
		  if (bitop2 == BIT_NOT_EXPR)
		    new_stmt = gimple_build_assign (res, bitop2, new_temp);
		  else
		    new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
						    new_temp);
		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
		}
	    }
	  if (slp_node)
	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	}

      if (slp_node)
	continue;

      /* Record the head of the vector stmt chain and link copies.  */
      if (j == 0)
	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();

  return true;
}
921738fd1498Szrj
921838fd1498Szrj /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
921938fd1498Szrj can handle all live statements in the node. Otherwise return true
922038fd1498Szrj if STMT is not live or if vectorizable_live_operation can handle it.
922138fd1498Szrj GSI and VEC_STMT are as for vectorizable_live_operation. */
922238fd1498Szrj
922338fd1498Szrj static bool
can_vectorize_live_stmts(gimple * stmt,gimple_stmt_iterator * gsi,slp_tree slp_node,gimple ** vec_stmt)922438fd1498Szrj can_vectorize_live_stmts (gimple *stmt, gimple_stmt_iterator *gsi,
922538fd1498Szrj slp_tree slp_node, gimple **vec_stmt)
922638fd1498Szrj {
922738fd1498Szrj if (slp_node)
922838fd1498Szrj {
922938fd1498Szrj gimple *slp_stmt;
923038fd1498Szrj unsigned int i;
923138fd1498Szrj FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt)
923238fd1498Szrj {
923338fd1498Szrj stmt_vec_info slp_stmt_info = vinfo_for_stmt (slp_stmt);
923438fd1498Szrj if (STMT_VINFO_LIVE_P (slp_stmt_info)
923538fd1498Szrj && !vectorizable_live_operation (slp_stmt, gsi, slp_node, i,
923638fd1498Szrj vec_stmt))
923738fd1498Szrj return false;
923838fd1498Szrj }
923938fd1498Szrj }
924038fd1498Szrj else if (STMT_VINFO_LIVE_P (vinfo_for_stmt (stmt))
924138fd1498Szrj && !vectorizable_live_operation (stmt, gsi, slp_node, -1, vec_stmt))
924238fd1498Szrj return false;
924338fd1498Szrj
924438fd1498Szrj return true;
924538fd1498Szrj }
924638fd1498Szrj
/* Make sure the statement is vectorizable.

   STMT is the statement to analyze; NODE is its SLP node, or NULL when
   not analyzing on behalf of an SLP instance; NODE_INSTANCE is the SLP
   instance NODE belongs to.  *NEED_TO_VECTORIZE is set to true when
   STMT is relevant, i.e. when something actually requires
   vectorization.  Returns true when STMT is irrelevant or supported,
   false when it cannot be vectorized.  */

bool
vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node,
		   slp_instance node_instance)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  /* Nonnull only when doing basic-block (SLP-only) vectorization.  */
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
  bool ok;
  gimple *pattern_stmt;
  gimple_seq pattern_def_seq;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
    }

  /* Volatile operands must keep their exact order and access count, so
     statements touching them are never vectorized.  */
  if (gimple_has_volatile_ops (stmt))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: stmt has volatile operands\n");

      return false;
    }

  /* Skip stmts that do not need to be vectorized.  In loops this is expected
     to include:
     - the COND_EXPR which is the loop exit condition
     - any LABEL_EXPRs in the loop
     - computations that are used only for array indexing or loop control.
     In basic blocks we only analyze statements that are a part of some SLP
     instance, therefore, all the statements are relevant.

     Pattern statement needs to be analyzed instead of the original statement
     if the original statement is not relevant.  Otherwise, we analyze both
     statements.  In basic blocks we are called from some SLP instance
     traversal, don't analyze pattern stmts instead, the pattern stmts
     already will be part of SLP instance.  */

  pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
  if (!STMT_VINFO_RELEVANT_P (stmt_info)
      && !STMT_VINFO_LIVE_P (stmt_info))
    {
      if (STMT_VINFO_IN_PATTERN_P (stmt_info)
	  && pattern_stmt
	  && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
	      || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
	{
	  /* Analyze PATTERN_STMT instead of the original stmt.  */
	  stmt = pattern_stmt;
	  stmt_info = vinfo_for_stmt (pattern_stmt);
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location,
			       "==> examining pattern statement: ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	    }
	}
      else
	{
	  /* Neither STMT nor its pattern stmt is relevant or live:
	     nothing to do, and nothing to reject.  */
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");

	  return true;
	}
    }
  else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
	   && node == NULL
	   && pattern_stmt
	   && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
	       || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
    {
      /* Analyze PATTERN_STMT too.  */
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "==> examining pattern statement: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	}

      if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node,
			      node_instance))
	return false;
    }

  /* A pattern stmt may carry a sequence of auxiliary def stmts; any of
     those that are relevant or live must be analyzed as well (only in
     the non-SLP path, see the comment block above).  */
  if (is_pattern_stmt_p (stmt_info)
      && node == NULL
      && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
    {
      gimple_stmt_iterator si;

      for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
	{
	  gimple *pattern_def_stmt = gsi_stmt (si);
	  if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
	      || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
	    {
	      /* Analyze def stmt of STMT if it's a pattern stmt.  */
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_NOTE, vect_location,
				   "==> examining pattern def statement: ");
		  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
		}

	      if (!vect_analyze_stmt (pattern_def_stmt,
				      need_to_vectorize, node, node_instance))
		return false;
	    }
	}
    }

  /* Sanity-check that the def type / relevance combination is one that
     can legitimately reach this point.  */
  switch (STMT_VINFO_DEF_TYPE (stmt_info))
    {
    case vect_internal_def:
      break;

    case vect_reduction_def:
    case vect_nested_cycle:
      gcc_assert (!bb_vinfo
		  && (relevance == vect_used_in_outer
		      || relevance == vect_used_in_outer_by_reduction
		      || relevance == vect_used_by_reduction
		      || relevance == vect_unused_in_scope
		      || relevance == vect_used_only_live));
      break;

    case vect_induction_def:
      gcc_assert (!bb_vinfo);
      break;

    case vect_constant_def:
    case vect_external_def:
    case vect_unknown_def_type:
    default:
      gcc_unreachable ();
    }

  if (STMT_VINFO_RELEVANT_P (stmt_info))
    {
      /* Relevant stmts have a scalar expression type and (except for
	 calls without a lhs) must already have a vector type chosen.  */
      gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
      gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
		  || (is_gimple_call (stmt)
		      && gimple_call_lhs (stmt) == NULL_TREE));
      *need_to_vectorize = true;
    }

  /* A stmt handled purely by SLP is only analyzed through its SLP node;
     when called without a node there is nothing more to check here.  */
  if (PURE_SLP_STMT (stmt_info) && !node)
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "handled only by SLP analysis\n");
      return true;
    }

  /* Try each kind of vectorizable operation in turn; the first routine
     that recognizes and supports STMT makes the whole test succeed.
     (Passing NULL for vec_stmt/gsi means "analyze only".)  */
  ok = true;
  if (!bb_vinfo
      && (STMT_VINFO_RELEVANT_P (stmt_info)
	  || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
    ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
	  || vectorizable_conversion (stmt, NULL, NULL, node)
	  || vectorizable_shift (stmt, NULL, NULL, node)
	  || vectorizable_operation (stmt, NULL, NULL, node)
	  || vectorizable_assignment (stmt, NULL, NULL, node)
	  || vectorizable_load (stmt, NULL, NULL, node, NULL)
	  || vectorizable_call (stmt, NULL, NULL, node)
	  || vectorizable_store (stmt, NULL, NULL, node)
	  || vectorizable_reduction (stmt, NULL, NULL, node, node_instance)
	  || vectorizable_induction (stmt, NULL, NULL, node)
	  || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
	  || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
  else
    {
      if (bb_vinfo)
	/* Basic-block vectorization: reductions and inductions do not
	   apply, so they are left out of the candidate list.  */
	ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
	      || vectorizable_conversion (stmt, NULL, NULL, node)
	      || vectorizable_shift (stmt, NULL, NULL, node)
	      || vectorizable_operation (stmt, NULL, NULL, node)
	      || vectorizable_assignment (stmt, NULL, NULL, node)
	      || vectorizable_load (stmt, NULL, NULL, node, NULL)
	      || vectorizable_call (stmt, NULL, NULL, node)
	      || vectorizable_store (stmt, NULL, NULL, node)
	      || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
	      || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
    }

  if (!ok)
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "not vectorized: relevant stmt not ");
	  dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
	  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	}

      return false;
    }

  if (bb_vinfo)
    return true;

  /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
     need extra handling, except for vectorizable reductions.  */
  if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
      && !can_vectorize_live_stmts (stmt, NULL, node, NULL))
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "not vectorized: live stmt not supported: ");
	  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	}

      return false;
    }

  return true;
}
946838fd1498Szrj
946938fd1498Szrj
/* Function vect_transform_stmt.

   Create a vectorized stmt to replace STMT, and insert it at BSI.
   GROUPED_STORE is set to true for a grouped non-SLP store; SLP_NODE
   and SLP_NODE_INSTANCE describe the SLP context, or are NULL.
   Returns true when STMT is a store whose vectorization is now
   complete (for an interleaved non-SLP group, only once the last
   store of the group has been reached).  */

bool
vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
		     bool *grouped_store, slp_tree slp_node,
		     slp_instance slp_node_instance)
{
  bool is_store = false;
  gimple *vec_stmt = NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  bool done;

  /* Pure SLP statements must only be transformed through their SLP
     node, never individually.  */
  gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
  /* Remembered so we can verify below that SLP transformation did not
     touch STMT_VINFO_VEC_STMT.  */
  gimple *old_vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);

  /* True if STMT lives in an inner loop of the loop nest being
     vectorized.  */
  bool nested_p = (STMT_VINFO_LOOP_VINFO (stmt_info)
		   && nested_in_vect_loop_p
		        (LOOP_VINFO_LOOP (STMT_VINFO_LOOP_VINFO (stmt_info)),
			 stmt));

  /* Dispatch on the statement kind determined during analysis.  Each
     vectorizable_* routine emits the vector statement(s) at GSI and
     returns the last one in VEC_STMT.  */
  switch (STMT_VINFO_TYPE (stmt_info))
    {
    case type_demotion_vec_info_type:
    case type_promotion_vec_info_type:
    case type_conversion_vec_info_type:
      done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case induc_vec_info_type:
      done = vectorizable_induction (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case shift_vec_info_type:
      done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case op_vec_info_type:
      done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case assignment_vec_info_type:
      done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case load_vec_info_type:
      done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
                                slp_node_instance);
      gcc_assert (done);
      break;

    case store_vec_info_type:
      done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
	{
	  /* In case of interleaving, the whole chain is vectorized when the
	     last store in the chain is reached.  Store stmts before the last
	     one are skipped, and their vec_stmt_info shouldn't be freed
	     meanwhile.  */
	  *grouped_store = true;
	  stmt_vec_info group_info
	    = vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info));
	  if (GROUP_STORE_COUNT (group_info) == GROUP_SIZE (group_info))
	    is_store = true;
	}
      else
	is_store = true;
      break;

    case condition_vec_info_type:
      done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
      gcc_assert (done);
      break;

    case comparison_vec_info_type:
      done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node);
      gcc_assert (done);
      break;

    case call_vec_info_type:
      done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
      /* The original call may have been replaced at GSI; re-fetch it.  */
      stmt = gsi_stmt (*gsi);
      break;

    case call_simd_clone_vec_info_type:
      done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
      /* Likewise re-fetch the (possibly replaced) statement at GSI.  */
      stmt = gsi_stmt (*gsi);
      break;

    case reduc_vec_info_type:
      done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node,
				     slp_node_instance);
      gcc_assert (done);
      break;

    default:
      if (!STMT_VINFO_LIVE_P (stmt_info))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "stmt not supported.\n");
	  gcc_unreachable ();
	}
    }

  /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
     This would break hybrid SLP vectorization.  */
  if (slp_node)
    gcc_assert (!vec_stmt
		&& STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt);

  /* Handle inner-loop stmts whose DEF is used in the loop-nest that
     is being vectorized, but outside the immediately enclosing loop.  */
  if (vec_stmt
      && nested_p
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
      && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
	  || STMT_VINFO_RELEVANT (stmt_info) ==
	     vect_used_in_outer_by_reduction))
    {
      struct loop *innerloop = LOOP_VINFO_LOOP (
	STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      tree scalar_dest;
      gimple *exit_phi;

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "Record the vdef for outer-loop vectorization.\n");

      /* Find the relevant loop-exit phi-node, and record the vec_stmt there
	 (to be used when vectorizing outer-loop stmts that use the DEF of
	 STMT).  */
      if (gimple_code (stmt) == GIMPLE_PHI)
	scalar_dest = PHI_RESULT (stmt);
      else
	scalar_dest = gimple_get_lhs (stmt);

      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
	{
	  /* Uses outside the inner loop are the loop-exit phis we are
	     looking for.  */
	  if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
	    {
	      exit_phi = USE_STMT (use_p);
	      STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
	    }
	}
    }

  /* Handle stmts whose DEF is used outside the loop-nest that is
     being vectorized.  */
  if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
    {
      done = can_vectorize_live_stmts (stmt, gsi, slp_node, &vec_stmt);
      gcc_assert (done);
    }

  if (vec_stmt)
    STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;

  return is_store;
}
963938fd1498Szrj
964038fd1498Szrj
964138fd1498Szrj /* Remove a group of stores (for SLP or interleaving), free their
964238fd1498Szrj stmt_vec_info. */
964338fd1498Szrj
964438fd1498Szrj void
vect_remove_stores(gimple * first_stmt)964538fd1498Szrj vect_remove_stores (gimple *first_stmt)
964638fd1498Szrj {
964738fd1498Szrj gimple *next = first_stmt;
964838fd1498Szrj gimple *tmp;
964938fd1498Szrj gimple_stmt_iterator next_si;
965038fd1498Szrj
965138fd1498Szrj while (next)
965238fd1498Szrj {
965338fd1498Szrj stmt_vec_info stmt_info = vinfo_for_stmt (next);
965438fd1498Szrj
965538fd1498Szrj tmp = GROUP_NEXT_ELEMENT (stmt_info);
965638fd1498Szrj if (is_pattern_stmt_p (stmt_info))
965738fd1498Szrj next = STMT_VINFO_RELATED_STMT (stmt_info);
965838fd1498Szrj /* Free the attached stmt_vec_info and remove the stmt. */
965938fd1498Szrj next_si = gsi_for_stmt (next);
966038fd1498Szrj unlink_stmt_vdef (next);
966138fd1498Szrj gsi_remove (&next_si, true);
966238fd1498Szrj release_defs (next);
966338fd1498Szrj free_stmt_vec_info (next);
966438fd1498Szrj next = tmp;
966538fd1498Szrj }
966638fd1498Szrj }
966738fd1498Szrj
966838fd1498Szrj
966938fd1498Szrj /* Function new_stmt_vec_info.
967038fd1498Szrj
967138fd1498Szrj Create and initialize a new stmt_vec_info struct for STMT. */
967238fd1498Szrj
967338fd1498Szrj stmt_vec_info
new_stmt_vec_info(gimple * stmt,vec_info * vinfo)967438fd1498Szrj new_stmt_vec_info (gimple *stmt, vec_info *vinfo)
967538fd1498Szrj {
967638fd1498Szrj stmt_vec_info res;
967738fd1498Szrj res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
967838fd1498Szrj
967938fd1498Szrj STMT_VINFO_TYPE (res) = undef_vec_info_type;
968038fd1498Szrj STMT_VINFO_STMT (res) = stmt;
968138fd1498Szrj res->vinfo = vinfo;
968238fd1498Szrj STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
968338fd1498Szrj STMT_VINFO_LIVE_P (res) = false;
968438fd1498Szrj STMT_VINFO_VECTYPE (res) = NULL;
968538fd1498Szrj STMT_VINFO_VEC_STMT (res) = NULL;
968638fd1498Szrj STMT_VINFO_VECTORIZABLE (res) = true;
968738fd1498Szrj STMT_VINFO_IN_PATTERN_P (res) = false;
968838fd1498Szrj STMT_VINFO_RELATED_STMT (res) = NULL;
968938fd1498Szrj STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
969038fd1498Szrj STMT_VINFO_DATA_REF (res) = NULL;
969138fd1498Szrj STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;
969238fd1498Szrj STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res) = ERROR_MARK;
969338fd1498Szrj
969438fd1498Szrj if (gimple_code (stmt) == GIMPLE_PHI
969538fd1498Szrj && is_loop_header_bb_p (gimple_bb (stmt)))
969638fd1498Szrj STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
969738fd1498Szrj else
969838fd1498Szrj STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
969938fd1498Szrj
970038fd1498Szrj STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
970138fd1498Szrj STMT_SLP_TYPE (res) = loop_vect;
970238fd1498Szrj STMT_VINFO_NUM_SLP_USES (res) = 0;
970338fd1498Szrj
970438fd1498Szrj GROUP_FIRST_ELEMENT (res) = NULL;
970538fd1498Szrj GROUP_NEXT_ELEMENT (res) = NULL;
970638fd1498Szrj GROUP_SIZE (res) = 0;
970738fd1498Szrj GROUP_STORE_COUNT (res) = 0;
970838fd1498Szrj GROUP_GAP (res) = 0;
970938fd1498Szrj GROUP_SAME_DR_STMT (res) = NULL;
971038fd1498Szrj
971138fd1498Szrj return res;
971238fd1498Szrj }
971338fd1498Szrj
971438fd1498Szrj
971538fd1498Szrj /* Create a hash table for stmt_vec_info. */
971638fd1498Szrj
971738fd1498Szrj void
init_stmt_vec_info_vec(void)971838fd1498Szrj init_stmt_vec_info_vec (void)
971938fd1498Szrj {
972038fd1498Szrj gcc_assert (!stmt_vec_info_vec.exists ());
972138fd1498Szrj stmt_vec_info_vec.create (50);
972238fd1498Szrj }
972338fd1498Szrj
972438fd1498Szrj
972538fd1498Szrj /* Free hash table for stmt_vec_info. */
972638fd1498Szrj
972738fd1498Szrj void
free_stmt_vec_info_vec(void)972838fd1498Szrj free_stmt_vec_info_vec (void)
972938fd1498Szrj {
973038fd1498Szrj unsigned int i;
973138fd1498Szrj stmt_vec_info info;
973238fd1498Szrj FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
973338fd1498Szrj if (info != NULL)
973438fd1498Szrj free_stmt_vec_info (STMT_VINFO_STMT (info));
973538fd1498Szrj gcc_assert (stmt_vec_info_vec.exists ());
973638fd1498Szrj stmt_vec_info_vec.release ();
973738fd1498Szrj }
973838fd1498Szrj
973938fd1498Szrj
/* Free stmt vectorization related info for STMT, if any.  If STMT was
   replaced by a pattern, the pattern statement, its def sequence and
   their stmt_vec_infos are torn down as well.  */

void
free_stmt_vec_info (gimple *stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  if (!stmt_info)
    return;

  /* Check if this statement has a related "pattern stmt"
     (introduced by the vectorizer during the pattern recognition
     pass).  Free pattern's stmt_vec_info and def stmt's stmt_vec_info
     too.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      stmt_vec_info patt_info
	= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
      if (patt_info)
	{
	  gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
	  gimple *patt_stmt = STMT_VINFO_STMT (patt_info);
	  /* Detach the pattern stmt from any block and release the SSA
	     name it defines, since the stmt itself is going away.  */
	  gimple_set_bb (patt_stmt, NULL);
	  tree lhs = gimple_get_lhs (patt_stmt);
	  if (lhs && TREE_CODE (lhs) == SSA_NAME)
	    release_ssa_name (lhs);
	  if (seq)
	    {
	      /* Likewise for every stmt of the pattern def sequence.  */
	      gimple_stmt_iterator si;
	      for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
		{
		  gimple *seq_stmt = gsi_stmt (si);
		  gimple_set_bb (seq_stmt, NULL);
		  lhs = gimple_get_lhs (seq_stmt);
		  if (lhs && TREE_CODE (lhs) == SSA_NAME)
		    release_ssa_name (lhs);
		  /* Recursively free the def stmt's own info.  */
		  free_stmt_vec_info (seq_stmt);
		}
	    }
	  free_stmt_vec_info (patt_stmt);
	}
    }

  STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
  STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
  /* Clear the UID mapping before freeing so no dangling pointer
     remains in the global table.  */
  set_vinfo_for_stmt (stmt, NULL);
  free (stmt_info);
}
978838fd1498Szrj
978938fd1498Szrj
/* Function get_vectype_for_scalar_type_and_size.

   Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
   by the target.  SIZE is the requested vector size in bytes; zero means
   "use the target's preferred SIMD mode".  Returns NULL_TREE when no
   suitable vector type exists.  */

tree
get_vectype_for_scalar_type_and_size (tree scalar_type, poly_uint64 size)
{
  tree orig_scalar_type = scalar_type;
  scalar_mode inner_mode;
  machine_mode simd_mode;
  poly_uint64 nunits;
  tree vectype;

  /* Only scalars whose mode is integral or floating-point can become
     vector elements.  */
  if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
      && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
    return NULL_TREE;

  unsigned int nbytes = GET_MODE_SIZE (inner_mode);

  /* For vector types of elements whose mode precision doesn't
     match their types precision we use an element type of mode
     precision.  The vectorization routines will have to make sure
     they support the proper result truncation/extension.
     We also make sure to build vector types with INTEGER_TYPE
     component type only.  */
  if (INTEGRAL_TYPE_P (scalar_type)
      && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
	  || TREE_CODE (scalar_type) != INTEGER_TYPE))
    scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
						  TYPE_UNSIGNED (scalar_type));

  /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
     When the component mode passes the above test simply use a type
     corresponding to that mode.  The theory is that any use that
     would cause problems with this will disable vectorization anyway.  */
  else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
	   && !INTEGRAL_TYPE_P (scalar_type))
    scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);

  /* We can't build a vector type of elements with alignment bigger than
     their size.  */
  else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
    scalar_type = lang_hooks.types.type_for_mode (inner_mode,
						  TYPE_UNSIGNED (scalar_type));

  /* If we fell back to using the mode fail if there was
     no scalar type for it.  */
  if (scalar_type == NULL_TREE)
    return NULL_TREE;

  /* If no size was supplied use the mode the target prefers.   Otherwise
     lookup a vector mode of the specified size.  */
  if (known_eq (size, 0U))
    simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
  else if (!multiple_p (size, nbytes, &nunits)
	   || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
    return NULL_TREE;
  /* NOTE: nunits == 1 is allowed to support single element vector types.  */
  if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits))
    return NULL_TREE;

  vectype = build_vector_type (scalar_type, nunits);

  /* Reject the type if the target provides neither a vector mode nor an
     integer mode for it.  */
  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
    return NULL_TREE;

  /* Re-attach the address-space qualifier if we canonicalized the scalar
     type.  */
  if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
    return build_qualified_type
	     (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));

  return vectype;
}
986638fd1498Szrj
/* The vector size (in bytes) currently in use; zero until the first
   successful call to get_vectype_for_scalar_type fixes it.  */
poly_uint64 current_vector_size;
986838fd1498Szrj
986938fd1498Szrj /* Function get_vectype_for_scalar_type.
987038fd1498Szrj
987138fd1498Szrj Returns the vector type corresponding to SCALAR_TYPE as supported
987238fd1498Szrj by the target. */
987338fd1498Szrj
987438fd1498Szrj tree
get_vectype_for_scalar_type(tree scalar_type)987538fd1498Szrj get_vectype_for_scalar_type (tree scalar_type)
987638fd1498Szrj {
987738fd1498Szrj tree vectype;
987838fd1498Szrj vectype = get_vectype_for_scalar_type_and_size (scalar_type,
987938fd1498Szrj current_vector_size);
988038fd1498Szrj if (vectype
988138fd1498Szrj && known_eq (current_vector_size, 0U))
988238fd1498Szrj current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
988338fd1498Szrj return vectype;
988438fd1498Szrj }
988538fd1498Szrj
988638fd1498Szrj /* Function get_mask_type_for_scalar_type.
988738fd1498Szrj
988838fd1498Szrj Returns the mask type corresponding to a result of comparison
988938fd1498Szrj of vectors of specified SCALAR_TYPE as supported by target. */
989038fd1498Szrj
989138fd1498Szrj tree
get_mask_type_for_scalar_type(tree scalar_type)989238fd1498Szrj get_mask_type_for_scalar_type (tree scalar_type)
989338fd1498Szrj {
989438fd1498Szrj tree vectype = get_vectype_for_scalar_type (scalar_type);
989538fd1498Szrj
989638fd1498Szrj if (!vectype)
989738fd1498Szrj return NULL;
989838fd1498Szrj
989938fd1498Szrj return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
990038fd1498Szrj current_vector_size);
990138fd1498Szrj }
990238fd1498Szrj
990338fd1498Szrj /* Function get_same_sized_vectype
990438fd1498Szrj
990538fd1498Szrj Returns a vector type corresponding to SCALAR_TYPE of size
990638fd1498Szrj VECTOR_TYPE if supported by the target. */
990738fd1498Szrj
990838fd1498Szrj tree
get_same_sized_vectype(tree scalar_type,tree vector_type)990938fd1498Szrj get_same_sized_vectype (tree scalar_type, tree vector_type)
991038fd1498Szrj {
991138fd1498Szrj if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
991238fd1498Szrj return build_same_sized_truth_vector_type (vector_type);
991338fd1498Szrj
991438fd1498Szrj return get_vectype_for_scalar_type_and_size
991538fd1498Szrj (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
991638fd1498Szrj }
991738fd1498Szrj
/* Function vect_is_simple_use.

   Input:
   VINFO - the vect info of the loop or basic block that is being vectorized.
   OPERAND - operand in the loop or bb.
   Output:
   DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
   DT - the type of definition

   Returns whether a stmt with OPERAND can be vectorized.
   For loops, supportable operands are constants, loop invariants, and operands
   that are defined by the current iteration of the loop.  Unsupportable
   operands are those that are defined by a previous iteration of the loop (as
   is the case in reduction/induction computations).
   For basic blocks, supportable operands are constants and bb invariants.
   For now, operands defined outside the basic block are not supported.  */

bool
vect_is_simple_use (tree operand, vec_info *vinfo,
		    gimple **def_stmt, enum vect_def_type *dt)
{
  /* Start from the pessimistic answer; each early return below refines
     *DT for the case it recognizes.  */
  *def_stmt = NULL;
  *dt = vect_unknown_def_type;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_is_simple_use: operand ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
      dump_printf (MSG_NOTE, "\n");
    }

  /* Literal constants are always vectorizable.  */
  if (CONSTANT_CLASS_P (operand))
    {
      *dt = vect_constant_def;
      return true;
    }

  /* Other invariants (e.g. addresses of static objects) are treated as
     defined outside the vectorized region.  */
  if (is_gimple_min_invariant (operand))
    {
      *dt = vect_external_def;
      return true;
    }

  if (TREE_CODE (operand) != SSA_NAME)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not ssa-name.\n");
      return false;
    }

  /* A default definition (e.g. a function parameter) has no real
     defining statement inside the region.  */
  if (SSA_NAME_IS_DEFAULT_DEF (operand))
    {
      *dt = vect_external_def;
      return true;
    }

  *def_stmt = SSA_NAME_DEF_STMT (operand);
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
    }

  /* Defs outside the loop/bb being vectorized are external; defs inside
     carry their classification in the stmt's vec_info.  */
  if (! vect_stmt_in_region_p (vinfo, *def_stmt))
    *dt = vect_external_def;
  else
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
      *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
      switch (*dt)
	{
	case vect_uninitialized_def:
	  dump_printf (MSG_NOTE, "uninitialized\n");
	  break;
	case vect_constant_def:
	  dump_printf (MSG_NOTE, "constant\n");
	  break;
	case vect_external_def:
	  dump_printf (MSG_NOTE, "external\n");
	  break;
	case vect_internal_def:
	  dump_printf (MSG_NOTE, "internal\n");
	  break;
	case vect_induction_def:
	  dump_printf (MSG_NOTE, "induction\n");
	  break;
	case vect_reduction_def:
	  dump_printf (MSG_NOTE, "reduction\n");
	  break;
	case vect_double_reduction_def:
	  dump_printf (MSG_NOTE, "double reduction\n");
	  break;
	case vect_nested_cycle:
	  dump_printf (MSG_NOTE, "nested cycle\n");
	  break;
	case vect_unknown_def_type:
	  dump_printf (MSG_NOTE, "unknown\n");
	  break;
	}
    }

  if (*dt == vect_unknown_def_type)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Unsupported pattern.\n");
      return false;
    }

  /* Only PHIs, assignments and calls can be vectorized; any other
     defining stmt (e.g. asm) makes the use unsupportable.  */
  switch (gimple_code (*def_stmt))
    {
    case GIMPLE_PHI:
    case GIMPLE_ASSIGN:
    case GIMPLE_CALL:
      break;
    default:
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "unsupported defining stmt:\n");
      return false;
    }

  return true;
}
1004938fd1498Szrj
1005038fd1498Szrj /* Function vect_is_simple_use.
1005138fd1498Szrj
1005238fd1498Szrj Same as vect_is_simple_use but also determines the vector operand
1005338fd1498Szrj type of OPERAND and stores it to *VECTYPE. If the definition of
1005438fd1498Szrj OPERAND is vect_uninitialized_def, vect_constant_def or
1005538fd1498Szrj vect_external_def *VECTYPE will be set to NULL_TREE and the caller
1005638fd1498Szrj is responsible to compute the best suited vector type for the
1005738fd1498Szrj scalar operand. */
1005838fd1498Szrj
1005938fd1498Szrj bool
vect_is_simple_use(tree operand,vec_info * vinfo,gimple ** def_stmt,enum vect_def_type * dt,tree * vectype)1006038fd1498Szrj vect_is_simple_use (tree operand, vec_info *vinfo,
1006138fd1498Szrj gimple **def_stmt, enum vect_def_type *dt, tree *vectype)
1006238fd1498Szrj {
1006338fd1498Szrj if (!vect_is_simple_use (operand, vinfo, def_stmt, dt))
1006438fd1498Szrj return false;
1006538fd1498Szrj
1006638fd1498Szrj /* Now get a vector type if the def is internal, otherwise supply
1006738fd1498Szrj NULL_TREE and leave it up to the caller to figure out a proper
1006838fd1498Szrj type for the use stmt. */
1006938fd1498Szrj if (*dt == vect_internal_def
1007038fd1498Szrj || *dt == vect_induction_def
1007138fd1498Szrj || *dt == vect_reduction_def
1007238fd1498Szrj || *dt == vect_double_reduction_def
1007338fd1498Szrj || *dt == vect_nested_cycle)
1007438fd1498Szrj {
1007538fd1498Szrj stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
1007638fd1498Szrj
1007738fd1498Szrj if (STMT_VINFO_IN_PATTERN_P (stmt_info)
1007838fd1498Szrj && !STMT_VINFO_RELEVANT (stmt_info)
1007938fd1498Szrj && !STMT_VINFO_LIVE_P (stmt_info))
1008038fd1498Szrj stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
1008138fd1498Szrj
1008238fd1498Szrj *vectype = STMT_VINFO_VECTYPE (stmt_info);
1008338fd1498Szrj gcc_assert (*vectype != NULL_TREE);
1008438fd1498Szrj }
1008538fd1498Szrj else if (*dt == vect_uninitialized_def
1008638fd1498Szrj || *dt == vect_constant_def
1008738fd1498Szrj || *dt == vect_external_def)
1008838fd1498Szrj *vectype = NULL_TREE;
1008938fd1498Szrj else
1009038fd1498Szrj gcc_unreachable ();
1009138fd1498Szrj
1009238fd1498Szrj return true;
1009338fd1498Szrj }
1009438fd1498Szrj
1009538fd1498Szrj
/* Function supportable_widening_operation

   Check whether an operation represented by the code CODE is a
   widening operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   producing a result of type VECTYPE_OUT).

   Widening operations we currently support are NOP (CONVERT), FLOAT
   and WIDEN_MULT.  This function checks if these operations are supported
   by the target platform either directly (via vector tree-codes), or via
   target builtins.

   Output:
   - CODE1 and CODE2 are codes of vector operations to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like char->short->int - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   widening operation (short in the above example).  */

bool
supportable_widening_operation (enum tree_code code, gimple *stmt,
				tree vectype_out, tree vectype_in,
                                enum tree_code *code1, enum tree_code *code2,
                                int *multi_step_cvt,
                                vec<tree> *interm_types)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *vect_loop = NULL;
  machine_mode vec_mode;
  enum insn_code icode1, icode2;
  optab optab1, optab2;
  tree vectype = vectype_in;
  tree wide_vectype = vectype_out;
  enum tree_code c1, c2;
  int i;
  tree prev_type, intermediate_type;
  machine_mode intermediate_mode, prev_mode;
  optab optab3, optab4;

  *multi_step_cvt = 0;
  if (loop_info)
    vect_loop = LOOP_VINFO_LOOP (loop_info);

  /* First map CODE to the pair of vector tree codes (C1, C2) that
     together produce the low and high halves of the widened result.  */
  switch (code)
    {
    case WIDEN_MULT_EXPR:
      /* The result of a vectorized widening operation usually requires
	 two vectors (because the widened results do not fit into one vector).
	 The generated vector results would normally be expected to be
	 generated in the same order as in the original scalar computation,
	 i.e. if 8 results are generated in each vector iteration, they are
	 to be organized as follows:
		vect1: [res1,res2,res3,res4],
		vect2: [res5,res6,res7,res8].

	 However, in the special case that the result of the widening
	 operation is used in a reduction computation only, the order doesn't
	 matter (because when vectorizing a reduction we change the order of
	 the computation).  Some targets can take advantage of this and
	 generate more efficient code.  For example, targets like Altivec,
	 that support widen_mult using a sequence of {mult_even,mult_odd}
	 generate the following vectors:
		vect1: [res1,res3,res5,res7],
		vect2: [res2,res4,res6,res8].

	 When vectorizing outer-loops, we execute the inner-loop sequentially
	 (each vectorized inner-loop iteration contributes to VF outer-loop
	 iterations in parallel).  We therefore don't allow to change the
	 order of the computation in the inner-loop during outer-loop
	 vectorization.  */
      /* TODO: Another case in which order doesn't *really* matter is when we
	 widen and then contract again, e.g. (short)((int)x * y >> 8).
	 Normally, pack_trunc performs an even/odd permute, whereas the
	 repack from an even/odd expansion would be an interleave, which
	 would be significantly simpler for e.g. AVX2.  */
      /* In any case, in order to avoid duplicating the code below, recurse
	 on VEC_WIDEN_MULT_EVEN_EXPR.  If it succeeds, all the return values
	 are properly set up for the caller.  If we fail, we'll continue with
	 a VEC_WIDEN_MULT_LO/HI_EXPR check.  */
      if (vect_loop
	  && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
	  && !nested_in_vect_loop_p (vect_loop, stmt)
	  && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
					     stmt, vectype_out, vectype_in,
					     code1, code2, multi_step_cvt,
					     interm_types))
        {
          /* Elements in a vector with vect_used_by_reduction property cannot
             be reordered if the use chain with this property does not have the
             same operation.  One such an example is s += a * b, where elements
             in a and b cannot be reordered.  Here we check if the vector defined
             by STMT is only directly used in the reduction statement.  */
	  tree lhs = gimple_assign_lhs (stmt);
          use_operand_p dummy;
          gimple *use_stmt;
          stmt_vec_info use_stmt_info = NULL;
          if (single_imm_use (lhs, &dummy, &use_stmt)
              && (use_stmt_info = vinfo_for_stmt (use_stmt))
              && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
            return true;
        }
      /* The even/odd form was rejected; fall back to the lo/hi form.  */
      c1 = VEC_WIDEN_MULT_LO_EXPR;
      c2 = VEC_WIDEN_MULT_HI_EXPR;
      break;

    case DOT_PROD_EXPR:
      c1 = DOT_PROD_EXPR;
      c2 = DOT_PROD_EXPR;
      break;

    case SAD_EXPR:
      c1 = SAD_EXPR;
      c2 = SAD_EXPR;
      break;

    case VEC_WIDEN_MULT_EVEN_EXPR:
      /* Support the recursion induced just above.  */
      c1 = VEC_WIDEN_MULT_EVEN_EXPR;
      c2 = VEC_WIDEN_MULT_ODD_EXPR;
      break;

    case WIDEN_LSHIFT_EXPR:
      c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
      c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
      break;

    CASE_CONVERT:
      c1 = VEC_UNPACK_LO_EXPR;
      c2 = VEC_UNPACK_HI_EXPR;
      break;

    case FLOAT_EXPR:
      c1 = VEC_UNPACK_FLOAT_LO_EXPR;
      c2 = VEC_UNPACK_FLOAT_HI_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
	 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
	 computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  /* On big-endian targets the "low" and "high" halves are swapped;
     even/odd pairs are endian-neutral and are left alone.  */
  if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
    std::swap (c1, c2);

  /* NOTE(review): the FIX_TRUNC_EXPR case above returns false, so this
     branch appears unreachable here — confirm before relying on it.  */
  if (code == FIX_TRUNC_EXPR)
    {
      /* The signedness is determined from output operand.  */
      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
      optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
    }
  else
    {
      optab1 = optab_for_tree_code (c1, vectype, optab_default);
      optab2 = optab_for_tree_code (c2, vectype, optab_default);
    }

  if (!optab1 || !optab2)
    return false;

  /* Both halves must have an instruction for the input vector mode.  */
  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
       || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;
  *code2 = c2;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
      && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
    /* For scalar masks we may have different boolean
       vector types having the same QImode.  Thus we
       add additional check for elements number.  */
    return (!VECTOR_BOOLEAN_TYPE_P (vectype)
	    || known_eq (TYPE_VECTOR_SUBPARTS (vectype),
			 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */

  prev_type = vectype;
  prev_mode = vec_mode;

  /* Only plain conversions can be widened through intermediate types.  */
  if (!CONVERT_EXPR_CODE_P (code))
    return false;

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
     not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      /* Each step widens PREV_MODE into the mode the lo-half insn of the
	 previous step produced.  */
      intermediate_mode = insn_data[icode1].operand[0].mode;
      if (VECTOR_BOOLEAN_TYPE_P (prev_type))
	{
	  intermediate_type = vect_halve_mask_nunits (prev_type);
	  if (intermediate_mode != TYPE_MODE (intermediate_type))
	    return false;
	}
      else
	intermediate_type
	  = lang_hooks.types.type_for_mode (intermediate_mode,
					    TYPE_UNSIGNED (prev_type));

      optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
      optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);

      /* Require: (a) insns widening PREV_MODE into INTERMEDIATE_MODE, and
	 (b) insns widening INTERMEDIATE_MODE further (checked next
	 iteration or by the wide_vectype test below).  */
      if (!optab3 || !optab4
          || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
	  || insn_data[icode1].operand[0].mode != intermediate_mode
	  || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
	  || insn_data[icode2].operand[0].mode != intermediate_mode
	  || ((icode1 = optab_handler (optab3, intermediate_mode))
	      == CODE_FOR_nothing)
	  || ((icode2 = optab_handler (optab4, intermediate_mode))
	      == CODE_FOR_nothing))
	break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
	  && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
	return (!VECTOR_BOOLEAN_TYPE_P (vectype)
		|| known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
			     TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));

      prev_type = intermediate_type;
      prev_mode = intermediate_mode;
    }

  /* Failed to reach WIDE_VECTYPE; discard any partial chain.  */
  interm_types->release ();
  return false;
}
1033838fd1498Szrj
1033938fd1498Szrj
/* Function supportable_narrowing_operation

   Check whether an operation represented by the code CODE is a
   narrowing operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   and producing a result of type VECTYPE_OUT).

   Narrowing operations we currently support are NOP (CONVERT) and
   FIX_TRUNC.  This function checks if these operations are supported by
   the target platform directly via vector tree-codes.

   Output:
   - CODE1 is the code of a vector operation to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like int->short->char - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   narrowing operation (short in the above example).   */

bool
supportable_narrowing_operation (enum tree_code code,
				 tree vectype_out, tree vectype_in,
				 enum tree_code *code1, int *multi_step_cvt,
                                 vec<tree> *interm_types)
{
  machine_mode vec_mode;
  enum insn_code icode1;
  optab optab1, interm_optab;
  tree vectype = vectype_in;
  tree narrow_vectype = vectype_out;
  enum tree_code c1;
  tree intermediate_type, prev_type;
  machine_mode intermediate_mode, prev_mode;
  int i;
  bool uns;

  *multi_step_cvt = 0;
  /* Map CODE to the vector tree code that packs two input vectors into
     one narrower result vector.  */
  switch (code)
    {
    CASE_CONVERT:
      c1 = VEC_PACK_TRUNC_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_PACK_FIX_TRUNC_EXPR;
      break;

    case FLOAT_EXPR:
      /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
	 tree code and optabs used for computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (code == FIX_TRUNC_EXPR)
    /* The signedness is determined from output operand.  */
    optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
  else
    optab1 = optab_for_tree_code (c1, vectype, optab_default);

  if (!optab1)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;

  /* Single-step case: the insn already produces NARROW_VECTYPE's mode.  */
  if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
    /* For scalar masks we may have different boolean
       vector types having the same QImode.  Thus we
       add additional check for elements number.  */
    return (!VECTOR_BOOLEAN_TYPE_P (vectype)
	    || known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
			 TYPE_VECTOR_SUBPARTS (narrow_vectype)));

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */
  prev_mode = vec_mode;
  prev_type = vectype;
  if (code == FIX_TRUNC_EXPR)
    uns = TYPE_UNSIGNED (vectype_out);
  else
    uns = TYPE_UNSIGNED (vectype);

  /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
     conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
     costly than signed.  */
  if (code == FIX_TRUNC_EXPR && uns)
    {
      enum insn_code icode2;

      intermediate_type
	= lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
      interm_optab
	= optab_for_tree_code (c1, intermediate_type, optab_default);
      /* Switch to the signed variant only when it narrows to the same
	 mode as the unsigned one would.  */
      if (interm_optab != unknown_optab
	  && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
	  && insn_data[icode1].operand[0].mode
	     == insn_data[icode2].operand[0].mode)
	{
	  uns = false;
	  optab1 = interm_optab;
	  icode1 = icode2;
	}
    }

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      /* Each step packs PREV_MODE into the mode produced by the previous
	 step's insn; after the first step only VEC_PACK_TRUNC is used.  */
      intermediate_mode = insn_data[icode1].operand[0].mode;
      if (VECTOR_BOOLEAN_TYPE_P (prev_type))
	{
	  intermediate_type = vect_double_mask_nunits (prev_type);
	  if (intermediate_mode != TYPE_MODE (intermediate_type))
	    return false;
	}
      else
	intermediate_type
	  = lang_hooks.types.type_for_mode (intermediate_mode, uns);
      interm_optab
	= optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
			       optab_default);
      if (!interm_optab
	  || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
	  || insn_data[icode1].operand[0].mode != intermediate_mode
	  || ((icode1 = optab_handler (interm_optab, intermediate_mode))
	      == CODE_FOR_nothing))
	break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
	return (!VECTOR_BOOLEAN_TYPE_P (vectype)
		|| known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
			     TYPE_VECTOR_SUBPARTS (narrow_vectype)));

      prev_mode = intermediate_mode;
      prev_type = intermediate_type;
      optab1 = interm_optab;
    }

  /* Failed to reach NARROW_VECTYPE; discard any partial chain.  */
  interm_types->release ();
  return false;
}
1049338fd1498Szrj
1049438fd1498Szrj /* Generate and return a statement that sets vector mask MASK such that
1049538fd1498Szrj MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I. */
1049638fd1498Szrj
1049738fd1498Szrj gcall *
vect_gen_while(tree mask,tree start_index,tree end_index)1049838fd1498Szrj vect_gen_while (tree mask, tree start_index, tree end_index)
1049938fd1498Szrj {
1050038fd1498Szrj tree cmp_type = TREE_TYPE (start_index);
1050138fd1498Szrj tree mask_type = TREE_TYPE (mask);
1050238fd1498Szrj gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
1050338fd1498Szrj cmp_type, mask_type,
1050438fd1498Szrj OPTIMIZE_FOR_SPEED));
1050538fd1498Szrj gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
1050638fd1498Szrj start_index, end_index,
1050738fd1498Szrj build_zero_cst (mask_type));
1050838fd1498Szrj gimple_call_set_lhs (call, mask);
1050938fd1498Szrj return call;
1051038fd1498Szrj }
1051138fd1498Szrj
1051238fd1498Szrj /* Generate a vector mask of type MASK_TYPE for which index I is false iff
1051338fd1498Szrj J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
1051438fd1498Szrj
1051538fd1498Szrj tree
vect_gen_while_not(gimple_seq * seq,tree mask_type,tree start_index,tree end_index)1051638fd1498Szrj vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
1051738fd1498Szrj tree end_index)
1051838fd1498Szrj {
1051938fd1498Szrj tree tmp = make_ssa_name (mask_type);
1052038fd1498Szrj gcall *call = vect_gen_while (tmp, start_index, end_index);
1052138fd1498Szrj gimple_seq_add_stmt (seq, call);
1052238fd1498Szrj return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
1052338fd1498Szrj }
10524