/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2018 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "cgraph.h"
#include "dumpfile.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-manip.h"
#include "cfgloop.h"
#include "tree-ssa-loop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "builtins.h"
#include "internal-fn.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"
#include "tree-ssa-loop-niter.h"
#include "gimple-fold.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"

/* Return the vectorized type for the given statement.  */

tree
stmt_vectype (struct _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}

/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */
bool
stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
{
  gimple *stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop* loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}

/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
		  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
		  int misalign, enum vect_cost_model_location where)
{
  if ((kind == vector_load || kind == unaligned_load)
      && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    kind = vector_gather_load;
  if ((kind == vector_store || kind == unaligned_store)
      && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    kind = vector_scatter_store;
  if (body_cost_vec)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      stmt_info_for_cost si = { count, kind,
			        stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
				misalign };
      body_cost_vec->safe_push (si);
      return (unsigned)
	(builtin_vectorization_cost (kind, vectype, misalign) * count);
    }
  else
    return add_stmt_cost (stmt_info->vinfo->target_cost_data,
			  count, kind, stmt_info, misalign, where);
}

/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
			 "vect_array");
}
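
/* For example (illustrative only, no particular target assumed):
   calling create_vector_array with a V4SI vector type and NELEMS == 2
   yields a "vect_array" temporary of type V4SI[2], which the
   read_vector_array and write_vector_array helpers below access one
   vector at a time.  */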

/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
		   tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple *new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return vect_name;
}

/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT.  */

static void
write_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree vect,
		    tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple *new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
}

/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, tree alias_ptr_type)
{
  tree mem_ref;

  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}

/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (vec<gimple *> *worklist, gimple *stmt,
		    enum vect_relevant relevant, bool live_p)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
  gimple *pattern_stmt;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "mark relevant %d, live %d: ", relevant, live_p);
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
    }

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern, in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      /* This is the last stmt in a sequence that was detected as a
	 pattern that can potentially be vectorized.  Don't mark the stmt
	 as relevant/live because it's not going to be vectorized.
	 Instead mark the pattern-stmt that replaces it.  */

      pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "last stmt in pattern. don't mark"
			 " relevant/live.\n");
      stmt_info = vinfo_for_stmt (pattern_stmt);
      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
      save_relevant = STMT_VINFO_RELEVANT (stmt_info);
      save_live_p = STMT_VINFO_LIVE_P (stmt_info);
      stmt = pattern_stmt;
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "already marked relevant/live.\n");
      return;
    }

  worklist->safe_push (stmt);
}


/* Function is_simple_and_all_uses_invariant

   Return true if STMT is simple and all uses of it are invariant.  */

bool
is_simple_and_all_uses_invariant (gimple *stmt, loop_vec_info loop_vinfo)
{
  tree op;
  gimple *def_stmt;
  ssa_op_iter iter;

  if (!is_gimple_assign (stmt))
    return false;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
    {
      enum vect_def_type dt = vect_uninitialized_def;

      if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  return false;
	}

      if (dt != vect_external_def && dt != vect_constant_def)
	return false;
    }
  return true;
}

/* Function vect_stmt_relevant_p.

   Return true if STMT in loop that is represented by LOOP_VINFO is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - it is a control stmt in the loop (other than the loop exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */
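
/* For example (illustrative): in "for (i = 0; i < n; i++) a[i] = b[i] + 1;"
   the store to a[i] alters memory and is therefore relevant, while the
   loop exit condition is deliberately not marked relevant.  */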

static bool
vect_stmt_relevant_p (gimple *stmt, loop_vec_info loop_vinfo,
		      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt)
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
         != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt)
	&& !gimple_clobber_p (stmt))
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
                           "vec_stmt_relevant_p: stmt has vdefs.\n");
	*relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
	{
	  basic_block bb = gimple_bb (USE_STMT (use_p));
	  if (!flow_bb_inside_loop_p (loop, bb))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
                                 "vec_stmt_relevant_p: used out of loop.\n");

	      if (is_gimple_debug (USE_STMT (use_p)))
		continue;

	      /* We expect all such uses to be in the loop exit phis
		 (because of loop-closed SSA form).  */
	      gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
	      gcc_assert (bb == single_exit (loop)->dest);

              *live_p = true;
	    }
	}
    }

  if (*live_p && *relevant == vect_unused_in_scope
      && !is_simple_and_all_uses_invariant (stmt, loop_vinfo))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vec_stmt_relevant_p: stmt live but not relevant.\n");
      *relevant = vect_used_only_live;
    }

  return (*live_p || *relevant);
}


/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it is of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */
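
  /* For example (an illustrative note): in the store "a[i_1] = x_2" the
     stored value x_2 is a non-indexing operand, so the function returns
     true for it, whereas the index i_1 only feeds the address
     computation and yields false.  */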

  if (!gimple_assign_copy_p (stmt))
    {
      if (is_gimple_call (stmt)
	  && gimple_call_internal_p (stmt))
	{
	  internal_fn ifn = gimple_call_internal_fn (stmt);
	  int mask_index = internal_fn_mask_index (ifn);
	  if (mask_index >= 0
	      && use == gimple_call_arg (stmt, mask_index))
	    return true;
	  int stored_value_index = internal_fn_stored_value_index (ifn);
	  if (stored_value_index >= 0
	      && use == gimple_call_arg (stmt, stored_value_index))
	    return true;
	  if (internal_gather_scatter_fn_p (ifn)
	      && use == gimple_call_arg (stmt, 1))
	    return true;
	}
      return false;
    }

  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}


/*
   Function process_use.

   Inputs:
   - a USE in STMT in a loop represented by LOOP_VINFO
   - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
     be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
       STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array indexing),
   which does not need to be directly vectorized, then the liveness/relevance
   of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
   skip DEF_STMT because it has already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
   be modified accordingly.

   Return true if everything is as expected. Return false otherwise.  */

static bool
process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo,
	     enum vect_relevant relevant, vec<gimple *> *worklist,
	     bool force)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  gimple *def_stmt;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
     return true;

  if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &dt))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: unsupported use in stmt.\n");
      return false;
    }

  if (!def_stmt || gimple_nop_p (def_stmt))
    return true;

  def_bb = gimple_bb (def_stmt);
  if (!flow_bb_inside_loop_p (loop, def_bb))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
      return true;
    }

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
     DEF_STMT must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DEF_STMT in the loop.  So we just
     check that everything is as expected, and we are done.  */
  dstmt_vinfo = vinfo_for_stmt (def_stmt);
  bb = gimple_bb (stmt);
  if (gimple_code (stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (def_stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
                         "reduc-stmt defining reduc-phi in the same nest.\n");
      if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
	dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
		  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return true;
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
	outer-loop-header-bb:
		d = def_stmt
	inner-loop:
		stmt # use (d)
	outer-loop-tail-bb:
		...		  */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
                         "outer-loop def-stmt defining inner-loop stmt.\n");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
		      vect_used_in_scope : vect_unused_in_scope;
	  break;

	case vect_used_in_outer_by_reduction:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_by_reduction;
	  break;

	case vect_used_in_outer:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_in_scope;
	  break;

	case vect_used_in_scope:
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
	outer-loop-header-bb:
		...
	inner-loop:
		d = def_stmt
	outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
		stmt # use (d)		*/
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
                         "inner-loop def-stmt defining outer-loop stmt.\n");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
            || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
                      vect_used_in_outer_by_reduction : vect_unused_in_scope;
          break;

        case vect_used_by_reduction:
	case vect_used_only_live:
          relevant = vect_used_in_outer_by_reduction;
          break;

        case vect_used_in_scope:
          relevant = vect_used_in_outer;
          break;

        default:
          gcc_unreachable ();
        }
    }
  /* We are also not interested in uses on loop PHI backedges that are
     inductions.  Otherwise we'll needlessly vectorize the IV increment
     and cause hybrid SLP for SLP inductions.  Unless the PHI is live
     of course.  */
  else if (gimple_code (stmt) == GIMPLE_PHI
	   && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
	   && ! STMT_VINFO_LIVE_P (stmt_vinfo)
	   && (PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (bb->loop_father))
	       == use))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
                         "induction value on backedge.\n");
      return true;
    }


  vect_mark_relevant (worklist, def_stmt, relevant, false);
  return true;
}


/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized. For example:

     for i...
       for j...
   1.    T0 = i + j
   2.	 T1 = a[T0]

   3.    j = j + 1

   Stmts 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  gimple *stmt;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  gimple *phi;
  bool live_p;
  enum vect_relevant relevant;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_mark_stmts_to_be_vectorized ===\n");

  auto_vec<gimple *, 64> worklist;

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  phi = gsi_stmt (si);
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
	    }

	  if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, phi, relevant, live_p);
	}
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  stmt = gsi_stmt (si);
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	    }

	  if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, stmt, relevant, live_p);
	}
    }

  /* 2. Process_worklist */
  while (worklist.length () > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = worklist.pop ();
      if (dump_enabled_p ())
	{
          dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	}

      /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
	 (DEF_STMT) as relevant/irrelevant according to the relevance property
	 of STMT.  */
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);

      /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
	 propagated as is to the DEF_STMTs of its USEs.

	 One exception is when STMT has been identified as defining a reduction
	 variable; in this case we set the relevance to vect_used_by_reduction.
	 This is because we distinguish between two kinds of relevant stmts -
	 those that are used by a reduction computation, and those that are
	 (also) used by a regular computation.  This allows us later on to
	 identify stmts that are used solely by a reduction, and therefore the
	 order of the results that they produce does not have to be kept.  */

      switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
        {
          case vect_reduction_def:
	    gcc_assert (relevant != vect_unused_in_scope);
	    if (relevant != vect_unused_in_scope
		&& relevant != vect_used_in_scope
		&& relevant != vect_used_by_reduction
		&& relevant != vect_used_only_live)
	      {
		if (dump_enabled_p ())
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "unsupported use of reduction.\n");
		return false;
	      }
	    break;

          case vect_nested_cycle:
	    if (relevant != vect_unused_in_scope
		&& relevant != vect_used_in_outer_by_reduction
		&& relevant != vect_used_in_outer)
              {
                if (dump_enabled_p ())
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "unsupported use of nested cycle.\n");

                return false;
              }
            break;

          case vect_double_reduction_def:
	    if (relevant != vect_unused_in_scope
		&& relevant != vect_used_by_reduction
		&& relevant != vect_used_only_live)
              {
                if (dump_enabled_p ())
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "unsupported use of double reduction.\n");

                return false;
              }
            break;

          default:
            break;
        }

      if (is_pattern_stmt_p (stmt_vinfo))
        {
          /* Pattern statements are not inserted into the code, so
             FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
             have to scan the RHS or function arguments instead.  */
          if (is_gimple_assign (stmt))
            {
	      enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
	      tree op = gimple_assign_rhs1 (stmt);

	      i = 1;
	      if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
		{
		  if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
				    relevant, &worklist, false)
		      || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
				       relevant, &worklist, false))
		    return false;
		  i = 2;
		}
	      for (; i < gimple_num_ops (stmt); i++)
                {
		  op = gimple_op (stmt, i);
                  if (TREE_CODE (op) == SSA_NAME
		      && !process_use (stmt, op, loop_vinfo, relevant,
				       &worklist, false))
                    return false;
                 }
            }
          else if (is_gimple_call (stmt))
            {
              for (i = 0; i < gimple_call_num_args (stmt); i++)
                {
                  tree arg = gimple_call_arg (stmt, i);
		  if (!process_use (stmt, arg, loop_vinfo, relevant,
				    &worklist, false))
                    return false;
                }
            }
        }
      else
        FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
          {
            tree op = USE_FROM_PTR (use_p);
	    if (!process_use (stmt, op, loop_vinfo, relevant,
			      &worklist, false))
              return false;
          }

      if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
	{
	  gather_scatter_info gs_info;
	  if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info))
	    gcc_unreachable ();
	  if (!process_use (stmt, gs_info.offset, loop_vinfo, relevant,
			    &worklist, true))
	    return false;
	}
    } /* while worklist */

  return true;
}


/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */
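
/* For instance (an illustrative sketch of the accounting below): a
   vector addition whose second operand is loop-invariant would be
   costed as one scalar_to_vec broadcast in the prologue plus NCOPIES
   vector_stmt costs in the body.  */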

void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
			enum vect_def_type *dt,
			int ndts,
			stmt_vector_for_cost *prologue_cost_vec,
			stmt_vector_for_cost *body_cost_vec)
{
  int i;
  int inside_cost = 0, prologue_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  gcc_assert (!PURE_SLP_STMT (stmt_info));

  /* Cost the "broadcast" of a scalar operand into a vector operand.
     Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
     cost model.  */
  for (i = 0; i < ndts; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
					 stmt_info, 0, vect_prologue);

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
				  stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_simple_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}


/* Model cost for type demotion and promotion operations.  PWR is normally
   zero for single-step promotions and demotions.  It will be one if
   two-step promotion/demotion is required, and so on.  Each additional
   step doubles the number of instructions required.  */
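
/* A worked example of the accounting below (illustrative only; the
   actual per-stmt costs come from the target's cost hooks): a
   single-step promotion (PWR == 0) is costed as vect_pow2 (1) == 2
   vec_promote_demote stmts, since widening produces a high and a low
   half; a two-step promotion (PWR == 1) is costed as
   vect_pow2 (1) + vect_pow2 (2) == 6, while a two-step demotion is
   vect_pow2 (0) + vect_pow2 (1) == 3.  */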

static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
				    enum vect_def_type *dt, int pwr)
{
  int i, tmp;
  int inside_cost = 0, prologue_cost = 0;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  void *target_cost_data;

  /* The SLP costs were already calculated during SLP tree build.  */
  gcc_assert (!PURE_SLP_STMT (stmt_info));

  if (loop_vinfo)
    target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
  else
    target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

  for (i = 0; i < pwr + 1; i++)
    {
      tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
	(i + 1) : i;
      inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
				    vec_promote_demote, stmt_info, 0,
				    vect_body);
    }

  /* FORNOW: Assuming a maximum of 2 args per stmt.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
				      stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_promotion_demotion_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}

/* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.  */

void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
		       vect_memory_access_type memory_access_type,
		       vec_load_store_type vls_type, slp_tree slp_node,
		       stmt_vector_for_cost *prologue_cost_vec,
		       stmt_vector_for_cost *body_cost_vec)
{
  unsigned int inside_cost = 0, prologue_cost = 0;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
  bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);

  if (vls_type == VLS_STORE_INVARIANT)
    prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
				       stmt_info, 0, vect_prologue);

  /* Grouped stores update all elements in the group at once,
     so we want the DR for the first statement.  */
  if (!slp_node && grouped_access_p)
    {
      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }

  /* True if we should include any once-per-group costs as well as
     the cost of the statement itself.  For SLP we only get called
     once per group anyhow.  */
  bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (first_stmt_p
      && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Uses high and low interleave or shuffle operations for each
	 needed permute.  */
      int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
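      /* Illustrative arithmetic only (not target data): group_size == 4
	 and ncopies == 1 give nstmts == 1 * 2 * 4 == 8 vec_perm stmts,
	 i.e. a log2-depth interleave network over the group's vectors.  */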
      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
				      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_model_store_cost: strided group_size = %d .\n",
                         group_size);
    }

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  /* Costs of the stores.  */
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_GATHER_SCATTER)
    {
      /* N scalar stores plus extracting the elements.  */
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (body_cost_vec,
				       ncopies * assumed_nunits,
				       scalar_store, stmt_info, 0, vect_body);
    }
  else
    vect_get_store_cost (dr, ncopies, &inside_cost, body_cost_vec);

  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP)
    {
      /* N scalar stores plus extracting the elements.  */
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (body_cost_vec,
				       ncopies * assumed_nunits,
				       vec_to_scalar, stmt_info, 0, vect_body);
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_store_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}


/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (struct data_reference *dr, int ncopies,
		     unsigned int *inside_cost,
		     stmt_vector_for_cost *body_cost_vec)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple *stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vector_store, stmt_info, 0,
					  vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: aligned.\n");
        break;
      }

    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned store.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_store, stmt_info,
					  DR_MISALIGNMENT (dr), vect_body);
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: unaligned supported by "
                           "hardware.\n");
        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_store_cost: unsupported access.\n");
        break;
      }

    default:
      gcc_unreachable ();
    }
}


/* Function vect_model_load_cost

   Models cost for loads.  In the case of grouped accesses, one access has
   the overhead of the grouped access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

void
vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
		      vect_memory_access_type memory_access_type,
		      slp_tree slp_node,
		      stmt_vector_for_cost *prologue_cost_vec,
		      stmt_vector_for_cost *body_cost_vec)
{
  gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  unsigned int inside_cost = 0, prologue_cost = 0;
  bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);

  /* Grouped loads read all elements in the group at once,
     so we want the DR for the first statement.  */
  if (!slp_node && grouped_access_p)
    {
      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }

  /* True if we should include any once-per-group costs as well as
     the cost of the statement itself.  For SLP we only get called
     once per group anyhow.  */
  bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate loads.  If a grouped
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (first_stmt_p
      && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Uses even and odd extract operations or shuffle operations
	 for each needed permute.  */
107338fd1498Szrj       int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
107438fd1498Szrj       int nstmts = ncopies * ceil_log2 (group_size) * group_size;
107538fd1498Szrj       inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
107638fd1498Szrj 				      stmt_info, 0, vect_body);
107738fd1498Szrj 
107838fd1498Szrj       if (dump_enabled_p ())
107938fd1498Szrj         dump_printf_loc (MSG_NOTE, vect_location,
108038fd1498Szrj                          "vect_model_load_cost: strided group_size = %d .\n",
108138fd1498Szrj                          group_size);
108238fd1498Szrj     }
108338fd1498Szrj 
108438fd1498Szrj   /* The loads themselves.  */
108538fd1498Szrj   if (memory_access_type == VMAT_ELEMENTWISE
108638fd1498Szrj       || memory_access_type == VMAT_GATHER_SCATTER)
108738fd1498Szrj     {
108838fd1498Szrj       /* N scalar loads plus gathering them into a vector.  */
108938fd1498Szrj       tree vectype = STMT_VINFO_VECTYPE (stmt_info);
109038fd1498Szrj       unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
109138fd1498Szrj       inside_cost += record_stmt_cost (body_cost_vec,
109238fd1498Szrj 				       ncopies * assumed_nunits,
109338fd1498Szrj 				       scalar_load, stmt_info, 0, vect_body);
109438fd1498Szrj     }
109538fd1498Szrj   else
109638fd1498Szrj     vect_get_load_cost (dr, ncopies, first_stmt_p,
109738fd1498Szrj 			&inside_cost, &prologue_cost,
109838fd1498Szrj 			prologue_cost_vec, body_cost_vec, true);
109938fd1498Szrj   if (memory_access_type == VMAT_ELEMENTWISE
110038fd1498Szrj       || memory_access_type == VMAT_STRIDED_SLP)
110138fd1498Szrj     inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
110238fd1498Szrj 				     stmt_info, 0, vect_body);
110338fd1498Szrj 
110438fd1498Szrj   if (dump_enabled_p ())
110538fd1498Szrj     dump_printf_loc (MSG_NOTE, vect_location,
110638fd1498Szrj                      "vect_model_load_cost: inside_cost = %d, "
110738fd1498Szrj                      "prologue_cost = %d .\n", inside_cost, prologue_cost);
110838fd1498Szrj }
110938fd1498Szrj 
111038fd1498Szrj 
111138fd1498Szrj /* Calculate cost of DR's memory access.  */
111238fd1498Szrj void
vect_get_load_cost(struct data_reference * dr,int ncopies,bool add_realign_cost,unsigned int * inside_cost,unsigned int * prologue_cost,stmt_vector_for_cost * prologue_cost_vec,stmt_vector_for_cost * body_cost_vec,bool record_prologue_costs)111338fd1498Szrj vect_get_load_cost (struct data_reference *dr, int ncopies,
111438fd1498Szrj 		    bool add_realign_cost, unsigned int *inside_cost,
111538fd1498Szrj 		    unsigned int *prologue_cost,
111638fd1498Szrj 		    stmt_vector_for_cost *prologue_cost_vec,
111738fd1498Szrj 		    stmt_vector_for_cost *body_cost_vec,
111838fd1498Szrj 		    bool record_prologue_costs)
111938fd1498Szrj {
112038fd1498Szrj   int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
112138fd1498Szrj   gimple *stmt = DR_STMT (dr);
112238fd1498Szrj   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
112338fd1498Szrj 
112438fd1498Szrj   switch (alignment_support_scheme)
112538fd1498Szrj     {
112638fd1498Szrj     case dr_aligned:
112738fd1498Szrj       {
112838fd1498Szrj 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
112938fd1498Szrj 					  stmt_info, 0, vect_body);
113038fd1498Szrj 
113138fd1498Szrj         if (dump_enabled_p ())
113238fd1498Szrj           dump_printf_loc (MSG_NOTE, vect_location,
113338fd1498Szrj                            "vect_model_load_cost: aligned.\n");
113438fd1498Szrj 
113538fd1498Szrj         break;
113638fd1498Szrj       }
113738fd1498Szrj     case dr_unaligned_supported:
113838fd1498Szrj       {
113938fd1498Szrj         /* Here, we assign an additional cost for the unaligned load.  */
114038fd1498Szrj 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
114138fd1498Szrj 					  unaligned_load, stmt_info,
114238fd1498Szrj 					  DR_MISALIGNMENT (dr), vect_body);
114338fd1498Szrj 
114438fd1498Szrj         if (dump_enabled_p ())
114538fd1498Szrj           dump_printf_loc (MSG_NOTE, vect_location,
114638fd1498Szrj                            "vect_model_load_cost: unaligned supported by "
114738fd1498Szrj                            "hardware.\n");
114838fd1498Szrj 
114938fd1498Szrj         break;
115038fd1498Szrj       }
115138fd1498Szrj     case dr_explicit_realign:
115238fd1498Szrj       {
115338fd1498Szrj 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
115438fd1498Szrj 					  vector_load, stmt_info, 0, vect_body);
115538fd1498Szrj 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
115638fd1498Szrj 					  vec_perm, stmt_info, 0, vect_body);
115738fd1498Szrj 
115838fd1498Szrj         /* FIXME: If the misalignment remains fixed across the iterations of
115938fd1498Szrj            the containing loop, the following cost should be added to the
116038fd1498Szrj            prologue costs.  */
116138fd1498Szrj         if (targetm.vectorize.builtin_mask_for_load)
116238fd1498Szrj 	  *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
116338fd1498Szrj 					    stmt_info, 0, vect_body);
116438fd1498Szrj 
116538fd1498Szrj         if (dump_enabled_p ())
116638fd1498Szrj           dump_printf_loc (MSG_NOTE, vect_location,
116738fd1498Szrj                            "vect_model_load_cost: explicit realign\n");
116838fd1498Szrj 
116938fd1498Szrj         break;
117038fd1498Szrj       }
117138fd1498Szrj     case dr_explicit_realign_optimized:
117238fd1498Szrj       {
117338fd1498Szrj         if (dump_enabled_p ())
117438fd1498Szrj           dump_printf_loc (MSG_NOTE, vect_location,
117538fd1498Szrj                            "vect_model_load_cost: unaligned software "
117638fd1498Szrj                            "pipelined.\n");
117738fd1498Szrj 
117838fd1498Szrj         /* The unaligned software pipeline has a load of an address, an initial
117938fd1498Szrj            load, and possibly a mask operation to "prime" the loop.  However,
118038fd1498Szrj            if this is an access in a group of loads, which provide grouped
118138fd1498Szrj            access, then the above cost should only be considered for one
118238fd1498Szrj            access in the group.  Inside the loop, there is a load op
118338fd1498Szrj            and a realignment op.  */
118438fd1498Szrj 
118538fd1498Szrj         if (add_realign_cost && record_prologue_costs)
118638fd1498Szrj           {
118738fd1498Szrj 	    *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
118838fd1498Szrj 						vector_stmt, stmt_info,
118938fd1498Szrj 						0, vect_prologue);
119038fd1498Szrj             if (targetm.vectorize.builtin_mask_for_load)
119138fd1498Szrj 	      *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
119238fd1498Szrj 						  vector_stmt, stmt_info,
119338fd1498Szrj 						  0, vect_prologue);
119438fd1498Szrj           }
119538fd1498Szrj 
119638fd1498Szrj 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
119738fd1498Szrj 					  stmt_info, 0, vect_body);
119838fd1498Szrj 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
119938fd1498Szrj 					  stmt_info, 0, vect_body);
120038fd1498Szrj 
120138fd1498Szrj         if (dump_enabled_p ())
120238fd1498Szrj           dump_printf_loc (MSG_NOTE, vect_location,
120338fd1498Szrj                            "vect_model_load_cost: explicit realign optimized"
120438fd1498Szrj                            "\n");
120538fd1498Szrj 
120638fd1498Szrj         break;
120738fd1498Szrj       }
120838fd1498Szrj 
120938fd1498Szrj     case dr_unaligned_unsupported:
121038fd1498Szrj       {
121138fd1498Szrj         *inside_cost = VECT_MAX_COST;
121238fd1498Szrj 
121338fd1498Szrj         if (dump_enabled_p ())
121438fd1498Szrj           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
121538fd1498Szrj                            "vect_model_load_cost: unsupported access.\n");
121638fd1498Szrj         break;
121738fd1498Szrj       }
121838fd1498Szrj 
121938fd1498Szrj     default:
122038fd1498Szrj       gcc_unreachable ();
122138fd1498Szrj     }
122238fd1498Szrj }
122338fd1498Szrj 
122438fd1498Szrj /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
122538fd1498Szrj    the loop preheader for the vectorized stmt STMT.  */
122638fd1498Szrj 
122738fd1498Szrj static void
122838fd1498Szrj vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
122938fd1498Szrj {
123038fd1498Szrj   if (gsi)
123138fd1498Szrj     vect_finish_stmt_generation (stmt, new_stmt, gsi);
123238fd1498Szrj   else
123338fd1498Szrj     {
123438fd1498Szrj       stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
123538fd1498Szrj       loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
123638fd1498Szrj 
123738fd1498Szrj       if (loop_vinfo)
123838fd1498Szrj         {
123938fd1498Szrj           struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
124038fd1498Szrj 	  basic_block new_bb;
124138fd1498Szrj 	  edge pe;
124238fd1498Szrj 
124338fd1498Szrj           if (nested_in_vect_loop_p (loop, stmt))
124438fd1498Szrj             loop = loop->inner;
124538fd1498Szrj 
124638fd1498Szrj 	  pe = loop_preheader_edge (loop);
124738fd1498Szrj           new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
124838fd1498Szrj           gcc_assert (!new_bb);
124938fd1498Szrj 	}
125038fd1498Szrj       else
125138fd1498Szrj        {
125238fd1498Szrj           bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
125338fd1498Szrj           basic_block bb;
125438fd1498Szrj           gimple_stmt_iterator gsi_bb_start;
125538fd1498Szrj 
125638fd1498Szrj           gcc_assert (bb_vinfo);
125738fd1498Szrj           bb = BB_VINFO_BB (bb_vinfo);
125838fd1498Szrj           gsi_bb_start = gsi_after_labels (bb);
125938fd1498Szrj           gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
126038fd1498Szrj        }
126138fd1498Szrj     }
126238fd1498Szrj 
126338fd1498Szrj   if (dump_enabled_p ())
126438fd1498Szrj     {
126538fd1498Szrj       dump_printf_loc (MSG_NOTE, vect_location,
126638fd1498Szrj                        "created new init_stmt: ");
126738fd1498Szrj       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
126838fd1498Szrj     }
126938fd1498Szrj }
127038fd1498Szrj 
127138fd1498Szrj /* Function vect_init_vector.
127238fd1498Szrj 
127338fd1498Szrj    Insert a new stmt (INIT_STMT) that initializes a new variable of type
127438fd1498Szrj    TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
127538fd1498Szrj    a vector type, a vector with all elements equal to VAL is created first.
127638fd1498Szrj    Place the initialization at GSI if it is not NULL.  Otherwise, place the
127738fd1498Szrj    initialization at the loop preheader.
127838fd1498Szrj    Return the DEF of INIT_STMT.
127938fd1498Szrj    It will be used in the vectorization of STMT.  */
128038fd1498Szrj 
128138fd1498Szrj tree
128238fd1498Szrj vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
128338fd1498Szrj {
128438fd1498Szrj   gimple *init_stmt;
128538fd1498Szrj   tree new_temp;
128638fd1498Szrj 
128738fd1498Szrj   /* We abuse this function to push a value to an SSA name with initial value 'val'.  */
128838fd1498Szrj   if (! useless_type_conversion_p (type, TREE_TYPE (val)))
128938fd1498Szrj     {
129038fd1498Szrj       gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
129138fd1498Szrj       if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
129238fd1498Szrj 	{
129338fd1498Szrj 	  /* A scalar boolean value should be transformed into an
129438fd1498Szrj 	     all-zeros or all-ones value before building a vector.  */
129538fd1498Szrj 	  if (VECTOR_BOOLEAN_TYPE_P (type))
129638fd1498Szrj 	    {
129738fd1498Szrj 	      tree true_val = build_all_ones_cst (TREE_TYPE (type));
129838fd1498Szrj 	      tree false_val = build_zero_cst (TREE_TYPE (type));
129938fd1498Szrj 
130038fd1498Szrj 	      if (CONSTANT_CLASS_P (val))
130138fd1498Szrj 		val = integer_zerop (val) ? false_val : true_val;
130238fd1498Szrj 	      else
130338fd1498Szrj 		{
130438fd1498Szrj 		  new_temp = make_ssa_name (TREE_TYPE (type));
130538fd1498Szrj 		  init_stmt = gimple_build_assign (new_temp, COND_EXPR,
130638fd1498Szrj 						   val, true_val, false_val);
130738fd1498Szrj 		  vect_init_vector_1 (stmt, init_stmt, gsi);
130838fd1498Szrj 		  val = new_temp;
130938fd1498Szrj 		}
131038fd1498Szrj 	    }
131138fd1498Szrj 	  else if (CONSTANT_CLASS_P (val))
131238fd1498Szrj 	    val = fold_convert (TREE_TYPE (type), val);
131338fd1498Szrj 	  else
131438fd1498Szrj 	    {
131538fd1498Szrj 	      new_temp = make_ssa_name (TREE_TYPE (type));
131638fd1498Szrj 	      if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
131738fd1498Szrj 		init_stmt = gimple_build_assign (new_temp,
131838fd1498Szrj 						 fold_build1 (VIEW_CONVERT_EXPR,
131938fd1498Szrj 							      TREE_TYPE (type),
132038fd1498Szrj 							      val));
132138fd1498Szrj 	      else
132238fd1498Szrj 		init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
132338fd1498Szrj 	      vect_init_vector_1 (stmt, init_stmt, gsi);
132438fd1498Szrj 	      val = new_temp;
132538fd1498Szrj 	    }
132638fd1498Szrj 	}
132738fd1498Szrj       val = build_vector_from_val (type, val);
132838fd1498Szrj     }
132938fd1498Szrj 
133038fd1498Szrj   new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
133138fd1498Szrj   init_stmt = gimple_build_assign (new_temp, val);
133238fd1498Szrj   vect_init_vector_1 (stmt, init_stmt, gsi);
133338fd1498Szrj   return new_temp;
133438fd1498Szrj }
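/* Editor's sketch (names hypothetical): calling
   vect_init_vector (stmt, 5, <V4SI type>, NULL) emits

     cst_1 = { 5, 5, 5, 5 };

   on the loop preheader edge and returns the SSA name cst_1.  */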
133538fd1498Szrj 
133638fd1498Szrj /* Function vect_get_vec_def_for_operand_1.
133738fd1498Szrj 
133838fd1498Szrj    For a defining stmt DEF_STMT of a scalar stmt, return a vector def with type
133938fd1498Szrj    DT that will be used in the vectorized stmt.  */
134038fd1498Szrj 
134138fd1498Szrj tree
134238fd1498Szrj vect_get_vec_def_for_operand_1 (gimple *def_stmt, enum vect_def_type dt)
134338fd1498Szrj {
134438fd1498Szrj   tree vec_oprnd;
134538fd1498Szrj   gimple *vec_stmt;
134638fd1498Szrj   stmt_vec_info def_stmt_info = NULL;
134738fd1498Szrj 
134838fd1498Szrj   switch (dt)
134938fd1498Szrj     {
135038fd1498Szrj     /* operand is a constant or a loop invariant.  */
135138fd1498Szrj     case vect_constant_def:
135238fd1498Szrj     case vect_external_def:
135338fd1498Szrj       /* Code should use vect_get_vec_def_for_operand.  */
135438fd1498Szrj       gcc_unreachable ();
135538fd1498Szrj 
135638fd1498Szrj     /* operand is defined inside the loop.  */
135738fd1498Szrj     case vect_internal_def:
135838fd1498Szrj       {
135938fd1498Szrj         /* Get the def from the vectorized stmt.  */
136038fd1498Szrj         def_stmt_info = vinfo_for_stmt (def_stmt);
136138fd1498Szrj 
136238fd1498Szrj         vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
136338fd1498Szrj         /* Get vectorized pattern statement.  */
136438fd1498Szrj         if (!vec_stmt
136538fd1498Szrj             && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
136638fd1498Szrj             && !STMT_VINFO_RELEVANT (def_stmt_info))
136738fd1498Szrj           vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
136838fd1498Szrj                        STMT_VINFO_RELATED_STMT (def_stmt_info)));
136938fd1498Szrj         gcc_assert (vec_stmt);
137038fd1498Szrj 	if (gimple_code (vec_stmt) == GIMPLE_PHI)
137138fd1498Szrj 	  vec_oprnd = PHI_RESULT (vec_stmt);
137238fd1498Szrj 	else if (is_gimple_call (vec_stmt))
137338fd1498Szrj 	  vec_oprnd = gimple_call_lhs (vec_stmt);
137438fd1498Szrj 	else
137538fd1498Szrj 	  vec_oprnd = gimple_assign_lhs (vec_stmt);
137638fd1498Szrj         return vec_oprnd;
137738fd1498Szrj       }
137838fd1498Szrj 
137938fd1498Szrj     /* operand is defined by a loop header phi.  */
138038fd1498Szrj     case vect_reduction_def:
138138fd1498Szrj     case vect_double_reduction_def:
138238fd1498Szrj     case vect_nested_cycle:
138338fd1498Szrj     case vect_induction_def:
138438fd1498Szrj       {
138538fd1498Szrj 	gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
138638fd1498Szrj 
138738fd1498Szrj         /* Get the def from the vectorized stmt.  */
138838fd1498Szrj         def_stmt_info = vinfo_for_stmt (def_stmt);
138938fd1498Szrj         vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
139038fd1498Szrj 	if (gimple_code (vec_stmt) == GIMPLE_PHI)
139138fd1498Szrj 	  vec_oprnd = PHI_RESULT (vec_stmt);
139238fd1498Szrj 	else
139338fd1498Szrj 	  vec_oprnd = gimple_get_lhs (vec_stmt);
139438fd1498Szrj         return vec_oprnd;
139538fd1498Szrj       }
139638fd1498Szrj 
139738fd1498Szrj     default:
139838fd1498Szrj       gcc_unreachable ();
139938fd1498Szrj     }
140038fd1498Szrj }
140138fd1498Szrj 
140238fd1498Szrj 
140338fd1498Szrj /* Function vect_get_vec_def_for_operand.
140438fd1498Szrj 
140538fd1498Szrj    OP is an operand in STMT.  This function returns a (vector) def that will be
140638fd1498Szrj    used in the vectorized stmt for STMT.
140738fd1498Szrj 
140838fd1498Szrj    In the case that OP is an SSA_NAME which is defined in the loop, then
140938fd1498Szrj    STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
141038fd1498Szrj 
141138fd1498Szrj    In case OP is an invariant or constant, a new stmt that creates a vector def
141238fd1498Szrj    needs to be introduced.  VECTYPE may be used to specify a required type for
141338fd1498Szrj    the vector invariant.  */
141438fd1498Szrj 
141538fd1498Szrj tree
141638fd1498Szrj vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype)
141738fd1498Szrj {
141838fd1498Szrj   gimple *def_stmt;
141938fd1498Szrj   enum vect_def_type dt;
142038fd1498Szrj   bool is_simple_use;
142138fd1498Szrj   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
142238fd1498Szrj   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
142338fd1498Szrj 
142438fd1498Szrj   if (dump_enabled_p ())
142538fd1498Szrj     {
142638fd1498Szrj       dump_printf_loc (MSG_NOTE, vect_location,
142738fd1498Szrj                        "vect_get_vec_def_for_operand: ");
142838fd1498Szrj       dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
142938fd1498Szrj       dump_printf (MSG_NOTE, "\n");
143038fd1498Szrj     }
143138fd1498Szrj 
143238fd1498Szrj   is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt);
143338fd1498Szrj   gcc_assert (is_simple_use);
143438fd1498Szrj   if (def_stmt && dump_enabled_p ())
143538fd1498Szrj     {
143638fd1498Szrj       dump_printf_loc (MSG_NOTE, vect_location, "  def_stmt =  ");
143738fd1498Szrj       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
143838fd1498Szrj     }
143938fd1498Szrj 
144038fd1498Szrj   if (dt == vect_constant_def || dt == vect_external_def)
144138fd1498Szrj     {
144238fd1498Szrj       tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
144338fd1498Szrj       tree vector_type;
144438fd1498Szrj 
144538fd1498Szrj       if (vectype)
144638fd1498Szrj 	vector_type = vectype;
144738fd1498Szrj       else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
144838fd1498Szrj 	       && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
144938fd1498Szrj 	vector_type = build_same_sized_truth_vector_type (stmt_vectype);
145038fd1498Szrj       else
145138fd1498Szrj 	vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
145238fd1498Szrj 
145338fd1498Szrj       gcc_assert (vector_type);
145438fd1498Szrj       return vect_init_vector (stmt, op, vector_type, NULL);
145538fd1498Szrj     }
145638fd1498Szrj   else
145738fd1498Szrj     return vect_get_vec_def_for_operand_1 (def_stmt, dt);
145838fd1498Szrj }
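/* Editor's note: for example, a constant operand 3 in a statement whose
   vectype is V4SI classifies as vect_constant_def, so the code above
   builds the invariant { 3, 3, 3, 3 } in the preheader via
   vect_init_vector and returns its SSA def.  */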
145938fd1498Szrj 
146038fd1498Szrj 
146138fd1498Szrj /* Function vect_get_vec_def_for_stmt_copy
146238fd1498Szrj 
146338fd1498Szrj    Return a vector-def for an operand.  This function is used when the
146438fd1498Szrj    vectorized stmt to be created (by the caller to this function) is a "copy"
146538fd1498Szrj    created in case the vectorized result cannot fit in one vector, and several
146638fd1498Szrj    copies of the vector-stmt are required.  In this case the vector-def is
146738fd1498Szrj    retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
146838fd1498Szrj    of the stmt that defines VEC_OPRND.
146938fd1498Szrj    DT is the type of the vector def VEC_OPRND.
147038fd1498Szrj 
147138fd1498Szrj    Context:
147238fd1498Szrj         In case the vectorization factor (VF) is bigger than the number
147338fd1498Szrj    of elements that can fit in a vectype (nunits), we have to generate
147438fd1498Szrj    more than one vector stmt to vectorize the scalar stmt.  This situation
147538fd1498Szrj    arises when there are multiple data-types operated upon in the loop; the
147638fd1498Szrj    smallest data-type determines the VF, and as a result, when vectorizing
147738fd1498Szrj    stmts operating on wider types we need to create 'VF/nunits' "copies" of the
147838fd1498Szrj    vector stmt (each computing a vector of 'nunits' results, and together
147938fd1498Szrj    computing 'VF' results in each iteration).  This function is called when
148038fd1498Szrj    vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
148138fd1498Szrj    which VF=16 and nunits=4, so the number of copies required is 4):
148238fd1498Szrj 
148338fd1498Szrj    scalar stmt:         vectorized into:        STMT_VINFO_RELATED_STMT
148438fd1498Szrj 
148538fd1498Szrj    S1: x = load         VS1.0:  vx.0 = memref0      VS1.1
148638fd1498Szrj                         VS1.1:  vx.1 = memref1      VS1.2
148738fd1498Szrj                         VS1.2:  vx.2 = memref2      VS1.3
148838fd1498Szrj                         VS1.3:  vx.3 = memref3
148938fd1498Szrj 
149038fd1498Szrj    S2: z = x + ...      VSnew.0:  vz0 = vx.0 + ...  VSnew.1
149138fd1498Szrj                         VSnew.1:  vz1 = vx.1 + ...  VSnew.2
149238fd1498Szrj                         VSnew.2:  vz2 = vx.2 + ...  VSnew.3
149338fd1498Szrj                         VSnew.3:  vz3 = vx.3 + ...
149438fd1498Szrj 
149538fd1498Szrj    The vectorization of S1 is explained in vectorizable_load.
149638fd1498Szrj    The vectorization of S2:
149738fd1498Szrj         To create the first vector-stmt out of the 4 copies - VSnew.0 -
149838fd1498Szrj    the function 'vect_get_vec_def_for_operand' is called to
149938fd1498Szrj    get the relevant vector-def for each operand of S2.  For operand x it
150038fd1498Szrj    returns  the vector-def 'vx.0'.
150138fd1498Szrj 
150238fd1498Szrj         To create the remaining copies of the vector-stmt (VSnew.j), this
150338fd1498Szrj    function is called to get the relevant vector-def for each operand.  It is
150438fd1498Szrj    obtained from the respective VS1.j stmt, which is recorded in the
150538fd1498Szrj    STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
150638fd1498Szrj 
150738fd1498Szrj         For example, to obtain the vector-def 'vx.1' in order to create the
150838fd1498Szrj    vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
150938fd1498Szrj    Given 'vx.0' we obtain the stmt that defines it ('VS1.0'); from the
151038fd1498Szrj    STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
151138fd1498Szrj    and return its def ('vx.1').
151238fd1498Szrj    Overall, to create the above sequence this function will be called 3 times:
151338fd1498Szrj         vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
151438fd1498Szrj         vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
151538fd1498Szrj         vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */
151638fd1498Szrj 
151738fd1498Szrj tree
151838fd1498Szrj vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
151938fd1498Szrj {
152038fd1498Szrj   gimple *vec_stmt_for_operand;
152138fd1498Szrj   stmt_vec_info def_stmt_info;
152238fd1498Szrj 
152338fd1498Szrj   /* Do nothing; can reuse same def.  */
152438fd1498Szrj   if (dt == vect_external_def || dt == vect_constant_def)
152538fd1498Szrj     return vec_oprnd;
152638fd1498Szrj 
152738fd1498Szrj   vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
152838fd1498Szrj   def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
152938fd1498Szrj   gcc_assert (def_stmt_info);
153038fd1498Szrj   vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
153138fd1498Szrj   gcc_assert (vec_stmt_for_operand);
153238fd1498Szrj   if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
153338fd1498Szrj     vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
153438fd1498Szrj   else
153538fd1498Szrj     vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
153638fd1498Szrj   return vec_oprnd;
153738fd1498Szrj }
153838fd1498Szrj 
153938fd1498Szrj 
154038fd1498Szrj /* Get vectorized definitions for the operands to create a copy of an original
154138fd1498Szrj    stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */
154238fd1498Szrj 
154338fd1498Szrj void
154438fd1498Szrj vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
154538fd1498Szrj 				 vec<tree> *vec_oprnds0,
154638fd1498Szrj 				 vec<tree> *vec_oprnds1)
154738fd1498Szrj {
154838fd1498Szrj   tree vec_oprnd = vec_oprnds0->pop ();
154938fd1498Szrj 
155038fd1498Szrj   vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
155138fd1498Szrj   vec_oprnds0->quick_push (vec_oprnd);
155238fd1498Szrj 
155338fd1498Szrj   if (vec_oprnds1 && vec_oprnds1->length ())
155438fd1498Szrj     {
155538fd1498Szrj       vec_oprnd = vec_oprnds1->pop ();
155638fd1498Szrj       vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
155738fd1498Szrj       vec_oprnds1->quick_push (vec_oprnd);
155838fd1498Szrj     }
155938fd1498Szrj }
156038fd1498Szrj 
156138fd1498Szrj 
156238fd1498Szrj /* Get vectorized definitions for OP0 and OP1.  */
156338fd1498Szrj 
156438fd1498Szrj void
156538fd1498Szrj vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
156638fd1498Szrj 		   vec<tree> *vec_oprnds0,
156738fd1498Szrj 		   vec<tree> *vec_oprnds1,
156838fd1498Szrj 		   slp_tree slp_node)
156938fd1498Szrj {
157038fd1498Szrj   if (slp_node)
157138fd1498Szrj     {
157238fd1498Szrj       int nops = (op1 == NULL_TREE) ? 1 : 2;
157338fd1498Szrj       auto_vec<tree> ops (nops);
157438fd1498Szrj       auto_vec<vec<tree> > vec_defs (nops);
157538fd1498Szrj 
157638fd1498Szrj       ops.quick_push (op0);
157738fd1498Szrj       if (op1)
157838fd1498Szrj         ops.quick_push (op1);
157938fd1498Szrj 
158038fd1498Szrj       vect_get_slp_defs (ops, slp_node, &vec_defs);
158138fd1498Szrj 
158238fd1498Szrj       *vec_oprnds0 = vec_defs[0];
158338fd1498Szrj       if (op1)
158438fd1498Szrj 	*vec_oprnds1 = vec_defs[1];
158538fd1498Szrj     }
158638fd1498Szrj   else
158738fd1498Szrj     {
158838fd1498Szrj       tree vec_oprnd;
158938fd1498Szrj 
159038fd1498Szrj       vec_oprnds0->create (1);
159138fd1498Szrj       vec_oprnd = vect_get_vec_def_for_operand (op0, stmt);
159238fd1498Szrj       vec_oprnds0->quick_push (vec_oprnd);
159338fd1498Szrj 
159438fd1498Szrj       if (op1)
159538fd1498Szrj 	{
159638fd1498Szrj 	  vec_oprnds1->create (1);
159738fd1498Szrj 	  vec_oprnd = vect_get_vec_def_for_operand (op1, stmt);
159838fd1498Szrj 	  vec_oprnds1->quick_push (vec_oprnd);
159938fd1498Szrj 	}
160038fd1498Szrj     }
160138fd1498Szrj }
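/* Editor's usage sketch (names hypothetical): when vectorizing
   S: z = x + y without SLP, a caller does

     vect_get_vec_defs (x, y, stmt, &v0, &v1, NULL);

   and receives v0 == { vx.0 } and v1 == { vy.0 }, the first vector def
   of each operand.  */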
160238fd1498Szrj 
160338fd1498Szrj /* Helper function called by vect_finish_replace_stmt and
160438fd1498Szrj    vect_finish_stmt_generation.  Set the location of the new
160538fd1498Szrj    statement and create a stmt_vec_info for it.  */
160638fd1498Szrj 
160738fd1498Szrj static void
160838fd1498Szrj vect_finish_stmt_generation_1 (gimple *stmt, gimple *vec_stmt)
160938fd1498Szrj {
161038fd1498Szrj   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
161138fd1498Szrj   vec_info *vinfo = stmt_info->vinfo;
161238fd1498Szrj 
161338fd1498Szrj   set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, vinfo));
161438fd1498Szrj 
161538fd1498Szrj   if (dump_enabled_p ())
161638fd1498Szrj     {
161738fd1498Szrj       dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
161838fd1498Szrj       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
161938fd1498Szrj     }
162038fd1498Szrj 
162138fd1498Szrj   gimple_set_location (vec_stmt, gimple_location (stmt));
162238fd1498Szrj 
162338fd1498Szrj   /* While EH edges will generally prevent vectorization, stmt might
162438fd1498Szrj      e.g. be in a must-not-throw region.  Ensure newly created stmts
162538fd1498Szrj      that could throw are part of the same region.  */
162638fd1498Szrj   int lp_nr = lookup_stmt_eh_lp (stmt);
162738fd1498Szrj   if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
162838fd1498Szrj     add_stmt_to_eh_lp (vec_stmt, lp_nr);
162938fd1498Szrj }
163038fd1498Szrj 
163138fd1498Szrj /* Replace the scalar statement STMT with a new vector statement VEC_STMT,
163238fd1498Szrj    which sets the same scalar result as STMT did.  */
163338fd1498Szrj 
163438fd1498Szrj void
163538fd1498Szrj vect_finish_replace_stmt (gimple *stmt, gimple *vec_stmt)
163638fd1498Szrj {
163738fd1498Szrj   gcc_assert (gimple_get_lhs (stmt) == gimple_get_lhs (vec_stmt));
163838fd1498Szrj 
163938fd1498Szrj   gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1640*58e805e6Szrj   gsi_replace (&gsi, vec_stmt, true);
164138fd1498Szrj 
164238fd1498Szrj   vect_finish_stmt_generation_1 (stmt, vec_stmt);
164338fd1498Szrj }
164438fd1498Szrj 
164538fd1498Szrj /* Function vect_finish_stmt_generation.
164638fd1498Szrj 
164738fd1498Szrj    Insert a new stmt.  */
164838fd1498Szrj 
164938fd1498Szrj void
165038fd1498Szrj vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
165138fd1498Szrj 			     gimple_stmt_iterator *gsi)
165238fd1498Szrj {
165338fd1498Szrj   gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
165438fd1498Szrj 
165538fd1498Szrj   if (!gsi_end_p (*gsi)
165638fd1498Szrj       && gimple_has_mem_ops (vec_stmt))
165738fd1498Szrj     {
165838fd1498Szrj       gimple *at_stmt = gsi_stmt (*gsi);
165938fd1498Szrj       tree vuse = gimple_vuse (at_stmt);
166038fd1498Szrj       if (vuse && TREE_CODE (vuse) == SSA_NAME)
166138fd1498Szrj 	{
166238fd1498Szrj 	  tree vdef = gimple_vdef (at_stmt);
166338fd1498Szrj 	  gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
166438fd1498Szrj 	  /* If we have an SSA vuse and insert a store, update virtual
166538fd1498Szrj 	     SSA form to avoid triggering the renamer.  Do so only
166638fd1498Szrj 	     if we can easily see all uses - which is what almost always
166738fd1498Szrj 	     happens with the way vectorized stmts are inserted.  */
166838fd1498Szrj 	  if ((vdef && TREE_CODE (vdef) == SSA_NAME)
166938fd1498Szrj 	      && ((is_gimple_assign (vec_stmt)
167038fd1498Szrj 		   && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
167138fd1498Szrj 		  || (is_gimple_call (vec_stmt)
167238fd1498Szrj 		      && !(gimple_call_flags (vec_stmt)
167338fd1498Szrj 			   & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
167438fd1498Szrj 	    {
167538fd1498Szrj 	      tree new_vdef = copy_ssa_name (vuse, vec_stmt);
167638fd1498Szrj 	      gimple_set_vdef (vec_stmt, new_vdef);
167738fd1498Szrj 	      SET_USE (gimple_vuse_op (at_stmt), new_vdef);
167838fd1498Szrj 	    }
167938fd1498Szrj 	}
168038fd1498Szrj     }
168138fd1498Szrj   gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
168238fd1498Szrj   vect_finish_stmt_generation_1 (stmt, vec_stmt);
168338fd1498Szrj }
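/* Editor's sketch (SSA names hypothetical): when a vector store is
   inserted before a scalar store whose vuse is .MEM_5, the new stmt
   gets vuse .MEM_5 and a fresh vdef .MEM_7, and the scalar store's
   vuse is rewired to .MEM_7, keeping virtual SSA form valid without
   running the renamer.  */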
168438fd1498Szrj 
168538fd1498Szrj /* We want to vectorize a call to combined function CFN with function
168638fd1498Szrj    decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
168738fd1498Szrj    as the types of all inputs.  Check whether this is possible using
168838fd1498Szrj    an internal function, returning its code if so or IFN_LAST if not.  */
168938fd1498Szrj 
169038fd1498Szrj static internal_fn
169138fd1498Szrj vectorizable_internal_function (combined_fn cfn, tree fndecl,
169238fd1498Szrj 				tree vectype_out, tree vectype_in)
169338fd1498Szrj {
169438fd1498Szrj   internal_fn ifn;
169538fd1498Szrj   if (internal_fn_p (cfn))
169638fd1498Szrj     ifn = as_internal_fn (cfn);
169738fd1498Szrj   else
169838fd1498Szrj     ifn = associated_internal_fn (fndecl);
169938fd1498Szrj   if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
170038fd1498Szrj     {
170138fd1498Szrj       const direct_internal_fn_info &info = direct_internal_fn (ifn);
170238fd1498Szrj       if (info.vectorizable)
170338fd1498Szrj 	{
170438fd1498Szrj 	  tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
170538fd1498Szrj 	  tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
170638fd1498Szrj 	  if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
170738fd1498Szrj 					      OPTIMIZE_FOR_SPEED))
170838fd1498Szrj 	    return ifn;
170938fd1498Szrj 	}
171038fd1498Szrj     }
171138fd1498Szrj   return IFN_LAST;
171238fd1498Szrj }
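/* Editor's note, an illustrative case: a call to the sqrt built-in with
   V2DF input and output maps (via associated_internal_fn) to IFN_SQRT,
   which is returned iff direct_internal_fn_supported_p reports a V2DF
   sqrt optab; otherwise the result is IFN_LAST.  */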
171338fd1498Szrj 
171438fd1498Szrj 
171538fd1498Szrj static tree permute_vec_elements (tree, tree, tree, gimple *,
171638fd1498Szrj 				  gimple_stmt_iterator *);
171738fd1498Szrj 
171838fd1498Szrj /* Check whether a load or store statement in the loop described by
171938fd1498Szrj    LOOP_VINFO is possible in a fully-masked loop.  This is testing
172038fd1498Szrj    whether the vectorizer pass has the appropriate support, as well as
172138fd1498Szrj    whether the target does.
172238fd1498Szrj 
172338fd1498Szrj    VLS_TYPE says whether the statement is a load or store and VECTYPE
172438fd1498Szrj    is the type of the vector being loaded or stored.  MEMORY_ACCESS_TYPE
172538fd1498Szrj    says how the load or store is going to be implemented and GROUP_SIZE
172638fd1498Szrj    is the number of load or store statements in the containing group.
172738fd1498Szrj    If the access is a gather load or scatter store, GS_INFO describes
172838fd1498Szrj    its arguments.
172938fd1498Szrj 
173038fd1498Szrj    Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
173138fd1498Szrj    supported, otherwise record the required mask types.  */
173238fd1498Szrj 
173338fd1498Szrj static void
173438fd1498Szrj check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
173538fd1498Szrj 			  vec_load_store_type vls_type, int group_size,
173638fd1498Szrj 			  vect_memory_access_type memory_access_type,
173738fd1498Szrj 			  gather_scatter_info *gs_info)
173838fd1498Szrj {
173938fd1498Szrj   /* Invariant loads need no special support.  */
174038fd1498Szrj   if (memory_access_type == VMAT_INVARIANT)
174138fd1498Szrj     return;
174238fd1498Szrj 
174338fd1498Szrj   vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
174438fd1498Szrj   machine_mode vecmode = TYPE_MODE (vectype);
174538fd1498Szrj   bool is_load = (vls_type == VLS_LOAD);
174638fd1498Szrj   if (memory_access_type == VMAT_LOAD_STORE_LANES)
174738fd1498Szrj     {
174838fd1498Szrj       if (is_load
174938fd1498Szrj 	  ? !vect_load_lanes_supported (vectype, group_size, true)
175038fd1498Szrj 	  : !vect_store_lanes_supported (vectype, group_size, true))
175138fd1498Szrj 	{
175238fd1498Szrj 	  if (dump_enabled_p ())
175338fd1498Szrj 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
175438fd1498Szrj 			     "can't use a fully-masked loop because the"
175538fd1498Szrj 			     " target doesn't have an appropriate masked"
175638fd1498Szrj 			     " load/store-lanes instruction.\n");
175738fd1498Szrj 	  LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
175838fd1498Szrj 	  return;
175938fd1498Szrj 	}
176038fd1498Szrj       unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
176138fd1498Szrj       vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
176238fd1498Szrj       return;
176338fd1498Szrj     }
176438fd1498Szrj 
176538fd1498Szrj   if (memory_access_type == VMAT_GATHER_SCATTER)
176638fd1498Szrj     {
176738fd1498Szrj       internal_fn ifn = (is_load
176838fd1498Szrj 			 ? IFN_MASK_GATHER_LOAD
176938fd1498Szrj 			 : IFN_MASK_SCATTER_STORE);
177038fd1498Szrj       tree offset_type = TREE_TYPE (gs_info->offset);
177138fd1498Szrj       if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
177238fd1498Szrj 						   gs_info->memory_type,
177338fd1498Szrj 						   TYPE_SIGN (offset_type),
177438fd1498Szrj 						   gs_info->scale))
177538fd1498Szrj 	{
177638fd1498Szrj 	  if (dump_enabled_p ())
177738fd1498Szrj 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
177838fd1498Szrj 			     "can't use a fully-masked loop because the"
177938fd1498Szrj 			     " target doesn't have an appropriate masked"
178038fd1498Szrj 			     " gather load or scatter store instruction.\n");
178138fd1498Szrj 	  LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
178238fd1498Szrj 	  return;
178338fd1498Szrj 	}
178438fd1498Szrj       unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
178538fd1498Szrj       vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
178638fd1498Szrj       return;
178738fd1498Szrj     }
178838fd1498Szrj 
178938fd1498Szrj   if (memory_access_type != VMAT_CONTIGUOUS
179038fd1498Szrj       && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
179138fd1498Szrj     {
179238fd1498Szrj       /* Element X of the data must come from iteration i * VF + X of the
179338fd1498Szrj 	 scalar loop.  We need more work to support other mappings.  */
179438fd1498Szrj       if (dump_enabled_p ())
179538fd1498Szrj 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
179638fd1498Szrj 			 "can't use a fully-masked loop because an access"
179738fd1498Szrj 			 " isn't contiguous.\n");
179838fd1498Szrj       LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
179938fd1498Szrj       return;
180038fd1498Szrj     }
180138fd1498Szrj 
180238fd1498Szrj   machine_mode mask_mode;
180338fd1498Szrj   if (!(targetm.vectorize.get_mask_mode
180438fd1498Szrj 	(GET_MODE_NUNITS (vecmode),
180538fd1498Szrj 	 GET_MODE_SIZE (vecmode)).exists (&mask_mode))
180638fd1498Szrj       || !can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
180738fd1498Szrj     {
180838fd1498Szrj       if (dump_enabled_p ())
180938fd1498Szrj 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
181038fd1498Szrj 			 "can't use a fully-masked loop because the target"
181138fd1498Szrj 			 " doesn't have the appropriate masked load or"
181238fd1498Szrj 			 " store.\n");
181338fd1498Szrj       LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
181438fd1498Szrj       return;
181538fd1498Szrj     }
181638fd1498Szrj   /* We might load more scalars than we need for permuting SLP loads.
181738fd1498Szrj      We checked in get_group_load_store_type that the extra elements
181838fd1498Szrj      don't leak into a new vector.  */
181938fd1498Szrj   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
182038fd1498Szrj   poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
182138fd1498Szrj   unsigned int nvectors;
182238fd1498Szrj   if (can_div_away_from_zero_p (group_size * vf, nunits, &nvectors))
182338fd1498Szrj     vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype);
182438fd1498Szrj   else
182538fd1498Szrj     gcc_unreachable ();
182638fd1498Szrj }
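/* Editor's note, a worked example for the final step above with
   hypothetical values: group_size == 2, vf == 8 and nunits == 4 give
   2 * 8 / 4 == 4 vectors per iteration, so four loop masks of type
   VECTYPE are recorded.  */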
182738fd1498Szrj 
182838fd1498Szrj /* Return the mask input to a masked load or store.  VEC_MASK is the vectorized
182938fd1498Szrj    form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
183038fd1498Szrj    that needs to be applied to all loads and stores in a vectorized loop.
183138fd1498Szrj    Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
183238fd1498Szrj 
183338fd1498Szrj    MASK_TYPE is the type of both masks.  If new statements are needed,
183438fd1498Szrj    insert them before GSI.  */
183538fd1498Szrj 
183638fd1498Szrj static tree
183738fd1498Szrj prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
183838fd1498Szrj 			 gimple_stmt_iterator *gsi)
183938fd1498Szrj {
184038fd1498Szrj   gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
184138fd1498Szrj   if (!loop_mask)
184238fd1498Szrj     return vec_mask;
184338fd1498Szrj 
184438fd1498Szrj   gcc_assert (TREE_TYPE (loop_mask) == mask_type);
184538fd1498Szrj   tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
184638fd1498Szrj   gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
184738fd1498Szrj 					  vec_mask, loop_mask);
184838fd1498Szrj   gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
184938fd1498Szrj   return and_res;
185038fd1498Szrj }
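/* Editor's sketch (SSA names hypothetical): with a nonnull loop mask
   this emits

     vec_mask_and_1 = vec_mask_2 & loop_mask_3;

   and returns vec_mask_and_1; with a null loop mask the original
   VEC_MASK is returned unchanged.  */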
185138fd1498Szrj 
185238fd1498Szrj /* Determine whether we can use a gather load or scatter store to vectorize
185338fd1498Szrj    strided load or store STMT by truncating the current offset to a smaller
185438fd1498Szrj    width.  We need to be able to construct an offset vector:
185538fd1498Szrj 
185638fd1498Szrj      { 0, X, X*2, X*3, ... }
185738fd1498Szrj 
185838fd1498Szrj    without loss of precision, where X is STMT's DR_STEP.
185938fd1498Szrj 
186038fd1498Szrj    Return true if this is possible, describing the gather load or scatter
186138fd1498Szrj    store in GS_INFO.  MASKED_P is true if the load or store is conditional.  */
186238fd1498Szrj 
186338fd1498Szrj static bool
186438fd1498Szrj vect_truncate_gather_scatter_offset (gimple *stmt, loop_vec_info loop_vinfo,
186538fd1498Szrj 				     bool masked_p,
186638fd1498Szrj 				     gather_scatter_info *gs_info)
186738fd1498Szrj {
186838fd1498Szrj   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
186938fd1498Szrj   data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
187038fd1498Szrj   tree step = DR_STEP (dr);
187138fd1498Szrj   if (TREE_CODE (step) != INTEGER_CST)
187238fd1498Szrj     {
187338fd1498Szrj       /* ??? Perhaps we could use range information here?  */
187438fd1498Szrj       if (dump_enabled_p ())
187538fd1498Szrj 	dump_printf_loc (MSG_NOTE, vect_location,
187638fd1498Szrj 			 "cannot truncate variable step.\n");
187738fd1498Szrj       return false;
187838fd1498Szrj     }
187938fd1498Szrj 
188038fd1498Szrj   /* Get the number of bits in an element.  */
188138fd1498Szrj   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
188238fd1498Szrj   scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
188338fd1498Szrj   unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
188438fd1498Szrj 
188538fd1498Szrj   /* Set COUNT to one less than the upper limit on the number of elements.
188638fd1498Szrj      Start with the maximum vectorization factor.  */
188738fd1498Szrj   unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
188838fd1498Szrj 
188938fd1498Szrj   /* Try lowering COUNT to the number of scalar latch iterations.  */
189038fd1498Szrj   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
189138fd1498Szrj   widest_int max_iters;
189238fd1498Szrj   if (max_loop_iterations (loop, &max_iters)
189338fd1498Szrj       && max_iters < count)
189438fd1498Szrj     count = max_iters.to_shwi ();
189538fd1498Szrj 
189638fd1498Szrj   /* Try scales of 1 and the element size.  */
189738fd1498Szrj   int scales[] = { 1, vect_get_scalar_dr_size (dr) };
189838fd1498Szrj   bool overflow_p = false;
189938fd1498Szrj   for (int i = 0; i < 2; ++i)
190038fd1498Szrj     {
190138fd1498Szrj       int scale = scales[i];
190238fd1498Szrj       widest_int factor;
190338fd1498Szrj       if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
190438fd1498Szrj 	continue;
190538fd1498Szrj 
190638fd1498Szrj       /* See whether we can calculate COUNT * STEP / SCALE
190738fd1498Szrj 	 in ELEMENT_BITS bits.  */
190838fd1498Szrj       widest_int range = wi::mul (count, factor, SIGNED, &overflow_p);
190938fd1498Szrj       if (overflow_p)
191038fd1498Szrj 	continue;
191138fd1498Szrj       signop sign = range >= 0 ? UNSIGNED : SIGNED;
191238fd1498Szrj       if (wi::min_precision (range, sign) > element_bits)
191338fd1498Szrj 	{
191438fd1498Szrj 	  overflow_p = true;
191538fd1498Szrj 	  continue;
191638fd1498Szrj 	}
191738fd1498Szrj 
191838fd1498Szrj       /* See whether the target supports the operation.  */
191938fd1498Szrj       tree memory_type = TREE_TYPE (DR_REF (dr));
192038fd1498Szrj       if (!vect_gather_scatter_fn_p (DR_IS_READ (dr), masked_p, vectype,
192138fd1498Szrj 				     memory_type, element_bits, sign, scale,
192238fd1498Szrj 				     &gs_info->ifn, &gs_info->element_type))
192338fd1498Szrj 	continue;
192438fd1498Szrj 
192538fd1498Szrj       tree offset_type = build_nonstandard_integer_type (element_bits,
192638fd1498Szrj 							 sign == UNSIGNED);
192738fd1498Szrj 
192838fd1498Szrj       gs_info->decl = NULL_TREE;
192938fd1498Szrj       /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
193038fd1498Szrj 	 but we don't need to store that here.  */
193138fd1498Szrj       gs_info->base = NULL_TREE;
193238fd1498Szrj       gs_info->offset = fold_convert (offset_type, step);
193338fd1498Szrj       gs_info->offset_dt = vect_constant_def;
193438fd1498Szrj       gs_info->offset_vectype = NULL_TREE;
193538fd1498Szrj       gs_info->scale = scale;
193638fd1498Szrj       gs_info->memory_type = memory_type;
193738fd1498Szrj       return true;
193838fd1498Szrj     }
193938fd1498Szrj 
194038fd1498Szrj   if (overflow_p && dump_enabled_p ())
194138fd1498Szrj     dump_printf_loc (MSG_NOTE, vect_location,
194238fd1498Szrj 		     "truncating gather/scatter offset to %d bits"
194338fd1498Szrj 		     " might change its value.\n", element_bits);
194438fd1498Szrj 
194538fd1498Szrj   return false;
194638fd1498Szrj }
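/* Editor's worked example with hypothetical values: for a load of
   16-bit elements with DR_STEP 10 and at most 99 latch iterations,
   scale 1 gives factor 10 and range 99 * 10 == 990, which needs only
   10 bits <= element_bits == 16, so the offset vector
   { 0, 10, 20, ... } is representable and the gather form is usable
   whenever the target supports a 16-bit offset.  */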
194738fd1498Szrj 
194838fd1498Szrj /* Return true if we can use gather/scatter internal functions to
194938fd1498Szrj    vectorize STMT, which is a grouped or strided load or store.
195038fd1498Szrj    MASKED_P is true if load or store is conditional.  When returning
195138fd1498Szrj    true, fill in GS_INFO with the information required to perform the
195238fd1498Szrj    operation.  */
195338fd1498Szrj 
195438fd1498Szrj static bool
195538fd1498Szrj vect_use_strided_gather_scatters_p (gimple *stmt, loop_vec_info loop_vinfo,
195638fd1498Szrj 				    bool masked_p,
195738fd1498Szrj 				    gather_scatter_info *gs_info)
195838fd1498Szrj {
195938fd1498Szrj   if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info)
196038fd1498Szrj       || gs_info->decl)
196138fd1498Szrj     return vect_truncate_gather_scatter_offset (stmt, loop_vinfo,
196238fd1498Szrj 						masked_p, gs_info);
196338fd1498Szrj 
196438fd1498Szrj   scalar_mode element_mode = SCALAR_TYPE_MODE (gs_info->element_type);
196538fd1498Szrj   unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
196638fd1498Szrj   tree offset_type = TREE_TYPE (gs_info->offset);
196738fd1498Szrj   unsigned int offset_bits = TYPE_PRECISION (offset_type);
196838fd1498Szrj 
196938fd1498Szrj   /* Enforced by vect_check_gather_scatter.  */
197038fd1498Szrj   gcc_assert (element_bits >= offset_bits);
197138fd1498Szrj 
197238fd1498Szrj   /* If the elements are wider than the offset, convert the offset to the
197338fd1498Szrj      same width, without changing its sign.  */
197438fd1498Szrj   if (element_bits > offset_bits)
197538fd1498Szrj     {
197638fd1498Szrj       bool unsigned_p = TYPE_UNSIGNED (offset_type);
197738fd1498Szrj       offset_type = build_nonstandard_integer_type (element_bits, unsigned_p);
197838fd1498Szrj       gs_info->offset = fold_convert (offset_type, gs_info->offset);
197938fd1498Szrj     }
198038fd1498Szrj 
198138fd1498Szrj   if (dump_enabled_p ())
198238fd1498Szrj     dump_printf_loc (MSG_NOTE, vect_location,
198338fd1498Szrj 		     "using gather/scatter for strided/grouped access,"
198438fd1498Szrj 		     " scale = %d\n", gs_info->scale);
198538fd1498Szrj 
198638fd1498Szrj   return true;
198738fd1498Szrj }
198838fd1498Szrj 
198938fd1498Szrj /* STMT is a non-strided load or store, meaning that it accesses
199038fd1498Szrj    elements with a known constant step.  Return -1 if that step
199138fd1498Szrj    is negative, 0 if it is zero, and 1 if it is greater than zero.  */
199238fd1498Szrj 
199338fd1498Szrj static int
199438fd1498Szrj compare_step_with_zero (gimple *stmt)
199538fd1498Szrj {
199638fd1498Szrj   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
199738fd1498Szrj   data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
199838fd1498Szrj   return tree_int_cst_compare (vect_dr_behavior (dr)->step,
199938fd1498Szrj 			       size_zero_node);
200038fd1498Szrj }
200138fd1498Szrj 
200238fd1498Szrj /* If the target supports a permute mask that reverses the elements in
200338fd1498Szrj    a vector of type VECTYPE, return that mask, otherwise return null.  */
200438fd1498Szrj 
200538fd1498Szrj static tree
200638fd1498Szrj perm_mask_for_reverse (tree vectype)
200738fd1498Szrj {
200838fd1498Szrj   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
200938fd1498Szrj 
201038fd1498Szrj   /* The encoding has a single stepped pattern.  */
201138fd1498Szrj   vec_perm_builder sel (nunits, 1, 3);
201238fd1498Szrj   for (int i = 0; i < 3; ++i)
201338fd1498Szrj     sel.quick_push (nunits - 1 - i);
201438fd1498Szrj 
201538fd1498Szrj   vec_perm_indices indices (sel, 1, nunits);
201638fd1498Szrj   if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
201738fd1498Szrj     return NULL_TREE;
201838fd1498Szrj   return vect_gen_perm_mask_checked (vectype, indices);
201938fd1498Szrj }
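/* Editor's note: for V8SI the stepped encoding above expands to the
   selector { 7, 6, 5, 4, 3, 2, 1, 0 }, i.e. element I of the result is
   element NUNITS - 1 - I of the input.  */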
202038fd1498Szrj 
202138fd1498Szrj /* STMT is either a masked or unconditional store.  Return the value
202238fd1498Szrj    being stored.  */
202338fd1498Szrj 
202438fd1498Szrj tree
202538fd1498Szrj vect_get_store_rhs (gimple *stmt)
202638fd1498Szrj {
202738fd1498Szrj   if (gassign *assign = dyn_cast <gassign *> (stmt))
202838fd1498Szrj     {
202938fd1498Szrj       gcc_assert (gimple_assign_single_p (assign));
203038fd1498Szrj       return gimple_assign_rhs1 (assign);
203138fd1498Szrj     }
203238fd1498Szrj   if (gcall *call = dyn_cast <gcall *> (stmt))
203338fd1498Szrj     {
203438fd1498Szrj       internal_fn ifn = gimple_call_internal_fn (call);
203538fd1498Szrj       int index = internal_fn_stored_value_index (ifn);
203638fd1498Szrj       gcc_assert (index >= 0);
203738fd1498Szrj       return gimple_call_arg (stmt, index);
203838fd1498Szrj     }
203938fd1498Szrj   gcc_unreachable ();
204038fd1498Szrj }
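/* Editor's sketch: for a masked store such as

     MASK_STORE (addr, align, mask, value);

   internal_fn_stored_value_index identifies the position of 'value',
   and that call argument is what gets returned here.  */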
204138fd1498Szrj 
204238fd1498Szrj /* A subroutine of get_load_store_type, with a subset of the same
204338fd1498Szrj    arguments.  Handle the case where STMT is part of a grouped load
204438fd1498Szrj    or store.
204538fd1498Szrj 
204638fd1498Szrj    For stores, the statements in the group are all consecutive
204738fd1498Szrj    and there is no gap at the end.  For loads, the statements in the
204838fd1498Szrj    group might not be consecutive; there can be gaps between statements
204938fd1498Szrj    as well as at the end.  */
205038fd1498Szrj 
205138fd1498Szrj static bool
205238fd1498Szrj get_group_load_store_type (gimple *stmt, tree vectype, bool slp,
205338fd1498Szrj 			   bool masked_p, vec_load_store_type vls_type,
205438fd1498Szrj 			   vect_memory_access_type *memory_access_type,
205538fd1498Szrj 			   gather_scatter_info *gs_info)
205638fd1498Szrj {
205738fd1498Szrj   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
205838fd1498Szrj   vec_info *vinfo = stmt_info->vinfo;
205938fd1498Szrj   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
206038fd1498Szrj   struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
206138fd1498Szrj   gimple *first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
206238fd1498Szrj   data_reference *first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
206338fd1498Szrj   unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
206438fd1498Szrj   bool single_element_p = (stmt == first_stmt
206538fd1498Szrj 			   && !GROUP_NEXT_ELEMENT (stmt_info));
206638fd1498Szrj   unsigned HOST_WIDE_INT gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
206738fd1498Szrj   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
206838fd1498Szrj 
206938fd1498Szrj   /* True if the vectorized statements would access beyond the last
207038fd1498Szrj      statement in the group.  */
207138fd1498Szrj   bool overrun_p = false;
207238fd1498Szrj 
207338fd1498Szrj   /* True if we can cope with such overrun by peeling for gaps, so that
207438fd1498Szrj      there is at least one final scalar iteration after the vector loop.  */
207538fd1498Szrj   bool can_overrun_p = (!masked_p
207638fd1498Szrj 			&& vls_type == VLS_LOAD
207738fd1498Szrj 			&& loop_vinfo
207838fd1498Szrj 			&& !loop->inner);
207938fd1498Szrj 
208038fd1498Szrj   /* There can only be a gap at the end of the group if the stride is
208138fd1498Szrj      known at compile time.  */
208238fd1498Szrj   gcc_assert (!STMT_VINFO_STRIDED_P (stmt_info) || gap == 0);
208338fd1498Szrj 
208438fd1498Szrj   /* Stores can't yet have gaps.  */
208538fd1498Szrj   gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
208638fd1498Szrj 
208738fd1498Szrj   if (slp)
208838fd1498Szrj     {
208938fd1498Szrj       if (STMT_VINFO_STRIDED_P (stmt_info))
209038fd1498Szrj 	{
209138fd1498Szrj 	  /* Try to use consecutive accesses of GROUP_SIZE elements,
209238fd1498Szrj 	     separated by the stride, until we have a complete vector.
209338fd1498Szrj 	     Fall back to scalar accesses if that isn't possible.  */
209438fd1498Szrj 	  if (multiple_p (nunits, group_size))
209538fd1498Szrj 	    *memory_access_type = VMAT_STRIDED_SLP;
209638fd1498Szrj 	  else
209738fd1498Szrj 	    *memory_access_type = VMAT_ELEMENTWISE;
209838fd1498Szrj 	}
209938fd1498Szrj       else
210038fd1498Szrj 	{
210138fd1498Szrj 	  overrun_p = loop_vinfo && gap != 0;
210238fd1498Szrj 	  if (overrun_p && vls_type != VLS_LOAD)
210338fd1498Szrj 	    {
210438fd1498Szrj 	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
210538fd1498Szrj 			       "Grouped store with gaps requires"
210638fd1498Szrj 			       " non-consecutive accesses\n");
210738fd1498Szrj 	      return false;
210838fd1498Szrj 	    }
210938fd1498Szrj 	  /* An overrun is fine if the trailing elements are smaller
211038fd1498Szrj 	     than the alignment boundary B.  Every vector access will
211138fd1498Szrj 	     be a multiple of B and so we are guaranteed to access a
211238fd1498Szrj 	     non-gap element in the same B-sized block.  */
211338fd1498Szrj 	  if (overrun_p
211438fd1498Szrj 	      && gap < (vect_known_alignment_in_bytes (first_dr)
211538fd1498Szrj 			/ vect_get_scalar_dr_size (first_dr)))
211638fd1498Szrj 	    overrun_p = false;
211738fd1498Szrj 	  if (overrun_p && !can_overrun_p)
211838fd1498Szrj 	    {
211938fd1498Szrj 	      if (dump_enabled_p ())
212038fd1498Szrj 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
212138fd1498Szrj 				 "Peeling for outer loop is not supported\n");
212238fd1498Szrj 	      return false;
212338fd1498Szrj 	    }
212438fd1498Szrj 	  *memory_access_type = VMAT_CONTIGUOUS;
212538fd1498Szrj 	}
212638fd1498Szrj     }
212738fd1498Szrj   else
212838fd1498Szrj     {
212938fd1498Szrj       /* We can always handle this case using elementwise accesses,
213038fd1498Szrj 	 but see if something more efficient is available.  */
213138fd1498Szrj       *memory_access_type = VMAT_ELEMENTWISE;
213238fd1498Szrj 
213338fd1498Szrj       /* If there is a gap at the end of the group then these optimizations
213438fd1498Szrj 	 would access excess elements in the last iteration.  */
213538fd1498Szrj       bool would_overrun_p = (gap != 0);
213638fd1498Szrj       /* An overrun is fine if the trailing elements are smaller than the
213738fd1498Szrj 	 alignment boundary B.  Every vector access will be a multiple of B
213838fd1498Szrj 	 and so we are guaranteed to access a non-gap element in the
213938fd1498Szrj 	 same B-sized block.  */
214038fd1498Szrj       if (would_overrun_p
214138fd1498Szrj 	  && !masked_p
214238fd1498Szrj 	  && gap < (vect_known_alignment_in_bytes (first_dr)
214338fd1498Szrj 		    / vect_get_scalar_dr_size (first_dr)))
214438fd1498Szrj 	would_overrun_p = false;
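      /* Editor's note, hypothetical values: a gap of one 4-byte element
	 with a known 16-byte alignment gives 1 < 16 / 4 == 4, so the
	 overrun stays inside an aligned block and is harmless.  */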
214538fd1498Szrj 
214638fd1498Szrj       if (!STMT_VINFO_STRIDED_P (stmt_info)
214738fd1498Szrj 	  && (can_overrun_p || !would_overrun_p)
214838fd1498Szrj 	  && compare_step_with_zero (stmt) > 0)
214938fd1498Szrj 	{
215038fd1498Szrj 	  /* First cope with the degenerate case of a single-element
215138fd1498Szrj 	     vector.  */
215238fd1498Szrj 	  if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
215338fd1498Szrj 	    *memory_access_type = VMAT_CONTIGUOUS;
215438fd1498Szrj 
215538fd1498Szrj 	  /* Otherwise try using LOAD/STORE_LANES.  */
215638fd1498Szrj 	  if (*memory_access_type == VMAT_ELEMENTWISE
215738fd1498Szrj 	      && (vls_type == VLS_LOAD
215838fd1498Szrj 		  ? vect_load_lanes_supported (vectype, group_size, masked_p)
215938fd1498Szrj 		  : vect_store_lanes_supported (vectype, group_size,
216038fd1498Szrj 						masked_p)))
216138fd1498Szrj 	    {
216238fd1498Szrj 	      *memory_access_type = VMAT_LOAD_STORE_LANES;
216338fd1498Szrj 	      overrun_p = would_overrun_p;
216438fd1498Szrj 	    }
216538fd1498Szrj 
216638fd1498Szrj 	  /* If that fails, try using permuting loads.  */
216738fd1498Szrj 	  if (*memory_access_type == VMAT_ELEMENTWISE
216838fd1498Szrj 	      && (vls_type == VLS_LOAD
216938fd1498Szrj 		  ? vect_grouped_load_supported (vectype, single_element_p,
217038fd1498Szrj 						 group_size)
217138fd1498Szrj 		  : vect_grouped_store_supported (vectype, group_size)))
217238fd1498Szrj 	    {
217338fd1498Szrj 	      *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
217438fd1498Szrj 	      overrun_p = would_overrun_p;
217538fd1498Szrj 	    }
217638fd1498Szrj 	}
217738fd1498Szrj 
217838fd1498Szrj       /* As a last resort, try using a gather load or scatter store.
217938fd1498Szrj 
218038fd1498Szrj 	 ??? Although the code can handle all group sizes correctly,
218138fd1498Szrj 	 it probably isn't a win to use separate strided accesses based
218238fd1498Szrj 	 on nearby locations.  Or, even if it's a win over scalar code,
218338fd1498Szrj 	 it might not be a win over vectorizing at a lower VF, if that
218438fd1498Szrj 	 allows us to use contiguous accesses.  */
218538fd1498Szrj       if (*memory_access_type == VMAT_ELEMENTWISE
218638fd1498Szrj 	  && single_element_p
218738fd1498Szrj 	  && loop_vinfo
218838fd1498Szrj 	  && vect_use_strided_gather_scatters_p (stmt, loop_vinfo,
218938fd1498Szrj 						 masked_p, gs_info))
219038fd1498Szrj 	*memory_access_type = VMAT_GATHER_SCATTER;
219138fd1498Szrj     }
219238fd1498Szrj 
219338fd1498Szrj   if (vls_type != VLS_LOAD && first_stmt == stmt)
219438fd1498Szrj     {
219538fd1498Szrj       /* STMT is the leader of the group. Check the operands of all the
219638fd1498Szrj 	 stmts of the group.  */
219738fd1498Szrj       gimple *next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
219838fd1498Szrj       while (next_stmt)
219938fd1498Szrj 	{
220038fd1498Szrj 	  tree op = vect_get_store_rhs (next_stmt);
220138fd1498Szrj 	  gimple *def_stmt;
220238fd1498Szrj 	  enum vect_def_type dt;
220338fd1498Szrj 	  if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
220438fd1498Szrj 	    {
220538fd1498Szrj 	      if (dump_enabled_p ())
220638fd1498Szrj 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
220738fd1498Szrj 				 "use not simple.\n");
220838fd1498Szrj 	      return false;
220938fd1498Szrj 	    }
221038fd1498Szrj 	  next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
221138fd1498Szrj 	}
221238fd1498Szrj     }
221338fd1498Szrj 
221438fd1498Szrj   if (overrun_p)
221538fd1498Szrj     {
221638fd1498Szrj       gcc_assert (can_overrun_p);
221738fd1498Szrj       if (dump_enabled_p ())
221838fd1498Szrj 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
221938fd1498Szrj 			 "Data access with gaps requires scalar "
222038fd1498Szrj 			 "epilogue loop\n");
222138fd1498Szrj       LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
222238fd1498Szrj     }
222338fd1498Szrj 
222438fd1498Szrj   return true;
222538fd1498Szrj }
222638fd1498Szrj 
222738fd1498Szrj /* A subroutine of get_load_store_type, with a subset of the same
222838fd1498Szrj    arguments.  Handle the case where STMT is a load or store that
222938fd1498Szrj    accesses consecutive elements with a negative step.  */
223038fd1498Szrj 
223138fd1498Szrj static vect_memory_access_type
223238fd1498Szrj get_negative_load_store_type (gimple *stmt, tree vectype,
223338fd1498Szrj 			      vec_load_store_type vls_type,
223438fd1498Szrj 			      unsigned int ncopies)
223538fd1498Szrj {
223638fd1498Szrj   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
223738fd1498Szrj   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
223838fd1498Szrj   dr_alignment_support alignment_support_scheme;
223938fd1498Szrj 
224038fd1498Szrj   if (ncopies > 1)
224138fd1498Szrj     {
224238fd1498Szrj       if (dump_enabled_p ())
224338fd1498Szrj 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
224438fd1498Szrj 			 "multiple types with negative step.\n");
224538fd1498Szrj       return VMAT_ELEMENTWISE;
224638fd1498Szrj     }
224738fd1498Szrj 
224838fd1498Szrj   alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
224938fd1498Szrj   if (alignment_support_scheme != dr_aligned
225038fd1498Szrj       && alignment_support_scheme != dr_unaligned_supported)
225138fd1498Szrj     {
225238fd1498Szrj       if (dump_enabled_p ())
225338fd1498Szrj 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
225438fd1498Szrj 			 "negative step but alignment required.\n");
225538fd1498Szrj       return VMAT_ELEMENTWISE;
225638fd1498Szrj     }
225738fd1498Szrj 
225838fd1498Szrj   if (vls_type == VLS_STORE_INVARIANT)
225938fd1498Szrj     {
226038fd1498Szrj       if (dump_enabled_p ())
226138fd1498Szrj 	dump_printf_loc (MSG_NOTE, vect_location,
226238fd1498Szrj 			 "negative step with invariant source;"
226338fd1498Szrj 			 " no permute needed.\n");
226438fd1498Szrj       return VMAT_CONTIGUOUS_DOWN;
226538fd1498Szrj     }
226638fd1498Szrj 
226738fd1498Szrj   if (!perm_mask_for_reverse (vectype))
226838fd1498Szrj     {
226938fd1498Szrj       if (dump_enabled_p ())
227038fd1498Szrj 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
227138fd1498Szrj 			 "negative step and reversing not supported.\n");
227238fd1498Szrj       return VMAT_ELEMENTWISE;
227338fd1498Szrj     }
227438fd1498Szrj 
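  /* As an illustration (not from the sources), a load such as

       for (i = 0; i < n; i++)
	 x[i] = a[n - 1 - i];

     has a DR_STEP of -4 for 4-byte elements.  With V4SI it becomes a
     contiguous load of the four elements ending at the current address
     followed by a reversing VEC_PERM_EXPR with selector {3, 2, 1, 0}.  */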
227538fd1498Szrj   return VMAT_CONTIGUOUS_REVERSE;
227638fd1498Szrj }
227738fd1498Szrj 
227838fd1498Szrj /* Analyze load or store statement STMT of type VLS_TYPE.  Return true
227938fd1498Szrj    if there is a memory access type that the vectorized form can use,
228038fd1498Szrj    storing it in *MEMORY_ACCESS_TYPE if so.  If we decide to use gathers
228138fd1498Szrj    or scatters, fill in GS_INFO accordingly.
228238fd1498Szrj 
228338fd1498Szrj    SLP says whether we're performing SLP rather than loop vectorization.
228438fd1498Szrj    MASKED_P is true if the statement is conditional on a vectorized mask.
228538fd1498Szrj    VECTYPE is the vector type that the vectorized statements will use.
228638fd1498Szrj    NCOPIES is the number of vector statements that will be needed.  */
228738fd1498Szrj 
228838fd1498Szrj static bool
228938fd1498Szrj get_load_store_type (gimple *stmt, tree vectype, bool slp, bool masked_p,
229038fd1498Szrj 		     vec_load_store_type vls_type, unsigned int ncopies,
229138fd1498Szrj 		     vect_memory_access_type *memory_access_type,
229238fd1498Szrj 		     gather_scatter_info *gs_info)
229338fd1498Szrj {
229438fd1498Szrj   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
229538fd1498Szrj   vec_info *vinfo = stmt_info->vinfo;
229638fd1498Szrj   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
229738fd1498Szrj   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
229838fd1498Szrj   if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
229938fd1498Szrj     {
230038fd1498Szrj       *memory_access_type = VMAT_GATHER_SCATTER;
230138fd1498Szrj       gimple *def_stmt;
230238fd1498Szrj       if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info))
230338fd1498Szrj 	gcc_unreachable ();
230438fd1498Szrj       else if (!vect_is_simple_use (gs_info->offset, vinfo, &def_stmt,
230538fd1498Szrj 				    &gs_info->offset_dt,
230638fd1498Szrj 				    &gs_info->offset_vectype))
230738fd1498Szrj 	{
230838fd1498Szrj 	  if (dump_enabled_p ())
230938fd1498Szrj 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
231038fd1498Szrj 			     "%s index use not simple.\n",
231138fd1498Szrj 			     vls_type == VLS_LOAD ? "gather" : "scatter");
231238fd1498Szrj 	  return false;
231338fd1498Szrj 	}
231438fd1498Szrj     }
231538fd1498Szrj   else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
231638fd1498Szrj     {
231738fd1498Szrj       if (!get_group_load_store_type (stmt, vectype, slp, masked_p, vls_type,
231838fd1498Szrj 				      memory_access_type, gs_info))
231938fd1498Szrj 	return false;
232038fd1498Szrj     }
232138fd1498Szrj   else if (STMT_VINFO_STRIDED_P (stmt_info))
232238fd1498Szrj     {
232338fd1498Szrj       gcc_assert (!slp);
232438fd1498Szrj       if (loop_vinfo
232538fd1498Szrj 	  && vect_use_strided_gather_scatters_p (stmt, loop_vinfo,
232638fd1498Szrj 						 masked_p, gs_info))
232738fd1498Szrj 	*memory_access_type = VMAT_GATHER_SCATTER;
232838fd1498Szrj       else
232938fd1498Szrj 	*memory_access_type = VMAT_ELEMENTWISE;
233038fd1498Szrj     }
233138fd1498Szrj   else
233238fd1498Szrj     {
233338fd1498Szrj       int cmp = compare_step_with_zero (stmt);
233438fd1498Szrj       if (cmp < 0)
233538fd1498Szrj 	*memory_access_type = get_negative_load_store_type
233638fd1498Szrj 	  (stmt, vectype, vls_type, ncopies);
233738fd1498Szrj       else if (cmp == 0)
233838fd1498Szrj 	{
233938fd1498Szrj 	  gcc_assert (vls_type == VLS_LOAD);
234038fd1498Szrj 	  *memory_access_type = VMAT_INVARIANT;
234138fd1498Szrj 	}
234238fd1498Szrj       else
234338fd1498Szrj 	*memory_access_type = VMAT_CONTIGUOUS;
234438fd1498Szrj     }
234538fd1498Szrj 
234638fd1498Szrj   if ((*memory_access_type == VMAT_ELEMENTWISE
234738fd1498Szrj        || *memory_access_type == VMAT_STRIDED_SLP)
234838fd1498Szrj       && !nunits.is_constant ())
234938fd1498Szrj     {
235038fd1498Szrj       if (dump_enabled_p ())
235138fd1498Szrj 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
235238fd1498Szrj 			 "Not using elementwise accesses due to variable "
235338fd1498Szrj 			 "vectorization factor.\n");
235438fd1498Szrj       return false;
235538fd1498Szrj     }
235638fd1498Szrj 
235738fd1498Szrj   /* FIXME: At the moment the cost model seems to underestimate the
235838fd1498Szrj      cost of using elementwise accesses.  This check preserves the
235938fd1498Szrj      traditional behavior until that can be fixed.  */
236038fd1498Szrj   if (*memory_access_type == VMAT_ELEMENTWISE
236138fd1498Szrj       && !STMT_VINFO_STRIDED_P (stmt_info)
236238fd1498Szrj       && !(stmt == GROUP_FIRST_ELEMENT (stmt_info)
236338fd1498Szrj 	   && !GROUP_NEXT_ELEMENT (stmt_info)
236438fd1498Szrj 	   && !pow2p_hwi (GROUP_SIZE (stmt_info))))
236538fd1498Szrj     {
236638fd1498Szrj       if (dump_enabled_p ())
236738fd1498Szrj 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
236838fd1498Szrj 			 "not falling back to elementwise accesses\n");
236938fd1498Szrj       return false;
237038fd1498Szrj     }
237138fd1498Szrj   return true;
237238fd1498Szrj }
237338fd1498Szrj 
237438fd1498Szrj /* Return true if boolean argument MASK is suitable for vectorizing
237538fd1498Szrj    conditional load or store STMT.  When returning true, store the type
237638fd1498Szrj    of the definition in *MASK_DT_OUT and the type of the vectorized mask
237738fd1498Szrj    in *MASK_VECTYPE_OUT.  */
237838fd1498Szrj 
237938fd1498Szrj static bool
238038fd1498Szrj vect_check_load_store_mask (gimple *stmt, tree mask,
238138fd1498Szrj 			    vect_def_type *mask_dt_out,
238238fd1498Szrj 			    tree *mask_vectype_out)
238338fd1498Szrj {
238438fd1498Szrj   if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
238538fd1498Szrj     {
238638fd1498Szrj       if (dump_enabled_p ())
238738fd1498Szrj 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
238838fd1498Szrj 			 "mask argument is not a boolean.\n");
238938fd1498Szrj       return false;
239038fd1498Szrj     }
239138fd1498Szrj 
239238fd1498Szrj   if (TREE_CODE (mask) != SSA_NAME)
239338fd1498Szrj     {
239438fd1498Szrj       if (dump_enabled_p ())
239538fd1498Szrj 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
239638fd1498Szrj 			 "mask argument is not an SSA name.\n");
239738fd1498Szrj       return false;
239838fd1498Szrj     }
239938fd1498Szrj 
240038fd1498Szrj   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
240138fd1498Szrj   gimple *def_stmt;
240238fd1498Szrj   enum vect_def_type mask_dt;
240338fd1498Szrj   tree mask_vectype;
240438fd1498Szrj   if (!vect_is_simple_use (mask, stmt_info->vinfo, &def_stmt, &mask_dt,
240538fd1498Szrj 			   &mask_vectype))
240638fd1498Szrj     {
240738fd1498Szrj       if (dump_enabled_p ())
240838fd1498Szrj 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
240938fd1498Szrj 			 "mask use not simple.\n");
241038fd1498Szrj       return false;
241138fd1498Szrj     }
241238fd1498Szrj 
241338fd1498Szrj   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
241438fd1498Szrj   if (!mask_vectype)
241538fd1498Szrj     mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
241638fd1498Szrj 
241738fd1498Szrj   if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
241838fd1498Szrj     {
241938fd1498Szrj       if (dump_enabled_p ())
242038fd1498Szrj 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
242138fd1498Szrj 			 "could not find an appropriate vector mask type.\n");
242238fd1498Szrj       return false;
242338fd1498Szrj     }
242438fd1498Szrj 
242538fd1498Szrj   if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
242638fd1498Szrj 		TYPE_VECTOR_SUBPARTS (vectype)))
242738fd1498Szrj     {
242838fd1498Szrj       if (dump_enabled_p ())
242938fd1498Szrj 	{
243038fd1498Szrj 	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
243138fd1498Szrj 			   "vector mask type ");
243238fd1498Szrj 	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, mask_vectype);
243338fd1498Szrj 	  dump_printf (MSG_MISSED_OPTIMIZATION,
243438fd1498Szrj 		       " does not match vector data type ");
243538fd1498Szrj 	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, vectype);
243638fd1498Szrj 	  dump_printf (MSG_MISSED_OPTIMIZATION, ".\n");
243738fd1498Szrj 	}
243838fd1498Szrj       return false;
243938fd1498Szrj     }
244038fd1498Szrj 
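  /* The subparts check above rejects cases such as (illustrative only)
     a V4DF data vector controlled by a mask computed from a V8SI
     comparison: eight mask elements for four data lanes leave no
     sensible lane correspondence.  */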
244138fd1498Szrj   *mask_dt_out = mask_dt;
244238fd1498Szrj   *mask_vectype_out = mask_vectype;
244338fd1498Szrj   return true;
244438fd1498Szrj }
244538fd1498Szrj 
244638fd1498Szrj /* Return true if stored value RHS is suitable for vectorizing store
244738fd1498Szrj    statement STMT.  When returning true, store the type of the
244838fd1498Szrj    definition in *RHS_DT_OUT, the type of the vectorized store value in
244938fd1498Szrj    *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT.  */
245038fd1498Szrj 
245138fd1498Szrj static bool
245238fd1498Szrj vect_check_store_rhs (gimple *stmt, tree rhs, vect_def_type *rhs_dt_out,
245338fd1498Szrj 		      tree *rhs_vectype_out, vec_load_store_type *vls_type_out)
245438fd1498Szrj {
245538fd1498Szrj   /* If this is a store from a constant, make sure
245638fd1498Szrj      native_encode_expr can handle it.  */
245738fd1498Szrj   if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
245838fd1498Szrj     {
245938fd1498Szrj       if (dump_enabled_p ())
246038fd1498Szrj 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
246138fd1498Szrj 			 "cannot encode constant as a byte sequence.\n");
246238fd1498Szrj       return false;
246338fd1498Szrj     }
246438fd1498Szrj 
246538fd1498Szrj   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
246638fd1498Szrj   gimple *def_stmt;
246738fd1498Szrj   enum vect_def_type rhs_dt;
246838fd1498Szrj   tree rhs_vectype;
246938fd1498Szrj   if (!vect_is_simple_use (rhs, stmt_info->vinfo, &def_stmt, &rhs_dt,
247038fd1498Szrj 			   &rhs_vectype))
247138fd1498Szrj     {
247238fd1498Szrj       if (dump_enabled_p ())
247338fd1498Szrj 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
247438fd1498Szrj 			 "use not simple.\n");
247538fd1498Szrj       return false;
247638fd1498Szrj     }
247738fd1498Szrj 
247838fd1498Szrj   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
247938fd1498Szrj   if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
248038fd1498Szrj     {
248138fd1498Szrj       if (dump_enabled_p ())
248238fd1498Szrj 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
248338fd1498Szrj 			 "incompatible vector types.\n");
248438fd1498Szrj       return false;
248538fd1498Szrj     }
248638fd1498Szrj 
248738fd1498Szrj   *rhs_dt_out = rhs_dt;
248838fd1498Szrj   *rhs_vectype_out = rhs_vectype;
248938fd1498Szrj   if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
249038fd1498Szrj     *vls_type_out = VLS_STORE_INVARIANT;
249138fd1498Szrj   else
249238fd1498Szrj     *vls_type_out = VLS_STORE;
249338fd1498Szrj   return true;
249438fd1498Szrj }
249538fd1498Szrj 
249638fd1498Szrj /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT.
249738fd1498Szrj    Note that we support masks with floating-point type, in which case the
249838fd1498Szrj    floats are interpreted as a bitmask.  */
249938fd1498Szrj 
250038fd1498Szrj static tree
250138fd1498Szrj vect_build_all_ones_mask (gimple *stmt, tree masktype)
250238fd1498Szrj {
250338fd1498Szrj   if (TREE_CODE (masktype) == INTEGER_TYPE)
250438fd1498Szrj     return build_int_cst (masktype, -1);
250538fd1498Szrj   else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
250638fd1498Szrj     {
250738fd1498Szrj       tree mask = build_int_cst (TREE_TYPE (masktype), -1);
250838fd1498Szrj       mask = build_vector_from_val (masktype, mask);
250938fd1498Szrj       return vect_init_vector (stmt, mask, masktype, NULL);
251038fd1498Szrj     }
251138fd1498Szrj   else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
251238fd1498Szrj     {
251338fd1498Szrj       REAL_VALUE_TYPE r;
251438fd1498Szrj       long tmp[6];
251538fd1498Szrj       for (int j = 0; j < 6; ++j)
251638fd1498Szrj 	tmp[j] = -1;
251738fd1498Szrj       real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
251838fd1498Szrj       tree mask = build_real (TREE_TYPE (masktype), r);
251938fd1498Szrj       mask = build_vector_from_val (masktype, mask);
252038fd1498Szrj       return vect_init_vector (stmt, mask, masktype, NULL);
252138fd1498Szrj     }
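  /* No other mask representations are expected here.  As an example of
     the floating-point case (illustrative only): the x86 AVX2 gather
     builtins take a V8SF mask in which the sign bit of each element
     selects the corresponding lane, so the all-ones bit pattern built
     above enables every lane.  */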
252238fd1498Szrj   gcc_unreachable ();
252338fd1498Szrj }
252438fd1498Szrj 
252538fd1498Szrj /* Build an all-zero merge value of type VECTYPE while vectorizing
252638fd1498Szrj    STMT as a gather load.  */
252738fd1498Szrj 
252838fd1498Szrj static tree
252938fd1498Szrj vect_build_zero_merge_argument (gimple *stmt, tree vectype)
253038fd1498Szrj {
253138fd1498Szrj   tree merge;
253238fd1498Szrj   if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
253338fd1498Szrj     merge = build_int_cst (TREE_TYPE (vectype), 0);
253438fd1498Szrj   else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
253538fd1498Szrj     {
253638fd1498Szrj       REAL_VALUE_TYPE r;
253738fd1498Szrj       long tmp[6];
253838fd1498Szrj       for (int j = 0; j < 6; ++j)
253938fd1498Szrj 	tmp[j] = 0;
254038fd1498Szrj       real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
254138fd1498Szrj       merge = build_real (TREE_TYPE (vectype), r);
254238fd1498Szrj     }
254338fd1498Szrj   else
254438fd1498Szrj     gcc_unreachable ();
254538fd1498Szrj   merge = build_vector_from_val (vectype, merge);
254638fd1498Szrj   return vect_init_vector (stmt, merge, vectype, NULL);
254738fd1498Szrj }
254838fd1498Szrj 
254938fd1498Szrj /* Build a gather load call while vectorizing STMT.  Insert new instructions
255038fd1498Szrj    before GSI and add them to VEC_STMT.  GS_INFO describes the gather load
255138fd1498Szrj    operation.  If the load is conditional, MASK is the unvectorized
255238fd1498Szrj    condition and MASK_DT is its definition type, otherwise MASK is null.  */
255338fd1498Szrj 
255438fd1498Szrj static void
255538fd1498Szrj vect_build_gather_load_calls (gimple *stmt, gimple_stmt_iterator *gsi,
255638fd1498Szrj 			      gimple **vec_stmt, gather_scatter_info *gs_info,
255738fd1498Szrj 			      tree mask, vect_def_type mask_dt)
255838fd1498Szrj {
255938fd1498Szrj   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
256038fd1498Szrj   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
256138fd1498Szrj   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
256238fd1498Szrj   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
256338fd1498Szrj   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
256438fd1498Szrj   int ncopies = vect_get_num_copies (loop_vinfo, vectype);
256538fd1498Szrj   edge pe = loop_preheader_edge (loop);
256638fd1498Szrj   enum { NARROW, NONE, WIDEN } modifier;
256738fd1498Szrj   poly_uint64 gather_off_nunits
256838fd1498Szrj     = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
256938fd1498Szrj 
257038fd1498Szrj   tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
257138fd1498Szrj   tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
257238fd1498Szrj   tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
257338fd1498Szrj   tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
257438fd1498Szrj   tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
257538fd1498Szrj   tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
257638fd1498Szrj   tree scaletype = TREE_VALUE (arglist);
257738fd1498Szrj   gcc_checking_assert (types_compatible_p (srctype, rettype)
257838fd1498Szrj 		       && (!mask || types_compatible_p (srctype, masktype)));
257938fd1498Szrj 
258038fd1498Szrj   tree perm_mask = NULL_TREE;
258138fd1498Szrj   tree mask_perm_mask = NULL_TREE;
258238fd1498Szrj   if (known_eq (nunits, gather_off_nunits))
258338fd1498Szrj     modifier = NONE;
258438fd1498Szrj   else if (known_eq (nunits * 2, gather_off_nunits))
258538fd1498Szrj     {
258638fd1498Szrj       modifier = WIDEN;
258738fd1498Szrj 
258838fd1498Szrj       /* Currently widening gathers and scatters are only supported for
258938fd1498Szrj 	 fixed-length vectors.  */
259038fd1498Szrj       int count = gather_off_nunits.to_constant ();
259138fd1498Szrj       vec_perm_builder sel (count, count, 1);
259238fd1498Szrj       for (int i = 0; i < count; ++i)
259338fd1498Szrj 	sel.quick_push (i | (count / 2));
259438fd1498Szrj 
259538fd1498Szrj       vec_perm_indices indices (sel, 1, count);
259638fd1498Szrj       perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
259738fd1498Szrj 					      indices);
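      /* For instance (illustrative only): gathering V4DF data with
	 V8SI offsets gives COUNT == 8 and the selector
	 {4, 5, 6, 7, 4, 5, 6, 7}, so the odd-numbered copies of the
	 statement reuse the single offset vector with its high half
	 moved down.  */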
259838fd1498Szrj     }
259938fd1498Szrj   else if (known_eq (nunits, gather_off_nunits * 2))
260038fd1498Szrj     {
260138fd1498Szrj       modifier = NARROW;
260238fd1498Szrj 
260338fd1498Szrj       /* Currently narrowing gathers and scatters are only supported for
260438fd1498Szrj 	 fixed-length vectors.  */
260538fd1498Szrj       int count = nunits.to_constant ();
260638fd1498Szrj       vec_perm_builder sel (count, count, 1);
260738fd1498Szrj       sel.quick_grow (count);
260838fd1498Szrj       for (int i = 0; i < count; ++i)
260938fd1498Szrj 	sel[i] = i < count / 2 ? i : i + count / 2;
261038fd1498Szrj       vec_perm_indices indices (sel, 2, count);
261138fd1498Szrj       perm_mask = vect_gen_perm_mask_checked (vectype, indices);
261238fd1498Szrj 
261338fd1498Szrj       ncopies *= 2;
261438fd1498Szrj 
261538fd1498Szrj       if (mask)
261638fd1498Szrj 	{
261738fd1498Szrj 	  for (int i = 0; i < count; ++i)
261838fd1498Szrj 	    sel[i] = i | (count / 2);
261938fd1498Szrj 	  indices.new_vector (sel, 2, count);
262038fd1498Szrj 	  mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
262138fd1498Szrj 	}
262238fd1498Szrj     }
262338fd1498Szrj   else
262438fd1498Szrj     gcc_unreachable ();
262538fd1498Szrj 
262638fd1498Szrj   tree vec_dest = vect_create_destination_var (gimple_get_lhs (stmt),
262738fd1498Szrj 					       vectype);
262838fd1498Szrj 
262938fd1498Szrj   tree ptr = fold_convert (ptrtype, gs_info->base);
263038fd1498Szrj   if (!is_gimple_min_invariant (ptr))
263138fd1498Szrj     {
263238fd1498Szrj       gimple_seq seq;
263338fd1498Szrj       ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
263438fd1498Szrj       basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
263538fd1498Szrj       gcc_assert (!new_bb);
263638fd1498Szrj     }
263738fd1498Szrj 
263838fd1498Szrj   tree scale = build_int_cst (scaletype, gs_info->scale);
263938fd1498Szrj 
264038fd1498Szrj   tree vec_oprnd0 = NULL_TREE;
264138fd1498Szrj   tree vec_mask = NULL_TREE;
264238fd1498Szrj   tree src_op = NULL_TREE;
264338fd1498Szrj   tree mask_op = NULL_TREE;
264438fd1498Szrj   tree prev_res = NULL_TREE;
264538fd1498Szrj   stmt_vec_info prev_stmt_info = NULL;
264638fd1498Szrj 
264738fd1498Szrj   if (!mask)
264838fd1498Szrj     {
264938fd1498Szrj       src_op = vect_build_zero_merge_argument (stmt, rettype);
265038fd1498Szrj       mask_op = vect_build_all_ones_mask (stmt, masktype);
265138fd1498Szrj     }
265238fd1498Szrj 
265338fd1498Szrj   for (int j = 0; j < ncopies; ++j)
265438fd1498Szrj     {
265538fd1498Szrj       tree op, var;
265638fd1498Szrj       gimple *new_stmt;
265738fd1498Szrj       if (modifier == WIDEN && (j & 1))
265838fd1498Szrj 	op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
265938fd1498Szrj 				   perm_mask, stmt, gsi);
266038fd1498Szrj       else if (j == 0)
266138fd1498Szrj 	op = vec_oprnd0
266238fd1498Szrj 	  = vect_get_vec_def_for_operand (gs_info->offset, stmt);
266338fd1498Szrj       else
266438fd1498Szrj 	op = vec_oprnd0
266538fd1498Szrj 	  = vect_get_vec_def_for_stmt_copy (gs_info->offset_dt, vec_oprnd0);
266638fd1498Szrj 
266738fd1498Szrj       if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
266838fd1498Szrj 	{
266938fd1498Szrj 	  gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
267038fd1498Szrj 				TYPE_VECTOR_SUBPARTS (idxtype)));
267138fd1498Szrj 	  var = vect_get_new_ssa_name (idxtype, vect_simple_var);
267238fd1498Szrj 	  op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
267338fd1498Szrj 	  new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
267438fd1498Szrj 	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
267538fd1498Szrj 	  op = var;
267638fd1498Szrj 	}
267738fd1498Szrj 
267838fd1498Szrj       if (mask)
267938fd1498Szrj 	{
268038fd1498Szrj 	  if (mask_perm_mask && (j & 1))
268138fd1498Szrj 	    mask_op = permute_vec_elements (mask_op, mask_op,
268238fd1498Szrj 					    mask_perm_mask, stmt, gsi);
268338fd1498Szrj 	  else
268438fd1498Szrj 	    {
268538fd1498Szrj 	      if (j == 0)
268638fd1498Szrj 		vec_mask = vect_get_vec_def_for_operand (mask, stmt);
268738fd1498Szrj 	      else
268838fd1498Szrj 		vec_mask = vect_get_vec_def_for_stmt_copy (mask_dt, vec_mask);
268938fd1498Szrj 
269038fd1498Szrj 	      mask_op = vec_mask;
269138fd1498Szrj 	      if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
269238fd1498Szrj 		{
269338fd1498Szrj 		  gcc_assert
269438fd1498Szrj 		    (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op)),
269538fd1498Szrj 			       TYPE_VECTOR_SUBPARTS (masktype)));
269638fd1498Szrj 		  var = vect_get_new_ssa_name (masktype, vect_simple_var);
269738fd1498Szrj 		  mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
269838fd1498Szrj 		  new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR,
269938fd1498Szrj 						  mask_op);
270038fd1498Szrj 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
270138fd1498Szrj 		  mask_op = var;
270238fd1498Szrj 		}
270338fd1498Szrj 	    }
270438fd1498Szrj 	  src_op = mask_op;
270538fd1498Szrj 	}
270638fd1498Szrj 
270738fd1498Szrj       new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
270838fd1498Szrj 				    mask_op, scale);
270938fd1498Szrj 
271038fd1498Szrj       if (!useless_type_conversion_p (vectype, rettype))
271138fd1498Szrj 	{
271238fd1498Szrj 	  gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
271338fd1498Szrj 				TYPE_VECTOR_SUBPARTS (rettype)));
271438fd1498Szrj 	  op = vect_get_new_ssa_name (rettype, vect_simple_var);
271538fd1498Szrj 	  gimple_call_set_lhs (new_stmt, op);
271638fd1498Szrj 	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
271738fd1498Szrj 	  var = make_ssa_name (vec_dest);
271838fd1498Szrj 	  op = build1 (VIEW_CONVERT_EXPR, vectype, op);
271938fd1498Szrj 	  new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
272038fd1498Szrj 	}
272138fd1498Szrj       else
272238fd1498Szrj 	{
272338fd1498Szrj 	  var = make_ssa_name (vec_dest, new_stmt);
272438fd1498Szrj 	  gimple_call_set_lhs (new_stmt, var);
272538fd1498Szrj 	}
272638fd1498Szrj 
272738fd1498Szrj       vect_finish_stmt_generation (stmt, new_stmt, gsi);
272838fd1498Szrj 
272938fd1498Szrj       if (modifier == NARROW)
273038fd1498Szrj 	{
273138fd1498Szrj 	  if ((j & 1) == 0)
273238fd1498Szrj 	    {
273338fd1498Szrj 	      prev_res = var;
273438fd1498Szrj 	      continue;
273538fd1498Szrj 	    }
273638fd1498Szrj 	  var = permute_vec_elements (prev_res, var, perm_mask, stmt, gsi);
273738fd1498Szrj 	  new_stmt = SSA_NAME_DEF_STMT (var);
273838fd1498Szrj 	}
273938fd1498Szrj 
274038fd1498Szrj       if (prev_stmt_info == NULL)
274138fd1498Szrj 	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
274238fd1498Szrj       else
274338fd1498Szrj 	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
274438fd1498Szrj       prev_stmt_info = vinfo_for_stmt (new_stmt);
274538fd1498Szrj     }
274638fd1498Szrj }
274738fd1498Szrj 
274838fd1498Szrj /* Prepare the base and offset in GS_INFO for vectorization.
274938fd1498Szrj    Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
275038fd1498Szrj    to the vectorized offset argument for the first copy of STMT.  STMT
275138fd1498Szrj    is the statement described by GS_INFO and LOOP is the containing loop.  */
275238fd1498Szrj 
275338fd1498Szrj static void
275438fd1498Szrj vect_get_gather_scatter_ops (struct loop *loop, gimple *stmt,
275538fd1498Szrj 			     gather_scatter_info *gs_info,
275638fd1498Szrj 			     tree *dataref_ptr, tree *vec_offset)
275738fd1498Szrj {
275838fd1498Szrj   gimple_seq stmts = NULL;
275938fd1498Szrj   *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
276038fd1498Szrj   if (stmts != NULL)
276138fd1498Szrj     {
276238fd1498Szrj       basic_block new_bb;
276338fd1498Szrj       edge pe = loop_preheader_edge (loop);
276438fd1498Szrj       new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
276538fd1498Szrj       gcc_assert (!new_bb);
276638fd1498Szrj     }
276738fd1498Szrj   tree offset_type = TREE_TYPE (gs_info->offset);
276838fd1498Szrj   tree offset_vectype = get_vectype_for_scalar_type (offset_type);
276938fd1498Szrj   *vec_offset = vect_get_vec_def_for_operand (gs_info->offset, stmt,
277038fd1498Szrj 					      offset_vectype);
277138fd1498Szrj }
277238fd1498Szrj 
277338fd1498Szrj /* Prepare to implement a grouped or strided load or store using
277438fd1498Szrj    the gather load or scatter store operation described by GS_INFO.
277538fd1498Szrj    STMT is the load or store statement.
277638fd1498Szrj 
277738fd1498Szrj    Set *DATAREF_BUMP to the amount that should be added to the base
277838fd1498Szrj    address after each copy of the vectorized statement.  Set *VEC_OFFSET
277938fd1498Szrj    to an invariant offset vector in which element I has the value
278038fd1498Szrj    I * DR_STEP / SCALE.  */
278138fd1498Szrj 
278238fd1498Szrj static void
278338fd1498Szrj vect_get_strided_load_store_ops (gimple *stmt, loop_vec_info loop_vinfo,
278438fd1498Szrj 				 gather_scatter_info *gs_info,
278538fd1498Szrj 				 tree *dataref_bump, tree *vec_offset)
278638fd1498Szrj {
278738fd1498Szrj   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
278838fd1498Szrj   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
278938fd1498Szrj   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
279038fd1498Szrj   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
279138fd1498Szrj   gimple_seq stmts;
279238fd1498Szrj 
279338fd1498Szrj   tree bump = size_binop (MULT_EXPR,
279438fd1498Szrj 			  fold_convert (sizetype, DR_STEP (dr)),
279538fd1498Szrj 			  size_int (TYPE_VECTOR_SUBPARTS (vectype)));
279638fd1498Szrj   *dataref_bump = force_gimple_operand (bump, &stmts, true, NULL_TREE);
279738fd1498Szrj   if (stmts)
279838fd1498Szrj     gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
279938fd1498Szrj 
280038fd1498Szrj   /* The offset given in GS_INFO can have pointer type, so use the element
280138fd1498Szrj      type of the vector instead.  */
280238fd1498Szrj   tree offset_type = TREE_TYPE (gs_info->offset);
280338fd1498Szrj   tree offset_vectype = get_vectype_for_scalar_type (offset_type);
280438fd1498Szrj   offset_type = TREE_TYPE (offset_vectype);
280538fd1498Szrj 
280638fd1498Szrj   /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type.  */
280738fd1498Szrj   tree step = size_binop (EXACT_DIV_EXPR, DR_STEP (dr),
280838fd1498Szrj 			  ssize_int (gs_info->scale));
280938fd1498Szrj   step = fold_convert (offset_type, step);
281038fd1498Szrj   step = force_gimple_operand (step, &stmts, true, NULL_TREE);
281138fd1498Szrj 
281238fd1498Szrj   /* Create {0, X, X*2, X*3, ...}.  */
281338fd1498Szrj   *vec_offset = gimple_build (&stmts, VEC_SERIES_EXPR, offset_vectype,
281438fd1498Szrj 			      build_zero_cst (offset_type), step);
281538fd1498Szrj   if (stmts)
281638fd1498Szrj     gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
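  /* A worked example with illustrative numbers: for 8-byte elements
     accessed with DR_STEP 16 and a gather scale of 8, X == 2, so
     *VEC_OFFSET is {0, 2, 4, ...} and lane I addresses
     BASE + I * 2 * 8 == BASE + I * DR_STEP, while *DATAREF_BUMP
     advances the base by 16 * TYPE_VECTOR_SUBPARTS (VECTYPE) bytes
     between copies.  */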
281738fd1498Szrj }
281838fd1498Szrj 
281938fd1498Szrj /* Return the amount that should be added to a vector pointer to move
282038fd1498Szrj    to the next or previous copy of AGGR_TYPE.  DR is the data reference
282138fd1498Szrj    being vectorized and MEMORY_ACCESS_TYPE describes the type of
282238fd1498Szrj    vectorization.  */
282338fd1498Szrj 
282438fd1498Szrj static tree
282538fd1498Szrj vect_get_data_ptr_increment (data_reference *dr, tree aggr_type,
282638fd1498Szrj 			     vect_memory_access_type memory_access_type)
282738fd1498Szrj {
282838fd1498Szrj   if (memory_access_type == VMAT_INVARIANT)
282938fd1498Szrj     return size_zero_node;
283038fd1498Szrj 
283138fd1498Szrj   tree iv_step = TYPE_SIZE_UNIT (aggr_type);
283238fd1498Szrj   tree step = vect_dr_behavior (dr)->step;
283338fd1498Szrj   if (tree_int_cst_sgn (step) == -1)
283438fd1498Szrj     iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
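  /* E.g. (illustrative only) for a V4SI copy this returns 16, or -16
     when the scalar step is negative and the vector accesses therefore
     walk downwards, as for VMAT_CONTIGUOUS_REVERSE.  */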
283538fd1498Szrj   return iv_step;
283638fd1498Szrj }
283738fd1498Szrj 
283838fd1498Szrj /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}.  */
283938fd1498Szrj 
284038fd1498Szrj static bool
284138fd1498Szrj vectorizable_bswap (gimple *stmt, gimple_stmt_iterator *gsi,
284238fd1498Szrj 		    gimple **vec_stmt, slp_tree slp_node,
284338fd1498Szrj 		    tree vectype_in, enum vect_def_type *dt)
284438fd1498Szrj {
284538fd1498Szrj   tree op, vectype;
284638fd1498Szrj   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
284738fd1498Szrj   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
284838fd1498Szrj   unsigned ncopies;
284938fd1498Szrj   unsigned HOST_WIDE_INT nunits, num_bytes;
285038fd1498Szrj 
285138fd1498Szrj   op = gimple_call_arg (stmt, 0);
285238fd1498Szrj   vectype = STMT_VINFO_VECTYPE (stmt_info);
285338fd1498Szrj 
285438fd1498Szrj   if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
285538fd1498Szrj     return false;
285638fd1498Szrj 
285738fd1498Szrj   /* Multiple types in SLP are handled by creating the appropriate number of
285838fd1498Szrj      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
285938fd1498Szrj      case of SLP.  */
286038fd1498Szrj   if (slp_node)
286138fd1498Szrj     ncopies = 1;
286238fd1498Szrj   else
286338fd1498Szrj     ncopies = vect_get_num_copies (loop_vinfo, vectype);
286438fd1498Szrj 
286538fd1498Szrj   gcc_assert (ncopies >= 1);
286638fd1498Szrj 
286738fd1498Szrj   tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
286838fd1498Szrj   if (! char_vectype)
286938fd1498Szrj     return false;
287038fd1498Szrj 
287138fd1498Szrj   if (!TYPE_VECTOR_SUBPARTS (char_vectype).is_constant (&num_bytes))
287238fd1498Szrj     return false;
287338fd1498Szrj 
287438fd1498Szrj   unsigned word_bytes = num_bytes / nunits;
287538fd1498Szrj 
287638fd1498Szrj   /* The encoding uses one stepped pattern for each byte in the word.  */
287738fd1498Szrj   vec_perm_builder elts (num_bytes, word_bytes, 3);
287838fd1498Szrj   for (unsigned i = 0; i < 3; ++i)
287938fd1498Szrj     for (unsigned j = 0; j < word_bytes; ++j)
288038fd1498Szrj       elts.quick_push ((i + 1) * word_bytes - j - 1);
288138fd1498Szrj 
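  /* E.g. for __builtin_bswap32 on a 16-byte vector, NUM_BYTES == 16
     and WORD_BYTES == 4, so the three explicit patterns above encode
     the byte selector {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, ...},
     which the builder extends to the remaining bytes.  */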
288238fd1498Szrj   vec_perm_indices indices (elts, 1, num_bytes);
288338fd1498Szrj   if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
288438fd1498Szrj     return false;
288538fd1498Szrj 
288638fd1498Szrj   if (! vec_stmt)
288738fd1498Szrj     {
288838fd1498Szrj       STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
288938fd1498Szrj       if (dump_enabled_p ())
289038fd1498Szrj         dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_bswap ==="
289138fd1498Szrj                          "\n");
289238fd1498Szrj       if (! slp_node)
289338fd1498Szrj 	{
289438fd1498Szrj 	  add_stmt_cost (stmt_info->vinfo->target_cost_data,
289538fd1498Szrj 			 1, vector_stmt, stmt_info, 0, vect_prologue);
289638fd1498Szrj 	  add_stmt_cost (stmt_info->vinfo->target_cost_data,
289738fd1498Szrj 			 ncopies, vec_perm, stmt_info, 0, vect_body);
289838fd1498Szrj 	}
289938fd1498Szrj       return true;
290038fd1498Szrj     }
290138fd1498Szrj 
290238fd1498Szrj   tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
290338fd1498Szrj 
290438fd1498Szrj   /* Transform.  */
290538fd1498Szrj   vec<tree> vec_oprnds = vNULL;
290638fd1498Szrj   gimple *new_stmt = NULL;
290738fd1498Szrj   stmt_vec_info prev_stmt_info = NULL;
290838fd1498Szrj   for (unsigned j = 0; j < ncopies; j++)
290938fd1498Szrj     {
291038fd1498Szrj       /* Handle uses.  */
291138fd1498Szrj       if (j == 0)
291238fd1498Szrj         vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
291338fd1498Szrj       else
291438fd1498Szrj         vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
291538fd1498Szrj 
291638fd1498Szrj       /* Arguments are ready.  Create the new vector stmt.  */
291738fd1498Szrj       unsigned i;
291838fd1498Szrj       tree vop;
291938fd1498Szrj       FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
292038fd1498Szrj        {
292138fd1498Szrj 	 tree tem = make_ssa_name (char_vectype);
292238fd1498Szrj 	 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
292338fd1498Szrj 						      char_vectype, vop));
292438fd1498Szrj 	 vect_finish_stmt_generation (stmt, new_stmt, gsi);
292538fd1498Szrj 	 tree tem2 = make_ssa_name (char_vectype);
292638fd1498Szrj 	 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
292738fd1498Szrj 					 tem, tem, bswap_vconst);
292838fd1498Szrj 	 vect_finish_stmt_generation (stmt, new_stmt, gsi);
292938fd1498Szrj 	 tem = make_ssa_name (vectype);
293038fd1498Szrj 	 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
293138fd1498Szrj 						      vectype, tem2));
293238fd1498Szrj 	 vect_finish_stmt_generation (stmt, new_stmt, gsi);
293338fd1498Szrj          if (slp_node)
293438fd1498Szrj            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
293538fd1498Szrj        }
293638fd1498Szrj 
293738fd1498Szrj       if (slp_node)
293838fd1498Szrj         continue;
293938fd1498Szrj 
294038fd1498Szrj       if (j == 0)
294138fd1498Szrj         STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
294238fd1498Szrj       else
294338fd1498Szrj         STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
294438fd1498Szrj 
294538fd1498Szrj       prev_stmt_info = vinfo_for_stmt (new_stmt);
294638fd1498Szrj     }
294738fd1498Szrj 
294838fd1498Szrj   vec_oprnds.release ();
294938fd1498Szrj   return true;
295038fd1498Szrj }
295138fd1498Szrj 
295238fd1498Szrj /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
295338fd1498Szrj    integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
295438fd1498Szrj    in a single step.  On success, store the binary pack code in
295538fd1498Szrj    *CONVERT_CODE.  */
295638fd1498Szrj 
295738fd1498Szrj static bool
295838fd1498Szrj simple_integer_narrowing (tree vectype_out, tree vectype_in,
295938fd1498Szrj 			  tree_code *convert_code)
296038fd1498Szrj {
296138fd1498Szrj   if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
296238fd1498Szrj       || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
296338fd1498Szrj     return false;
296438fd1498Szrj 
296538fd1498Szrj   tree_code code;
296638fd1498Szrj   int multi_step_cvt = 0;
296738fd1498Szrj   auto_vec <tree, 8> interm_types;
296838fd1498Szrj   if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
296938fd1498Szrj 					&code, &multi_step_cvt,
297038fd1498Szrj 					&interm_types)
297138fd1498Szrj       || multi_step_cvt)
297238fd1498Szrj     return false;
297338fd1498Szrj 
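  /* For instance, narrowing V2DI inputs to a V4SI output is a single
     VEC_PACK_TRUNC_EXPR, with two input vectors feeding each output
     vector.  */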
297438fd1498Szrj   *convert_code = code;
297538fd1498Szrj   return true;
297638fd1498Szrj }
297738fd1498Szrj 
297838fd1498Szrj /* Function vectorizable_call.
297938fd1498Szrj 
298038fd1498Szrj    Check if GS performs a function call that can be vectorized.
298138fd1498Szrj    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
298238fd1498Szrj    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
298338fd1498Szrj    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
298438fd1498Szrj 
298538fd1498Szrj static bool
298638fd1498Szrj vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
298738fd1498Szrj 		   slp_tree slp_node)
298838fd1498Szrj {
298938fd1498Szrj   gcall *stmt;
299038fd1498Szrj   tree vec_dest;
299138fd1498Szrj   tree scalar_dest;
299238fd1498Szrj   tree op, type;
299338fd1498Szrj   tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
299438fd1498Szrj   stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
299538fd1498Szrj   tree vectype_out, vectype_in;
299638fd1498Szrj   poly_uint64 nunits_in;
299738fd1498Szrj   poly_uint64 nunits_out;
299838fd1498Szrj   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
299938fd1498Szrj   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
300038fd1498Szrj   vec_info *vinfo = stmt_info->vinfo;
300138fd1498Szrj   tree fndecl, new_temp, rhs_type;
300238fd1498Szrj   gimple *def_stmt;
300338fd1498Szrj   enum vect_def_type dt[3]
300438fd1498Szrj     = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
300538fd1498Szrj   int ndts = 3;
300638fd1498Szrj   gimple *new_stmt = NULL;
300738fd1498Szrj   int ncopies, j;
300838fd1498Szrj   vec<tree> vargs = vNULL;
300938fd1498Szrj   enum { NARROW, NONE, WIDEN } modifier;
301038fd1498Szrj   size_t i, nargs;
301138fd1498Szrj   tree lhs;
301238fd1498Szrj 
301338fd1498Szrj   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
301438fd1498Szrj     return false;
301538fd1498Szrj 
301638fd1498Szrj   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
301738fd1498Szrj       && ! vec_stmt)
301838fd1498Szrj     return false;
301938fd1498Szrj 
302038fd1498Szrj   /* Is GS a vectorizable call?  */
302138fd1498Szrj   stmt = dyn_cast <gcall *> (gs);
302238fd1498Szrj   if (!stmt)
302338fd1498Szrj     return false;
302438fd1498Szrj 
302538fd1498Szrj   if (gimple_call_internal_p (stmt)
302638fd1498Szrj       && (internal_load_fn_p (gimple_call_internal_fn (stmt))
302738fd1498Szrj 	  || internal_store_fn_p (gimple_call_internal_fn (stmt))))
302838fd1498Szrj     /* Handled by vectorizable_load and vectorizable_store.  */
302938fd1498Szrj     return false;
303038fd1498Szrj 
303138fd1498Szrj   if (gimple_call_lhs (stmt) == NULL_TREE
303238fd1498Szrj       || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
303338fd1498Szrj     return false;
303438fd1498Szrj 
303538fd1498Szrj   gcc_checking_assert (!stmt_can_throw_internal (stmt));
303638fd1498Szrj 
303738fd1498Szrj   vectype_out = STMT_VINFO_VECTYPE (stmt_info);
303838fd1498Szrj 
303938fd1498Szrj   /* Process function arguments.  */
304038fd1498Szrj   rhs_type = NULL_TREE;
304138fd1498Szrj   vectype_in = NULL_TREE;
304238fd1498Szrj   nargs = gimple_call_num_args (stmt);
304338fd1498Szrj 
304438fd1498Szrj   /* Bail out if the function has more than three arguments; we do not have
304538fd1498Szrj      interesting builtin functions to vectorize with more than two arguments
304638fd1498Szrj      except for fma.  A call with no arguments is also not vectorizable.  */
304738fd1498Szrj   if (nargs == 0 || nargs > 3)
304838fd1498Szrj     return false;
304938fd1498Szrj 
305038fd1498Szrj   /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic.  */
305138fd1498Szrj   if (gimple_call_internal_p (stmt)
305238fd1498Szrj       && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
305338fd1498Szrj     {
305438fd1498Szrj       nargs = 0;
305538fd1498Szrj       rhs_type = unsigned_type_node;
305638fd1498Szrj     }
305738fd1498Szrj 
305838fd1498Szrj   for (i = 0; i < nargs; i++)
305938fd1498Szrj     {
306038fd1498Szrj       tree opvectype;
306138fd1498Szrj 
306238fd1498Szrj       op = gimple_call_arg (stmt, i);
306338fd1498Szrj 
306438fd1498Szrj       /* We can only handle calls with arguments of the same type.  */
306538fd1498Szrj       if (rhs_type
306638fd1498Szrj 	  && !types_compatible_p (rhs_type, TREE_TYPE (op)))
306738fd1498Szrj 	{
306838fd1498Szrj 	  if (dump_enabled_p ())
306938fd1498Szrj 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
307038fd1498Szrj                              "argument types differ.\n");
307138fd1498Szrj 	  return false;
307238fd1498Szrj 	}
307338fd1498Szrj       if (!rhs_type)
307438fd1498Szrj 	rhs_type = TREE_TYPE (op);
307538fd1498Szrj 
307638fd1498Szrj       if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[i], &opvectype))
307738fd1498Szrj 	{
307838fd1498Szrj 	  if (dump_enabled_p ())
307938fd1498Szrj 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
308038fd1498Szrj                              "use not simple.\n");
308138fd1498Szrj 	  return false;
308238fd1498Szrj 	}
308338fd1498Szrj 
308438fd1498Szrj       if (!vectype_in)
308538fd1498Szrj 	vectype_in = opvectype;
308638fd1498Szrj       else if (opvectype
308738fd1498Szrj 	       && opvectype != vectype_in)
308838fd1498Szrj 	{
308938fd1498Szrj 	  if (dump_enabled_p ())
309038fd1498Szrj 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
309138fd1498Szrj                              "argument vector types differ.\n");
309238fd1498Szrj 	  return false;
309338fd1498Szrj 	}
309438fd1498Szrj     }
309538fd1498Szrj   /* If all arguments are external or constant defs use a vector type with
309638fd1498Szrj      the same size as the output vector type.  */
309738fd1498Szrj   if (!vectype_in)
309838fd1498Szrj     vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
309938fd1498Szrj   if (vec_stmt)
310038fd1498Szrj     gcc_assert (vectype_in);
310138fd1498Szrj   if (!vectype_in)
310238fd1498Szrj     {
310338fd1498Szrj       if (dump_enabled_p ())
310438fd1498Szrj         {
310538fd1498Szrj           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
310638fd1498Szrj                            "no vectype for scalar type ");
310738fd1498Szrj           dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
310838fd1498Szrj           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
310938fd1498Szrj         }
311038fd1498Szrj 
311138fd1498Szrj       return false;
311238fd1498Szrj     }
311338fd1498Szrj 
311438fd1498Szrj   /* FORNOW */
311538fd1498Szrj   nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
311638fd1498Szrj   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
311738fd1498Szrj   if (known_eq (nunits_in * 2, nunits_out))
311838fd1498Szrj     modifier = NARROW;
311938fd1498Szrj   else if (known_eq (nunits_out, nunits_in))
312038fd1498Szrj     modifier = NONE;
312138fd1498Szrj   else if (known_eq (nunits_out * 2, nunits_in))
312238fd1498Szrj     modifier = WIDEN;
312338fd1498Szrj   else
312438fd1498Szrj     return false;
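  /* E.g. (illustrative only) a call taking V4SI arguments and
     producing a V8HI result is a NARROW call: each output vector
     consumes two input vectors, combined with CONVERT_CODE after the
     calls.  */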
312538fd1498Szrj 
312638fd1498Szrj   /* We only handle functions that do not read or clobber memory.  */
312738fd1498Szrj   if (gimple_vuse (stmt))
312838fd1498Szrj     {
312938fd1498Szrj       if (dump_enabled_p ())
313038fd1498Szrj 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
313138fd1498Szrj 			 "function reads from or writes to memory.\n");
313238fd1498Szrj       return false;
313338fd1498Szrj     }
313438fd1498Szrj 
313538fd1498Szrj   /* For now, we only vectorize functions if a target specific builtin
313638fd1498Szrj      is available.  TODO -- in some cases, it might be profitable to
313738fd1498Szrj      insert the calls for pieces of the vector, in order to be able
313838fd1498Szrj      to vectorize other operations in the loop.  */
313938fd1498Szrj   fndecl = NULL_TREE;
314038fd1498Szrj   internal_fn ifn = IFN_LAST;
314138fd1498Szrj   combined_fn cfn = gimple_call_combined_fn (stmt);
314238fd1498Szrj   tree callee = gimple_call_fndecl (stmt);
314338fd1498Szrj 
314438fd1498Szrj   /* First try using an internal function.  */
314538fd1498Szrj   tree_code convert_code = ERROR_MARK;
314638fd1498Szrj   if (cfn != CFN_LAST
314738fd1498Szrj       && (modifier == NONE
314838fd1498Szrj 	  || (modifier == NARROW
314938fd1498Szrj 	      && simple_integer_narrowing (vectype_out, vectype_in,
315038fd1498Szrj 					   &convert_code))))
315138fd1498Szrj     ifn = vectorizable_internal_function (cfn, callee, vectype_out,
315238fd1498Szrj 					  vectype_in);
315338fd1498Szrj 
315438fd1498Szrj   /* If that fails, try asking for a target-specific built-in function.  */
315538fd1498Szrj   if (ifn == IFN_LAST)
315638fd1498Szrj     {
315738fd1498Szrj       if (cfn != CFN_LAST)
315838fd1498Szrj 	fndecl = targetm.vectorize.builtin_vectorized_function
315938fd1498Szrj 	  (cfn, vectype_out, vectype_in);
316038fd1498Szrj       else if (callee)
316138fd1498Szrj 	fndecl = targetm.vectorize.builtin_md_vectorized_function
316238fd1498Szrj 	  (callee, vectype_out, vectype_in);
316338fd1498Szrj     }
316438fd1498Szrj 
316538fd1498Szrj   if (ifn == IFN_LAST && !fndecl)
316638fd1498Szrj     {
316738fd1498Szrj       if (cfn == CFN_GOMP_SIMD_LANE
316838fd1498Szrj 	  && !slp_node
316938fd1498Szrj 	  && loop_vinfo
317038fd1498Szrj 	  && LOOP_VINFO_LOOP (loop_vinfo)->simduid
317138fd1498Szrj 	  && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
317238fd1498Szrj 	  && LOOP_VINFO_LOOP (loop_vinfo)->simduid
317338fd1498Szrj 	     == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
317438fd1498Szrj 	{
317538fd1498Szrj 	  /* We can handle IFN_GOMP_SIMD_LANE by returning a
317638fd1498Szrj 	     { 0, 1, 2, ... vf - 1 } vector.  */
317738fd1498Szrj 	  gcc_assert (nargs == 0);
317838fd1498Szrj 	}
317938fd1498Szrj       else if (modifier == NONE
318038fd1498Szrj 	       && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
318138fd1498Szrj 		   || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
318238fd1498Szrj 		   || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
318338fd1498Szrj 	return vectorizable_bswap (stmt, gsi, vec_stmt, slp_node,
318438fd1498Szrj 				   vectype_in, dt);
318538fd1498Szrj       else
318638fd1498Szrj 	{
318738fd1498Szrj 	  if (dump_enabled_p ())
318838fd1498Szrj 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
318938fd1498Szrj 			     "function is not vectorizable.\n");
319038fd1498Szrj 	  return false;
319138fd1498Szrj 	}
319238fd1498Szrj     }
319338fd1498Szrj 
319438fd1498Szrj   if (slp_node)
319538fd1498Szrj     ncopies = 1;
319638fd1498Szrj   else if (modifier == NARROW && ifn == IFN_LAST)
319738fd1498Szrj     ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
319838fd1498Szrj   else
319938fd1498Szrj     ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
320038fd1498Szrj 
320138fd1498Szrj   /* Sanity check: make sure that at least one copy of the vectorized stmt
320238fd1498Szrj      needs to be generated.  */
320338fd1498Szrj   gcc_assert (ncopies >= 1);
320438fd1498Szrj 
320538fd1498Szrj   if (!vec_stmt) /* transformation not required.  */
320638fd1498Szrj     {
320738fd1498Szrj       STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
320838fd1498Szrj       if (dump_enabled_p ())
320938fd1498Szrj         dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
321038fd1498Szrj                          "\n");
321138fd1498Szrj       if (!slp_node)
321238fd1498Szrj 	{
321338fd1498Szrj 	  vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
321438fd1498Szrj 	  if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
321538fd1498Szrj 	    add_stmt_cost (stmt_info->vinfo->target_cost_data, ncopies / 2,
321638fd1498Szrj 			   vec_promote_demote, stmt_info, 0, vect_body);
321738fd1498Szrj 	}
321838fd1498Szrj 
321938fd1498Szrj       return true;
322038fd1498Szrj     }
322138fd1498Szrj 
322238fd1498Szrj   /* Transform.  */
322338fd1498Szrj 
322438fd1498Szrj   if (dump_enabled_p ())
322538fd1498Szrj     dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
322638fd1498Szrj 
322738fd1498Szrj   /* Handle def.  */
322838fd1498Szrj   scalar_dest = gimple_call_lhs (stmt);
322938fd1498Szrj   vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
323038fd1498Szrj 
323138fd1498Szrj   prev_stmt_info = NULL;
323238fd1498Szrj   if (modifier == NONE || ifn != IFN_LAST)
323338fd1498Szrj     {
323438fd1498Szrj       tree prev_res = NULL_TREE;
323538fd1498Szrj       for (j = 0; j < ncopies; ++j)
323638fd1498Szrj 	{
323738fd1498Szrj 	  /* Build argument list for the vectorized call.  */
323838fd1498Szrj 	  if (j == 0)
323938fd1498Szrj 	    vargs.create (nargs);
324038fd1498Szrj 	  else
324138fd1498Szrj 	    vargs.truncate (0);
324238fd1498Szrj 
324338fd1498Szrj 	  if (slp_node)
324438fd1498Szrj 	    {
324538fd1498Szrj 	      auto_vec<vec<tree> > vec_defs (nargs);
324638fd1498Szrj 	      vec<tree> vec_oprnds0;
324738fd1498Szrj 
324838fd1498Szrj 	      for (i = 0; i < nargs; i++)
324938fd1498Szrj 		vargs.quick_push (gimple_call_arg (stmt, i));
325038fd1498Szrj 	      vect_get_slp_defs (vargs, slp_node, &vec_defs);
325138fd1498Szrj 	      vec_oprnds0 = vec_defs[0];
325238fd1498Szrj 
325338fd1498Szrj 	      /* Arguments are ready.  Create the new vector stmt.  */
325438fd1498Szrj 	      FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
325538fd1498Szrj 		{
325638fd1498Szrj 		  size_t k;
325738fd1498Szrj 		  for (k = 0; k < nargs; k++)
325838fd1498Szrj 		    {
325938fd1498Szrj 		      vec<tree> vec_oprndsk = vec_defs[k];
326038fd1498Szrj 		      vargs[k] = vec_oprndsk[i];
326138fd1498Szrj 		    }
326238fd1498Szrj 		  if (modifier == NARROW)
326338fd1498Szrj 		    {
326438fd1498Szrj 		      tree half_res = make_ssa_name (vectype_in);
326538fd1498Szrj 		      gcall *call
326638fd1498Szrj 			= gimple_build_call_internal_vec (ifn, vargs);
326738fd1498Szrj 		      gimple_call_set_lhs (call, half_res);
326838fd1498Szrj 		      gimple_call_set_nothrow (call, true);
326938fd1498Szrj 		      new_stmt = call;
327038fd1498Szrj 		      vect_finish_stmt_generation (stmt, new_stmt, gsi);
327138fd1498Szrj 		      if ((i & 1) == 0)
327238fd1498Szrj 			{
327338fd1498Szrj 			  prev_res = half_res;
327438fd1498Szrj 			  continue;
327538fd1498Szrj 			}
327638fd1498Szrj 		      new_temp = make_ssa_name (vec_dest);
327738fd1498Szrj 		      new_stmt = gimple_build_assign (new_temp, convert_code,
327838fd1498Szrj 						      prev_res, half_res);
327938fd1498Szrj 		    }
328038fd1498Szrj 		  else
328138fd1498Szrj 		    {
328238fd1498Szrj 		      gcall *call;
328338fd1498Szrj 		      if (ifn != IFN_LAST)
328438fd1498Szrj 			call = gimple_build_call_internal_vec (ifn, vargs);
328538fd1498Szrj 		      else
328638fd1498Szrj 			call = gimple_build_call_vec (fndecl, vargs);
328738fd1498Szrj 		      new_temp = make_ssa_name (vec_dest, call);
328838fd1498Szrj 		      gimple_call_set_lhs (call, new_temp);
328938fd1498Szrj 		      gimple_call_set_nothrow (call, true);
329038fd1498Szrj 		      new_stmt = call;
329138fd1498Szrj 		    }
329238fd1498Szrj 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
329338fd1498Szrj 		  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
329438fd1498Szrj 		}
329538fd1498Szrj 
329638fd1498Szrj 	      for (i = 0; i < nargs; i++)
329738fd1498Szrj 		{
329838fd1498Szrj 		  vec<tree> vec_oprndsi = vec_defs[i];
329938fd1498Szrj 		  vec_oprndsi.release ();
330038fd1498Szrj 		}
330138fd1498Szrj 	      continue;
330238fd1498Szrj 	    }
330338fd1498Szrj 
330438fd1498Szrj 	  for (i = 0; i < nargs; i++)
330538fd1498Szrj 	    {
330638fd1498Szrj 	      op = gimple_call_arg (stmt, i);
330738fd1498Szrj 	      if (j == 0)
330838fd1498Szrj 		vec_oprnd0
330938fd1498Szrj 		  = vect_get_vec_def_for_operand (op, stmt);
331038fd1498Szrj 	      else
331138fd1498Szrj 		{
331238fd1498Szrj 		  vec_oprnd0 = gimple_call_arg (new_stmt, i);
331338fd1498Szrj 		  vec_oprnd0
331438fd1498Szrj                     = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
331538fd1498Szrj 		}
331638fd1498Szrj 
331738fd1498Szrj 	      vargs.quick_push (vec_oprnd0);
331838fd1498Szrj 	    }
331938fd1498Szrj 
332038fd1498Szrj 	  if (gimple_call_internal_p (stmt)
332138fd1498Szrj 	      && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
332238fd1498Szrj 	    {
332338fd1498Szrj 	      tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
332438fd1498Szrj 	      tree new_var
332538fd1498Szrj 		= vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
332638fd1498Szrj 	      gimple *init_stmt = gimple_build_assign (new_var, cst);
332738fd1498Szrj 	      vect_init_vector_1 (stmt, init_stmt, NULL);
332838fd1498Szrj 	      new_temp = make_ssa_name (vec_dest);
332938fd1498Szrj 	      new_stmt = gimple_build_assign (new_temp, new_var);
333038fd1498Szrj 	    }
333138fd1498Szrj 	  else if (modifier == NARROW)
333238fd1498Szrj 	    {
333338fd1498Szrj 	      tree half_res = make_ssa_name (vectype_in);
333438fd1498Szrj 	      gcall *call = gimple_build_call_internal_vec (ifn, vargs);
333538fd1498Szrj 	      gimple_call_set_lhs (call, half_res);
333638fd1498Szrj 	      gimple_call_set_nothrow (call, true);
333738fd1498Szrj 	      new_stmt = call;
333838fd1498Szrj 	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
333938fd1498Szrj 	      if ((j & 1) == 0)
334038fd1498Szrj 		{
334138fd1498Szrj 		  prev_res = half_res;
334238fd1498Szrj 		  continue;
334338fd1498Szrj 		}
334438fd1498Szrj 	      new_temp = make_ssa_name (vec_dest);
334538fd1498Szrj 	      new_stmt = gimple_build_assign (new_temp, convert_code,
334638fd1498Szrj 					      prev_res, half_res);
334738fd1498Szrj 	    }
334838fd1498Szrj 	  else
334938fd1498Szrj 	    {
335038fd1498Szrj 	      gcall *call;
335138fd1498Szrj 	      if (ifn != IFN_LAST)
335238fd1498Szrj 		call = gimple_build_call_internal_vec (ifn, vargs);
335338fd1498Szrj 	      else
335438fd1498Szrj 		call = gimple_build_call_vec (fndecl, vargs);
335538fd1498Szrj 	      new_temp = make_ssa_name (vec_dest, call);
335638fd1498Szrj 	      gimple_call_set_lhs (call, new_temp);
335738fd1498Szrj 	      gimple_call_set_nothrow (call, true);
335838fd1498Szrj 	      new_stmt = call;
335938fd1498Szrj 	    }
336038fd1498Szrj 	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
336138fd1498Szrj 
336238fd1498Szrj 	  if (j == (modifier == NARROW ? 1 : 0))
336338fd1498Szrj 	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
336438fd1498Szrj 	  else
336538fd1498Szrj 	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
336638fd1498Szrj 
336738fd1498Szrj 	  prev_stmt_info = vinfo_for_stmt (new_stmt);
336838fd1498Szrj 	}
336938fd1498Szrj     }
337038fd1498Szrj   else if (modifier == NARROW)
337138fd1498Szrj     {
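      /* In the NARROW case each vector result combines two input vectors,
	 so every copy consumes two vector defs per scalar argument; VARGS
	 therefore gets 2 * NARGS entries below.  */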
337238fd1498Szrj       for (j = 0; j < ncopies; ++j)
337338fd1498Szrj 	{
337438fd1498Szrj 	  /* Build argument list for the vectorized call.  */
337538fd1498Szrj 	  if (j == 0)
337638fd1498Szrj 	    vargs.create (nargs * 2);
337738fd1498Szrj 	  else
337838fd1498Szrj 	    vargs.truncate (0);
337938fd1498Szrj 
338038fd1498Szrj 	  if (slp_node)
338138fd1498Szrj 	    {
338238fd1498Szrj 	      auto_vec<vec<tree> > vec_defs (nargs);
338338fd1498Szrj 	      vec<tree> vec_oprnds0;
338438fd1498Szrj 
338538fd1498Szrj 	      for (i = 0; i < nargs; i++)
338638fd1498Szrj 		vargs.quick_push (gimple_call_arg (stmt, i));
338738fd1498Szrj 	      vect_get_slp_defs (vargs, slp_node, &vec_defs);
338838fd1498Szrj 	      vec_oprnds0 = vec_defs[0];
338938fd1498Szrj 
339038fd1498Szrj 	      /* Arguments are ready.  Create the new vector stmt.  */
339138fd1498Szrj 	      for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
339238fd1498Szrj 		{
339338fd1498Szrj 		  size_t k;
339438fd1498Szrj 		  vargs.truncate (0);
339538fd1498Szrj 		  for (k = 0; k < nargs; k++)
339638fd1498Szrj 		    {
339738fd1498Szrj 		      vec<tree> vec_oprndsk = vec_defs[k];
339838fd1498Szrj 		      vargs.quick_push (vec_oprndsk[i]);
339938fd1498Szrj 		      vargs.quick_push (vec_oprndsk[i + 1]);
340038fd1498Szrj 		    }
340138fd1498Szrj 		  gcall *call;
340238fd1498Szrj 		  if (ifn != IFN_LAST)
340338fd1498Szrj 		    call = gimple_build_call_internal_vec (ifn, vargs);
340438fd1498Szrj 		  else
340538fd1498Szrj 		    call = gimple_build_call_vec (fndecl, vargs);
340638fd1498Szrj 		  new_temp = make_ssa_name (vec_dest, call);
340738fd1498Szrj 		  gimple_call_set_lhs (call, new_temp);
340838fd1498Szrj 		  gimple_call_set_nothrow (call, true);
340938fd1498Szrj 		  new_stmt = call;
341038fd1498Szrj 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
341138fd1498Szrj 		  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
341238fd1498Szrj 		}
341338fd1498Szrj 
341438fd1498Szrj 	      for (i = 0; i < nargs; i++)
341538fd1498Szrj 		{
341638fd1498Szrj 		  vec<tree> vec_oprndsi = vec_defs[i];
341738fd1498Szrj 		  vec_oprndsi.release ();
341838fd1498Szrj 		}
341938fd1498Szrj 	      continue;
342038fd1498Szrj 	    }
342138fd1498Szrj 
342238fd1498Szrj 	  for (i = 0; i < nargs; i++)
342338fd1498Szrj 	    {
342438fd1498Szrj 	      op = gimple_call_arg (stmt, i);
342538fd1498Szrj 	      if (j == 0)
342638fd1498Szrj 		{
342738fd1498Szrj 		  vec_oprnd0
342838fd1498Szrj 		    = vect_get_vec_def_for_operand (op, stmt);
342938fd1498Szrj 		  vec_oprnd1
343038fd1498Szrj 		    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
343138fd1498Szrj 		}
343238fd1498Szrj 	      else
343338fd1498Szrj 		{
343438fd1498Szrj 		  vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
343538fd1498Szrj 		  vec_oprnd0
343638fd1498Szrj 		    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
343738fd1498Szrj 		  vec_oprnd1
343838fd1498Szrj 		    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
343938fd1498Szrj 		}
344038fd1498Szrj 
344138fd1498Szrj 	      vargs.quick_push (vec_oprnd0);
344238fd1498Szrj 	      vargs.quick_push (vec_oprnd1);
344338fd1498Szrj 	    }
344438fd1498Szrj 
344538fd1498Szrj 	  new_stmt = gimple_build_call_vec (fndecl, vargs);
344638fd1498Szrj 	  new_temp = make_ssa_name (vec_dest, new_stmt);
344738fd1498Szrj 	  gimple_call_set_lhs (new_stmt, new_temp);
344838fd1498Szrj 	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
344938fd1498Szrj 
345038fd1498Szrj 	  if (j == 0)
345138fd1498Szrj 	    STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
345238fd1498Szrj 	  else
345338fd1498Szrj 	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
345438fd1498Szrj 
345538fd1498Szrj 	  prev_stmt_info = vinfo_for_stmt (new_stmt);
345638fd1498Szrj 	}
345738fd1498Szrj 
345838fd1498Szrj       *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
345938fd1498Szrj     }
346038fd1498Szrj   else
346138fd1498Szrj     /* No current target implements this case.  */
346238fd1498Szrj     return false;
346338fd1498Szrj 
346438fd1498Szrj   vargs.release ();
346538fd1498Szrj 
346638fd1498Szrj   /* The call in STMT might prevent it from being removed in DCE.
346738fd1498Szrj      We cannot remove it here, however, because of the way the SSA
346838fd1498Szrj      name it defines is mapped to the new definition.  So just replace
346938fd1498Szrj      the rhs of the statement with something harmless.  */
347038fd1498Szrj 
347138fd1498Szrj   if (slp_node)
347238fd1498Szrj     return true;
347338fd1498Szrj 
347438fd1498Szrj   type = TREE_TYPE (scalar_dest);
347538fd1498Szrj   if (is_pattern_stmt_p (stmt_info))
347638fd1498Szrj     lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
347738fd1498Szrj   else
347838fd1498Szrj     lhs = gimple_call_lhs (stmt);
347938fd1498Szrj 
348038fd1498Szrj   new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
348138fd1498Szrj   set_vinfo_for_stmt (new_stmt, stmt_info);
348238fd1498Szrj   set_vinfo_for_stmt (stmt, NULL);
348338fd1498Szrj   STMT_VINFO_STMT (stmt_info) = new_stmt;
348438fd1498Szrj   gsi_replace (gsi, new_stmt, false);
348538fd1498Szrj 
348638fd1498Szrj   return true;
348738fd1498Szrj }
348838fd1498Szrj 
348938fd1498Szrj 
349038fd1498Szrj struct simd_call_arg_info
349138fd1498Szrj {
349238fd1498Szrj   tree vectype;
349338fd1498Szrj   tree op;
349438fd1498Szrj   HOST_WIDE_INT linear_step;
349538fd1498Szrj   enum vect_def_type dt;
349638fd1498Szrj   unsigned int align;
349738fd1498Szrj   bool simd_lane_linear;
349838fd1498Szrj };
349938fd1498Szrj 
350038fd1498Szrj /* Helper function of vectorizable_simd_clone_call.  If OP, an SSA_NAME,
350138fd1498Szrj    is linear within a simd lane (but not within the whole loop), note it in
350238fd1498Szrj    *ARGINFO.  */
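
/* For example (a sketch: D.1234 stands for the loop's simduid and "a"
   for a hypothetical array), given the definition chain

     _1 = GOMP_SIMD_LANE (D.1234);
     _2 = _1 * 4;
     p_3 = &a + _2;

   this function records base &a and linear step 4 for OP == p_3.  */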
350338fd1498Szrj 
350438fd1498Szrj static void
350538fd1498Szrj vect_simd_lane_linear (tree op, struct loop *loop,
350638fd1498Szrj 		       struct simd_call_arg_info *arginfo)
350738fd1498Szrj {
350838fd1498Szrj   gimple *def_stmt = SSA_NAME_DEF_STMT (op);
350938fd1498Szrj 
351038fd1498Szrj   if (!is_gimple_assign (def_stmt)
351138fd1498Szrj       || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
351238fd1498Szrj       || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
351338fd1498Szrj     return;
351438fd1498Szrj 
351538fd1498Szrj   tree base = gimple_assign_rhs1 (def_stmt);
351638fd1498Szrj   HOST_WIDE_INT linear_step = 0;
351738fd1498Szrj   tree v = gimple_assign_rhs2 (def_stmt);
351838fd1498Szrj   while (TREE_CODE (v) == SSA_NAME)
351938fd1498Szrj     {
352038fd1498Szrj       tree t;
352138fd1498Szrj       def_stmt = SSA_NAME_DEF_STMT (v);
352238fd1498Szrj       if (is_gimple_assign (def_stmt))
352338fd1498Szrj 	switch (gimple_assign_rhs_code (def_stmt))
352438fd1498Szrj 	  {
352538fd1498Szrj 	  case PLUS_EXPR:
352638fd1498Szrj 	    t = gimple_assign_rhs2 (def_stmt);
352738fd1498Szrj 	    if (linear_step || TREE_CODE (t) != INTEGER_CST)
352838fd1498Szrj 	      return;
352938fd1498Szrj 	    base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
353038fd1498Szrj 	    v = gimple_assign_rhs1 (def_stmt);
353138fd1498Szrj 	    continue;
353238fd1498Szrj 	  case MULT_EXPR:
353338fd1498Szrj 	    t = gimple_assign_rhs2 (def_stmt);
353438fd1498Szrj 	    if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
353538fd1498Szrj 	      return;
353638fd1498Szrj 	    linear_step = tree_to_shwi (t);
353738fd1498Szrj 	    v = gimple_assign_rhs1 (def_stmt);
353838fd1498Szrj 	    continue;
353938fd1498Szrj 	  CASE_CONVERT:
354038fd1498Szrj 	    t = gimple_assign_rhs1 (def_stmt);
354138fd1498Szrj 	    if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
354238fd1498Szrj 		|| (TYPE_PRECISION (TREE_TYPE (v))
354338fd1498Szrj 		    < TYPE_PRECISION (TREE_TYPE (t))))
354438fd1498Szrj 	      return;
354538fd1498Szrj 	    if (!linear_step)
354638fd1498Szrj 	      linear_step = 1;
354738fd1498Szrj 	    v = t;
354838fd1498Szrj 	    continue;
354938fd1498Szrj 	  default:
355038fd1498Szrj 	    return;
355138fd1498Szrj 	  }
355238fd1498Szrj       else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
355338fd1498Szrj 	       && loop->simduid
355438fd1498Szrj 	       && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
355538fd1498Szrj 	       && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
355638fd1498Szrj 		   == loop->simduid))
355738fd1498Szrj 	{
355838fd1498Szrj 	  if (!linear_step)
355938fd1498Szrj 	    linear_step = 1;
356038fd1498Szrj 	  arginfo->linear_step = linear_step;
356138fd1498Szrj 	  arginfo->op = base;
356238fd1498Szrj 	  arginfo->simd_lane_linear = true;
356338fd1498Szrj 	  return;
356438fd1498Szrj 	}
356538fd1498Szrj     }
356638fd1498Szrj }
356738fd1498Szrj 
356838fd1498Szrj /* Return the number of elements in vector type VECTYPE, which is associated
356938fd1498Szrj    with a SIMD clone.  At present these vectors always have a constant
357038fd1498Szrj    length.  */
357138fd1498Szrj 
357238fd1498Szrj static unsigned HOST_WIDE_INT
357338fd1498Szrj simd_clone_subparts (tree vectype)
357438fd1498Szrj {
357538fd1498Szrj   return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
357638fd1498Szrj }
357738fd1498Szrj 
357838fd1498Szrj /* Function vectorizable_simd_clone_call.
357938fd1498Szrj 
358038fd1498Szrj    Check if STMT performs a function call that can be vectorized
358138fd1498Szrj    by calling a simd clone of the function.
358238fd1498Szrj    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
358338fd1498Szrj    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
358438fd1498Szrj    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
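
/* For example (a sketch, independent of any particular vector ABI):
   given

     #pragma omp declare simd
     int foo (int x);

   a loop that calls foo (x) elementwise can be vectorized by calling
   a simd clone of foo that takes a vector of X values and returns a
   vector of results, emitting VF / simdlen such calls per vector
   iteration.  */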
358538fd1498Szrj 
358638fd1498Szrj static bool
358738fd1498Szrj vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
358838fd1498Szrj 			      gimple **vec_stmt, slp_tree slp_node)
358938fd1498Szrj {
359038fd1498Szrj   tree vec_dest;
359138fd1498Szrj   tree scalar_dest;
359238fd1498Szrj   tree op, type;
359338fd1498Szrj   tree vec_oprnd0 = NULL_TREE;
359438fd1498Szrj   stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
359538fd1498Szrj   tree vectype;
359638fd1498Szrj   unsigned int nunits;
359738fd1498Szrj   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
359838fd1498Szrj   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
359938fd1498Szrj   vec_info *vinfo = stmt_info->vinfo;
360038fd1498Szrj   struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
360138fd1498Szrj   tree fndecl, new_temp;
360238fd1498Szrj   gimple *def_stmt;
360338fd1498Szrj   gimple *new_stmt = NULL;
360438fd1498Szrj   int ncopies, j;
360538fd1498Szrj   auto_vec<simd_call_arg_info> arginfo;
360638fd1498Szrj   vec<tree> vargs = vNULL;
360738fd1498Szrj   size_t i, nargs;
360838fd1498Szrj   tree lhs, rtype, ratype;
360938fd1498Szrj   vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
361038fd1498Szrj 
361138fd1498Szrj   /* Is STMT a vectorizable call?   */
361238fd1498Szrj   if (!is_gimple_call (stmt))
361338fd1498Szrj     return false;
361438fd1498Szrj 
361538fd1498Szrj   fndecl = gimple_call_fndecl (stmt);
361638fd1498Szrj   if (fndecl == NULL_TREE)
361738fd1498Szrj     return false;
361838fd1498Szrj 
361938fd1498Szrj   struct cgraph_node *node = cgraph_node::get (fndecl);
362038fd1498Szrj   if (node == NULL || node->simd_clones == NULL)
362138fd1498Szrj     return false;
362238fd1498Szrj 
362338fd1498Szrj   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
362438fd1498Szrj     return false;
362538fd1498Szrj 
362638fd1498Szrj   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
362738fd1498Szrj       && ! vec_stmt)
362838fd1498Szrj     return false;
362938fd1498Szrj 
363038fd1498Szrj   if (gimple_call_lhs (stmt)
363138fd1498Szrj       && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
363238fd1498Szrj     return false;
363338fd1498Szrj 
363438fd1498Szrj   gcc_checking_assert (!stmt_can_throw_internal (stmt));
363538fd1498Szrj 
363638fd1498Szrj   vectype = STMT_VINFO_VECTYPE (stmt_info);
363738fd1498Szrj 
363838fd1498Szrj   if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
363938fd1498Szrj     return false;
364038fd1498Szrj 
364138fd1498Szrj   /* FORNOW */
364238fd1498Szrj   if (slp_node)
364338fd1498Szrj     return false;
364438fd1498Szrj 
364538fd1498Szrj   /* Process function arguments.  */
364638fd1498Szrj   nargs = gimple_call_num_args (stmt);
364738fd1498Szrj 
364838fd1498Szrj   /* Bail out if the function has zero arguments.  */
364938fd1498Szrj   if (nargs == 0)
365038fd1498Szrj     return false;
365138fd1498Szrj 
365238fd1498Szrj   arginfo.reserve (nargs, true);
365338fd1498Szrj 
365438fd1498Szrj   for (i = 0; i < nargs; i++)
365538fd1498Szrj     {
365638fd1498Szrj       simd_call_arg_info thisarginfo;
365738fd1498Szrj       affine_iv iv;
365838fd1498Szrj 
365938fd1498Szrj       thisarginfo.linear_step = 0;
366038fd1498Szrj       thisarginfo.align = 0;
366138fd1498Szrj       thisarginfo.op = NULL_TREE;
366238fd1498Szrj       thisarginfo.simd_lane_linear = false;
366338fd1498Szrj 
366438fd1498Szrj       op = gimple_call_arg (stmt, i);
366538fd1498Szrj       if (!vect_is_simple_use (op, vinfo, &def_stmt, &thisarginfo.dt,
366638fd1498Szrj 			       &thisarginfo.vectype)
366738fd1498Szrj 	  || thisarginfo.dt == vect_uninitialized_def)
366838fd1498Szrj 	{
366938fd1498Szrj 	  if (dump_enabled_p ())
367038fd1498Szrj 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
367138fd1498Szrj 			     "use not simple.\n");
367238fd1498Szrj 	  return false;
367338fd1498Szrj 	}
367438fd1498Szrj 
367538fd1498Szrj       if (thisarginfo.dt == vect_constant_def
367638fd1498Szrj 	  || thisarginfo.dt == vect_external_def)
367738fd1498Szrj 	gcc_assert (thisarginfo.vectype == NULL_TREE);
367838fd1498Szrj       else
367938fd1498Szrj 	gcc_assert (thisarginfo.vectype != NULL_TREE);
368038fd1498Szrj 
368138fd1498Szrj       /* For linear arguments, the analyze phase should have saved
368238fd1498Szrj 	 the base and step in STMT_VINFO_SIMD_CLONE_INFO.  */
368338fd1498Szrj       if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
368438fd1498Szrj 	  && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
368538fd1498Szrj 	{
368638fd1498Szrj 	  gcc_assert (vec_stmt);
368738fd1498Szrj 	  thisarginfo.linear_step
368838fd1498Szrj 	    = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
368938fd1498Szrj 	  thisarginfo.op
369038fd1498Szrj 	    = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
369138fd1498Szrj 	  thisarginfo.simd_lane_linear
369238fd1498Szrj 	    = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
369338fd1498Szrj 	       == boolean_true_node);
369438fd1498Szrj 	  /* If the loop has been peeled for alignment, adjust the base.  */
369538fd1498Szrj 	  tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
369638fd1498Szrj 	  tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
369738fd1498Szrj 	  if (n1 != n2 && !thisarginfo.simd_lane_linear)
369838fd1498Szrj 	    {
369938fd1498Szrj 	      tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
370038fd1498Szrj 	      tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
370138fd1498Szrj 	      tree opt = TREE_TYPE (thisarginfo.op);
370238fd1498Szrj 	      bias = fold_convert (TREE_TYPE (step), bias);
370338fd1498Szrj 	      bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
370438fd1498Szrj 	      thisarginfo.op
370538fd1498Szrj 		= fold_build2 (POINTER_TYPE_P (opt)
370638fd1498Szrj 			       ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
370738fd1498Szrj 			       thisarginfo.op, bias);
370838fd1498Szrj 	    }
370938fd1498Szrj 	}
371038fd1498Szrj       else if (!vec_stmt
371138fd1498Szrj 	       && thisarginfo.dt != vect_constant_def
371238fd1498Szrj 	       && thisarginfo.dt != vect_external_def
371338fd1498Szrj 	       && loop_vinfo
371438fd1498Szrj 	       && TREE_CODE (op) == SSA_NAME
371538fd1498Szrj 	       && simple_iv (loop, loop_containing_stmt (stmt), op,
371638fd1498Szrj 			     &iv, false)
371738fd1498Szrj 	       && tree_fits_shwi_p (iv.step))
371838fd1498Szrj 	{
371938fd1498Szrj 	  thisarginfo.linear_step = tree_to_shwi (iv.step);
372038fd1498Szrj 	  thisarginfo.op = iv.base;
372138fd1498Szrj 	}
372238fd1498Szrj       else if ((thisarginfo.dt == vect_constant_def
372338fd1498Szrj 		|| thisarginfo.dt == vect_external_def)
372438fd1498Szrj 	       && POINTER_TYPE_P (TREE_TYPE (op)))
372538fd1498Szrj 	thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
372638fd1498Szrj       /* Addresses of array elements indexed by GOMP_SIMD_LANE are
372738fd1498Szrj 	 linear too.  */
372838fd1498Szrj       if (POINTER_TYPE_P (TREE_TYPE (op))
372938fd1498Szrj 	  && !thisarginfo.linear_step
373038fd1498Szrj 	  && !vec_stmt
373138fd1498Szrj 	  && thisarginfo.dt != vect_constant_def
373238fd1498Szrj 	  && thisarginfo.dt != vect_external_def
373338fd1498Szrj 	  && loop_vinfo
373438fd1498Szrj 	  && !slp_node
373538fd1498Szrj 	  && TREE_CODE (op) == SSA_NAME)
373638fd1498Szrj 	vect_simd_lane_linear (op, loop, &thisarginfo);
373738fd1498Szrj 
373838fd1498Szrj       arginfo.quick_push (thisarginfo);
373938fd1498Szrj     }
374038fd1498Szrj 
374138fd1498Szrj   unsigned HOST_WIDE_INT vf;
374238fd1498Szrj   if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
374338fd1498Szrj     {
374438fd1498Szrj       if (dump_enabled_p ())
374538fd1498Szrj 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
374638fd1498Szrj 			 "not considering SIMD clones; not yet supported"
374738fd1498Szrj 			 " for variable-width vectors.\n");
3748*58e805e6Szrj       return false;
374938fd1498Szrj     }
375038fd1498Szrj 
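  /* Pick the "least bad" usable clone: an exact simdlen match is
     preferred (each halving of simdlen relative to VF costs 1024),
     inbranch clones cost an extra 2048, target-reported badness is
     scaled by 512, and argument-kind or alignment mismatches either
     add smaller penalties or reject the clone outright.  */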
375138fd1498Szrj   unsigned int badness = 0;
375238fd1498Szrj   struct cgraph_node *bestn = NULL;
375338fd1498Szrj   if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
375438fd1498Szrj     bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
375538fd1498Szrj   else
375638fd1498Szrj     for (struct cgraph_node *n = node->simd_clones; n != NULL;
375738fd1498Szrj 	 n = n->simdclone->next_clone)
375838fd1498Szrj       {
375938fd1498Szrj 	unsigned int this_badness = 0;
376038fd1498Szrj 	if (n->simdclone->simdlen > vf
376138fd1498Szrj 	    || n->simdclone->nargs != nargs)
376238fd1498Szrj 	  continue;
376338fd1498Szrj 	if (n->simdclone->simdlen < vf)
376438fd1498Szrj 	  this_badness += (exact_log2 (vf)
376538fd1498Szrj 			   - exact_log2 (n->simdclone->simdlen)) * 1024;
376638fd1498Szrj 	if (n->simdclone->inbranch)
376738fd1498Szrj 	  this_badness += 2048;
376838fd1498Szrj 	int target_badness = targetm.simd_clone.usable (n);
376938fd1498Szrj 	if (target_badness < 0)
377038fd1498Szrj 	  continue;
377138fd1498Szrj 	this_badness += target_badness * 512;
377238fd1498Szrj 	/* FORNOW: Need to add code to pass the mask argument.  */
377338fd1498Szrj 	if (n->simdclone->inbranch)
377438fd1498Szrj 	  continue;
377538fd1498Szrj 	for (i = 0; i < nargs; i++)
377638fd1498Szrj 	  {
377738fd1498Szrj 	    switch (n->simdclone->args[i].arg_type)
377838fd1498Szrj 	      {
377938fd1498Szrj 	      case SIMD_CLONE_ARG_TYPE_VECTOR:
378038fd1498Szrj 		if (!useless_type_conversion_p
378138fd1498Szrj 			(n->simdclone->args[i].orig_type,
378238fd1498Szrj 			 TREE_TYPE (gimple_call_arg (stmt, i))))
378338fd1498Szrj 		  i = -1;
378438fd1498Szrj 		else if (arginfo[i].dt == vect_constant_def
378538fd1498Szrj 			 || arginfo[i].dt == vect_external_def
378638fd1498Szrj 			 || arginfo[i].linear_step)
378738fd1498Szrj 		  this_badness += 64;
378838fd1498Szrj 		break;
378938fd1498Szrj 	      case SIMD_CLONE_ARG_TYPE_UNIFORM:
379038fd1498Szrj 		if (arginfo[i].dt != vect_constant_def
379138fd1498Szrj 		    && arginfo[i].dt != vect_external_def)
379238fd1498Szrj 		  i = -1;
379338fd1498Szrj 		break;
379438fd1498Szrj 	      case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
379538fd1498Szrj 	      case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
379638fd1498Szrj 		if (arginfo[i].dt == vect_constant_def
379738fd1498Szrj 		    || arginfo[i].dt == vect_external_def
379838fd1498Szrj 		    || (arginfo[i].linear_step
379938fd1498Szrj 			!= n->simdclone->args[i].linear_step))
380038fd1498Szrj 		  i = -1;
380138fd1498Szrj 		break;
380238fd1498Szrj 	      case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
380338fd1498Szrj 	      case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
380438fd1498Szrj 	      case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
380538fd1498Szrj 	      case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
380638fd1498Szrj 	      case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
380738fd1498Szrj 	      case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
380838fd1498Szrj 		/* FORNOW */
380938fd1498Szrj 		i = -1;
381038fd1498Szrj 		break;
381138fd1498Szrj 	      case SIMD_CLONE_ARG_TYPE_MASK:
381238fd1498Szrj 		gcc_unreachable ();
381338fd1498Szrj 	      }
381438fd1498Szrj 	    if (i == (size_t) -1)
381538fd1498Szrj 	      break;
381638fd1498Szrj 	    if (n->simdclone->args[i].alignment > arginfo[i].align)
381738fd1498Szrj 	      {
381838fd1498Szrj 		i = -1;
381938fd1498Szrj 		break;
382038fd1498Szrj 	      }
382138fd1498Szrj 	    if (arginfo[i].align)
382238fd1498Szrj 	      this_badness += (exact_log2 (arginfo[i].align)
382338fd1498Szrj 			       - exact_log2 (n->simdclone->args[i].alignment));
382438fd1498Szrj 	  }
382538fd1498Szrj 	if (i == (size_t) -1)
382638fd1498Szrj 	  continue;
382738fd1498Szrj 	if (bestn == NULL || this_badness < badness)
382838fd1498Szrj 	  {
382938fd1498Szrj 	    bestn = n;
383038fd1498Szrj 	    badness = this_badness;
383138fd1498Szrj 	  }
383238fd1498Szrj       }
383338fd1498Szrj 
383438fd1498Szrj   if (bestn == NULL)
383538fd1498Szrj     return false;
383638fd1498Szrj 
383738fd1498Szrj   for (i = 0; i < nargs; i++)
383838fd1498Szrj     if ((arginfo[i].dt == vect_constant_def
383938fd1498Szrj 	 || arginfo[i].dt == vect_external_def)
384038fd1498Szrj 	&& bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
384138fd1498Szrj       {
384238fd1498Szrj 	arginfo[i].vectype
384338fd1498Szrj 	  = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
384438fd1498Szrj 								     i)));
384538fd1498Szrj 	if (arginfo[i].vectype == NULL
384638fd1498Szrj 	    || (simd_clone_subparts (arginfo[i].vectype)
384738fd1498Szrj 		> bestn->simdclone->simdlen))
384838fd1498Szrj 	  return false;
384938fd1498Szrj       }
385038fd1498Szrj 
385138fd1498Szrj   fndecl = bestn->decl;
385238fd1498Szrj   nunits = bestn->simdclone->simdlen;
385338fd1498Szrj   ncopies = vf / nunits;
385438fd1498Szrj 
385538fd1498Szrj   /* If the function isn't const, only allow it in simd loops where
385638fd1498Szrj      the user has asserted that at least nunits consecutive iterations
385738fd1498Szrj      can be performed using SIMD instructions.  */
385838fd1498Szrj   if ((loop == NULL || (unsigned) loop->safelen < nunits)
385938fd1498Szrj       && gimple_vuse (stmt))
386038fd1498Szrj     return false;
386138fd1498Szrj 
386238fd1498Szrj   /* Sanity check: make sure that at least one copy of the vectorized stmt
386338fd1498Szrj      needs to be generated.  */
386438fd1498Szrj   gcc_assert (ncopies >= 1);
386538fd1498Szrj 
386638fd1498Szrj   if (!vec_stmt) /* transformation not required.  */
386738fd1498Szrj     {
386838fd1498Szrj       STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
386938fd1498Szrj       for (i = 0; i < nargs; i++)
387038fd1498Szrj 	if ((bestn->simdclone->args[i].arg_type
387138fd1498Szrj 	     == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
387238fd1498Szrj 	    || (bestn->simdclone->args[i].arg_type
387338fd1498Szrj 		== SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
387438fd1498Szrj 	  {
387538fd1498Szrj 	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
387638fd1498Szrj 									+ 1);
387738fd1498Szrj 	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
387838fd1498Szrj 	    tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
387938fd1498Szrj 		       ? size_type_node : TREE_TYPE (arginfo[i].op);
388038fd1498Szrj 	    tree ls = build_int_cst (lst, arginfo[i].linear_step);
388138fd1498Szrj 	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
388238fd1498Szrj 	    tree sll = arginfo[i].simd_lane_linear
388338fd1498Szrj 		       ? boolean_true_node : boolean_false_node;
388438fd1498Szrj 	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
388538fd1498Szrj 	  }
388638fd1498Szrj       STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
388738fd1498Szrj       if (dump_enabled_p ())
388838fd1498Szrj 	dump_printf_loc (MSG_NOTE, vect_location,
388938fd1498Szrj 			 "=== vectorizable_simd_clone_call ===\n");
389038fd1498Szrj /*      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
389138fd1498Szrj       return true;
389238fd1498Szrj     }
389338fd1498Szrj 
389438fd1498Szrj   /* Transform.  */
389538fd1498Szrj 
389638fd1498Szrj   if (dump_enabled_p ())
389738fd1498Szrj     dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
389838fd1498Szrj 
389938fd1498Szrj   /* Handle def.  */
390038fd1498Szrj   scalar_dest = gimple_call_lhs (stmt);
390138fd1498Szrj   vec_dest = NULL_TREE;
390238fd1498Szrj   rtype = NULL_TREE;
390338fd1498Szrj   ratype = NULL_TREE;
390438fd1498Szrj   if (scalar_dest)
390538fd1498Szrj     {
390638fd1498Szrj       vec_dest = vect_create_destination_var (scalar_dest, vectype);
390738fd1498Szrj       rtype = TREE_TYPE (TREE_TYPE (fndecl));
390838fd1498Szrj       if (TREE_CODE (rtype) == ARRAY_TYPE)
390938fd1498Szrj 	{
391038fd1498Szrj 	  ratype = rtype;
391138fd1498Szrj 	  rtype = TREE_TYPE (ratype);
391238fd1498Szrj 	}
391338fd1498Szrj     }
391438fd1498Szrj 
391538fd1498Szrj   prev_stmt_info = NULL;
391638fd1498Szrj   for (j = 0; j < ncopies; ++j)
391738fd1498Szrj     {
391838fd1498Szrj       /* Build argument list for the vectorized call.  */
391938fd1498Szrj       if (j == 0)
392038fd1498Szrj 	vargs.create (nargs);
392138fd1498Szrj       else
392238fd1498Szrj 	vargs.truncate (0);
392338fd1498Szrj 
392438fd1498Szrj       for (i = 0; i < nargs; i++)
392538fd1498Szrj 	{
392638fd1498Szrj 	  unsigned int k, l, m, o;
392738fd1498Szrj 	  tree atype;
392838fd1498Szrj 	  op = gimple_call_arg (stmt, i);
392938fd1498Szrj 	  switch (bestn->simdclone->args[i].arg_type)
393038fd1498Szrj 	    {
393138fd1498Szrj 	    case SIMD_CLONE_ARG_TYPE_VECTOR:
393238fd1498Szrj 	      atype = bestn->simdclone->args[i].vector_type;
393338fd1498Szrj 	      o = nunits / simd_clone_subparts (atype);
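	      /* The clone may take its vector argument in different-sized
		 chunks than we have: either extract narrower pieces from a
		 wider input vector with BIT_FIELD_REFs, or glue several
		 input vectors together with a CONSTRUCTOR, O chunks per
		 copy.  */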
393438fd1498Szrj 	      for (m = j * o; m < (j + 1) * o; m++)
393538fd1498Szrj 		{
393638fd1498Szrj 		  if (simd_clone_subparts (atype)
393738fd1498Szrj 		      < simd_clone_subparts (arginfo[i].vectype))
393838fd1498Szrj 		    {
393938fd1498Szrj 		      poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
394038fd1498Szrj 		      k = (simd_clone_subparts (arginfo[i].vectype)
394138fd1498Szrj 			   / simd_clone_subparts (atype));
394238fd1498Szrj 		      gcc_assert ((k & (k - 1)) == 0);
394338fd1498Szrj 		      if (m == 0)
394438fd1498Szrj 			vec_oprnd0
394538fd1498Szrj 			  = vect_get_vec_def_for_operand (op, stmt);
394638fd1498Szrj 		      else
394738fd1498Szrj 			{
394838fd1498Szrj 			  vec_oprnd0 = arginfo[i].op;
394938fd1498Szrj 			  if ((m & (k - 1)) == 0)
395038fd1498Szrj 			    vec_oprnd0
395138fd1498Szrj 			      = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
395238fd1498Szrj 								vec_oprnd0);
395338fd1498Szrj 			}
395438fd1498Szrj 		      arginfo[i].op = vec_oprnd0;
395538fd1498Szrj 		      vec_oprnd0
395638fd1498Szrj 			= build3 (BIT_FIELD_REF, atype, vec_oprnd0,
395738fd1498Szrj 				  bitsize_int (prec),
395838fd1498Szrj 				  bitsize_int ((m & (k - 1)) * prec));
395938fd1498Szrj 		      new_stmt
396038fd1498Szrj 			= gimple_build_assign (make_ssa_name (atype),
396138fd1498Szrj 					       vec_oprnd0);
396238fd1498Szrj 		      vect_finish_stmt_generation (stmt, new_stmt, gsi);
396338fd1498Szrj 		      vargs.safe_push (gimple_assign_lhs (new_stmt));
396438fd1498Szrj 		    }
396538fd1498Szrj 		  else
396638fd1498Szrj 		    {
396738fd1498Szrj 		      k = (simd_clone_subparts (atype)
396838fd1498Szrj 			   / simd_clone_subparts (arginfo[i].vectype));
396938fd1498Szrj 		      gcc_assert ((k & (k - 1)) == 0);
397038fd1498Szrj 		      vec<constructor_elt, va_gc> *ctor_elts;
397138fd1498Szrj 		      if (k != 1)
397238fd1498Szrj 			vec_alloc (ctor_elts, k);
397338fd1498Szrj 		      else
397438fd1498Szrj 			ctor_elts = NULL;
397538fd1498Szrj 		      for (l = 0; l < k; l++)
397638fd1498Szrj 			{
397738fd1498Szrj 			  if (m == 0 && l == 0)
397838fd1498Szrj 			    vec_oprnd0
397938fd1498Szrj 			      = vect_get_vec_def_for_operand (op, stmt);
398038fd1498Szrj 			  else
398138fd1498Szrj 			    vec_oprnd0
398238fd1498Szrj 			      = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
398338fd1498Szrj 								arginfo[i].op);
398438fd1498Szrj 			  arginfo[i].op = vec_oprnd0;
398538fd1498Szrj 			  if (k == 1)
398638fd1498Szrj 			    break;
398738fd1498Szrj 			  CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
398838fd1498Szrj 						  vec_oprnd0);
398938fd1498Szrj 			}
399038fd1498Szrj 		      if (k == 1)
399138fd1498Szrj 			vargs.safe_push (vec_oprnd0);
399238fd1498Szrj 		      else
399338fd1498Szrj 			{
399438fd1498Szrj 			  vec_oprnd0 = build_constructor (atype, ctor_elts);
399538fd1498Szrj 			  new_stmt
399638fd1498Szrj 			    = gimple_build_assign (make_ssa_name (atype),
399738fd1498Szrj 						   vec_oprnd0);
399838fd1498Szrj 			  vect_finish_stmt_generation (stmt, new_stmt, gsi);
399938fd1498Szrj 			  vargs.safe_push (gimple_assign_lhs (new_stmt));
400038fd1498Szrj 			}
400138fd1498Szrj 		    }
400238fd1498Szrj 		}
400338fd1498Szrj 	      break;
400438fd1498Szrj 	    case SIMD_CLONE_ARG_TYPE_UNIFORM:
400538fd1498Szrj 	      vargs.safe_push (op);
400638fd1498Szrj 	      break;
400738fd1498Szrj 	    case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
400838fd1498Szrj 	    case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
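	      /* For a linear argument we materialize a scalar induction
		 variable in the loop: on the first copy build
		   PHI_RES = PHI <OP (preheader), PHI_ARG (latch)>
		   PHI_ARG = PHI_RES + STEP * NCOPIES * NUNITS
		 and pass PHI_RES; copy J > 0 instead passes
		 PHI_RES + STEP * J * NUNITS computed at the call site.  */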
400938fd1498Szrj 	      if (j == 0)
401038fd1498Szrj 		{
401138fd1498Szrj 		  gimple_seq stmts;
401238fd1498Szrj 		  arginfo[i].op
401338fd1498Szrj 		    = force_gimple_operand (arginfo[i].op, &stmts, true,
401438fd1498Szrj 					    NULL_TREE);
401538fd1498Szrj 		  if (stmts != NULL)
401638fd1498Szrj 		    {
401738fd1498Szrj 		      basic_block new_bb;
401838fd1498Szrj 		      edge pe = loop_preheader_edge (loop);
401938fd1498Szrj 		      new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
402038fd1498Szrj 		      gcc_assert (!new_bb);
402138fd1498Szrj 		    }
402238fd1498Szrj 		  if (arginfo[i].simd_lane_linear)
402338fd1498Szrj 		    {
402438fd1498Szrj 		      vargs.safe_push (arginfo[i].op);
402538fd1498Szrj 		      break;
402638fd1498Szrj 		    }
402738fd1498Szrj 		  tree phi_res = copy_ssa_name (op);
402838fd1498Szrj 		  gphi *new_phi = create_phi_node (phi_res, loop->header);
402938fd1498Szrj 		  set_vinfo_for_stmt (new_phi,
403038fd1498Szrj 				      new_stmt_vec_info (new_phi, loop_vinfo));
403138fd1498Szrj 		  add_phi_arg (new_phi, arginfo[i].op,
403238fd1498Szrj 			       loop_preheader_edge (loop), UNKNOWN_LOCATION);
403338fd1498Szrj 		  enum tree_code code
403438fd1498Szrj 		    = POINTER_TYPE_P (TREE_TYPE (op))
403538fd1498Szrj 		      ? POINTER_PLUS_EXPR : PLUS_EXPR;
403638fd1498Szrj 		  tree type = POINTER_TYPE_P (TREE_TYPE (op))
403738fd1498Szrj 			      ? sizetype : TREE_TYPE (op);
403838fd1498Szrj 		  widest_int cst
403938fd1498Szrj 		    = wi::mul (bestn->simdclone->args[i].linear_step,
404038fd1498Szrj 			       ncopies * nunits);
404138fd1498Szrj 		  tree tcst = wide_int_to_tree (type, cst);
404238fd1498Szrj 		  tree phi_arg = copy_ssa_name (op);
404338fd1498Szrj 		  new_stmt
404438fd1498Szrj 		    = gimple_build_assign (phi_arg, code, phi_res, tcst);
404538fd1498Szrj 		  gimple_stmt_iterator si = gsi_after_labels (loop->header);
404638fd1498Szrj 		  gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
404738fd1498Szrj 		  set_vinfo_for_stmt (new_stmt,
404838fd1498Szrj 				      new_stmt_vec_info (new_stmt, loop_vinfo));
404938fd1498Szrj 		  add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
405038fd1498Szrj 			       UNKNOWN_LOCATION);
405138fd1498Szrj 		  arginfo[i].op = phi_res;
405238fd1498Szrj 		  vargs.safe_push (phi_res);
405338fd1498Szrj 		}
405438fd1498Szrj 	      else
405538fd1498Szrj 		{
405638fd1498Szrj 		  enum tree_code code
405738fd1498Szrj 		    = POINTER_TYPE_P (TREE_TYPE (op))
405838fd1498Szrj 		      ? POINTER_PLUS_EXPR : PLUS_EXPR;
405938fd1498Szrj 		  tree type = POINTER_TYPE_P (TREE_TYPE (op))
406038fd1498Szrj 			      ? sizetype : TREE_TYPE (op);
406138fd1498Szrj 		  widest_int cst
406238fd1498Szrj 		    = wi::mul (bestn->simdclone->args[i].linear_step,
406338fd1498Szrj 			       j * nunits);
406438fd1498Szrj 		  tree tcst = wide_int_to_tree (type, cst);
406538fd1498Szrj 		  new_temp = make_ssa_name (TREE_TYPE (op));
406638fd1498Szrj 		  new_stmt = gimple_build_assign (new_temp, code,
406738fd1498Szrj 						  arginfo[i].op, tcst);
406838fd1498Szrj 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
406938fd1498Szrj 		  vargs.safe_push (new_temp);
407038fd1498Szrj 		}
407138fd1498Szrj 	      break;
407238fd1498Szrj 	    case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
407338fd1498Szrj 	    case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
407438fd1498Szrj 	    case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
407538fd1498Szrj 	    case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
407638fd1498Szrj 	    case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
407738fd1498Szrj 	    case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
407838fd1498Szrj 	    default:
407938fd1498Szrj 	      gcc_unreachable ();
408038fd1498Szrj 	    }
408138fd1498Szrj 	}
408238fd1498Szrj 
408338fd1498Szrj       new_stmt = gimple_build_call_vec (fndecl, vargs);
408438fd1498Szrj       if (vec_dest)
408538fd1498Szrj 	{
408638fd1498Szrj 	  gcc_assert (ratype || simd_clone_subparts (rtype) == nunits);
408738fd1498Szrj 	  if (ratype)
408838fd1498Szrj 	    new_temp = create_tmp_var (ratype);
408938fd1498Szrj 	  else if (simd_clone_subparts (vectype)
409038fd1498Szrj 		   == simd_clone_subparts (rtype))
409138fd1498Szrj 	    new_temp = make_ssa_name (vec_dest, new_stmt);
409238fd1498Szrj 	  else
409338fd1498Szrj 	    new_temp = make_ssa_name (rtype, new_stmt);
409438fd1498Szrj 	  gimple_call_set_lhs (new_stmt, new_temp);
409538fd1498Szrj 	}
409638fd1498Szrj       vect_finish_stmt_generation (stmt, new_stmt, gsi);
409738fd1498Szrj 
409838fd1498Szrj       if (vec_dest)
409938fd1498Szrj 	{
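	  /* The clone's return value may likewise need re-chunking: split
	     a wide (or array) result into pieces of VECTYPE, or collect K
	     successive results into a single CONSTRUCTOR of VECTYPE.  */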
410038fd1498Szrj 	  if (simd_clone_subparts (vectype) < nunits)
410138fd1498Szrj 	    {
410238fd1498Szrj 	      unsigned int k, l;
410338fd1498Szrj 	      poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
410438fd1498Szrj 	      poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
410538fd1498Szrj 	      k = nunits / simd_clone_subparts (vectype);
410638fd1498Szrj 	      gcc_assert ((k & (k - 1)) == 0);
410738fd1498Szrj 	      for (l = 0; l < k; l++)
410838fd1498Szrj 		{
410938fd1498Szrj 		  tree t;
411038fd1498Szrj 		  if (ratype)
411138fd1498Szrj 		    {
411238fd1498Szrj 		      t = build_fold_addr_expr (new_temp);
411338fd1498Szrj 		      t = build2 (MEM_REF, vectype, t,
411438fd1498Szrj 				  build_int_cst (TREE_TYPE (t), l * bytes));
411538fd1498Szrj 		    }
411638fd1498Szrj 		  else
411738fd1498Szrj 		    t = build3 (BIT_FIELD_REF, vectype, new_temp,
411838fd1498Szrj 				bitsize_int (prec), bitsize_int (l * prec));
411938fd1498Szrj 		  new_stmt
412038fd1498Szrj 		    = gimple_build_assign (make_ssa_name (vectype), t);
412138fd1498Szrj 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
412238fd1498Szrj 		  if (j == 0 && l == 0)
412338fd1498Szrj 		    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
412438fd1498Szrj 		  else
412538fd1498Szrj 		    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
412638fd1498Szrj 
412738fd1498Szrj 		  prev_stmt_info = vinfo_for_stmt (new_stmt);
412838fd1498Szrj 		}
412938fd1498Szrj 
413038fd1498Szrj 	      if (ratype)
413138fd1498Szrj 		{
413238fd1498Szrj 		  tree clobber = build_constructor (ratype, NULL);
413338fd1498Szrj 		  TREE_THIS_VOLATILE (clobber) = 1;
413438fd1498Szrj 		  new_stmt = gimple_build_assign (new_temp, clobber);
413538fd1498Szrj 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
413638fd1498Szrj 		}
413738fd1498Szrj 	      continue;
413838fd1498Szrj 	    }
413938fd1498Szrj 	  else if (simd_clone_subparts (vectype) > nunits)
414038fd1498Szrj 	    {
414138fd1498Szrj 	      unsigned int k = (simd_clone_subparts (vectype)
414238fd1498Szrj 				/ simd_clone_subparts (rtype));
414338fd1498Szrj 	      gcc_assert ((k & (k - 1)) == 0);
414438fd1498Szrj 	      if ((j & (k - 1)) == 0)
414538fd1498Szrj 		vec_alloc (ret_ctor_elts, k);
414638fd1498Szrj 	      if (ratype)
414738fd1498Szrj 		{
414838fd1498Szrj 		  unsigned int m, o = nunits / simd_clone_subparts (rtype);
414938fd1498Szrj 		  for (m = 0; m < o; m++)
415038fd1498Szrj 		    {
415138fd1498Szrj 		      tree tem = build4 (ARRAY_REF, rtype, new_temp,
415238fd1498Szrj 					 size_int (m), NULL_TREE, NULL_TREE);
415338fd1498Szrj 		      new_stmt
415438fd1498Szrj 			= gimple_build_assign (make_ssa_name (rtype), tem);
415538fd1498Szrj 		      vect_finish_stmt_generation (stmt, new_stmt, gsi);
415638fd1498Szrj 		      CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
415738fd1498Szrj 					      gimple_assign_lhs (new_stmt));
415838fd1498Szrj 		    }
415938fd1498Szrj 		  tree clobber = build_constructor (ratype, NULL);
416038fd1498Szrj 		  TREE_THIS_VOLATILE (clobber) = 1;
416138fd1498Szrj 		  new_stmt = gimple_build_assign (new_temp, clobber);
416238fd1498Szrj 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
416338fd1498Szrj 		}
416438fd1498Szrj 	      else
416538fd1498Szrj 		CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
416638fd1498Szrj 	      if ((j & (k - 1)) != k - 1)
416738fd1498Szrj 		continue;
416838fd1498Szrj 	      vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
416938fd1498Szrj 	      new_stmt
417038fd1498Szrj 		= gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
417138fd1498Szrj 	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
417238fd1498Szrj 
417338fd1498Szrj 	      if ((unsigned) j == k - 1)
417438fd1498Szrj 		STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
417538fd1498Szrj 	      else
417638fd1498Szrj 		STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
417738fd1498Szrj 
417838fd1498Szrj 	      prev_stmt_info = vinfo_for_stmt (new_stmt);
417938fd1498Szrj 	      continue;
418038fd1498Szrj 	    }
418138fd1498Szrj 	  else if (ratype)
418238fd1498Szrj 	    {
418338fd1498Szrj 	      tree t = build_fold_addr_expr (new_temp);
418438fd1498Szrj 	      t = build2 (MEM_REF, vectype, t,
418538fd1498Szrj 			  build_int_cst (TREE_TYPE (t), 0));
418638fd1498Szrj 	      new_stmt
418738fd1498Szrj 		= gimple_build_assign (make_ssa_name (vec_dest), t);
418838fd1498Szrj 	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
418938fd1498Szrj 	      tree clobber = build_constructor (ratype, NULL);
419038fd1498Szrj 	      TREE_THIS_VOLATILE (clobber) = 1;
419138fd1498Szrj 	      vect_finish_stmt_generation (stmt,
419238fd1498Szrj 					   gimple_build_assign (new_temp,
419338fd1498Szrj 								clobber), gsi);
419438fd1498Szrj 	    }
419538fd1498Szrj 	}
419638fd1498Szrj 
419738fd1498Szrj       if (j == 0)
419838fd1498Szrj 	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
419938fd1498Szrj       else
420038fd1498Szrj 	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
420138fd1498Szrj 
420238fd1498Szrj       prev_stmt_info = vinfo_for_stmt (new_stmt);
420338fd1498Szrj     }
420438fd1498Szrj 
420538fd1498Szrj   vargs.release ();
420638fd1498Szrj 
420738fd1498Szrj   /* The call in STMT might prevent it from being removed in DCE.
420838fd1498Szrj      We cannot remove it here, however, because of the way the SSA
420938fd1498Szrj      name it defines is mapped to the new definition.  So just replace
421038fd1498Szrj      the rhs of the statement with something harmless.  */
421138fd1498Szrj 
421238fd1498Szrj   if (slp_node)
421338fd1498Szrj     return true;
421438fd1498Szrj 
421538fd1498Szrj   if (scalar_dest)
421638fd1498Szrj     {
421738fd1498Szrj       type = TREE_TYPE (scalar_dest);
421838fd1498Szrj       if (is_pattern_stmt_p (stmt_info))
421938fd1498Szrj 	lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
422038fd1498Szrj       else
422138fd1498Szrj 	lhs = gimple_call_lhs (stmt);
422238fd1498Szrj       new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
422338fd1498Szrj     }
422438fd1498Szrj   else
422538fd1498Szrj     new_stmt = gimple_build_nop ();
422638fd1498Szrj   set_vinfo_for_stmt (new_stmt, stmt_info);
422738fd1498Szrj   set_vinfo_for_stmt (stmt, NULL);
422838fd1498Szrj   STMT_VINFO_STMT (stmt_info) = new_stmt;
422938fd1498Szrj   gsi_replace (gsi, new_stmt, true);
423038fd1498Szrj   unlink_stmt_vdef (stmt);
423138fd1498Szrj 
423238fd1498Szrj   return true;
423338fd1498Szrj }
423438fd1498Szrj 
423538fd1498Szrj 
423638fd1498Szrj /* Function vect_gen_widened_results_half
423738fd1498Szrj 
423838fd1498Szrj    Create a vector stmt whose code is CODE, whose number of operands
423938fd1498Szrj    is OP_TYPE, and whose result variable is VEC_DEST; its arguments
424038fd1498Szrj    are VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be
424138fd1498Szrj    inserted at GSI.  If CODE is a CALL_EXPR, a call to DECL needs to
424238fd1498Szrj    be created instead (DECL is the function decl of a target builtin).
424338fd1498Szrj    STMT is the original scalar stmt that we are vectorizing.  */
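
/* For example (a sketch), widening a V8HI multiplication to V4SI
   products is done in two halves: a VEC_WIDEN_MULT_LO_EXPR stmt
   computes the products of elements 0-3 and a VEC_WIDEN_MULT_HI_EXPR
   stmt those of elements 4-7; each call to this function emits one
   such half.  */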
424438fd1498Szrj 
424538fd1498Szrj static gimple *
424638fd1498Szrj vect_gen_widened_results_half (enum tree_code code,
424738fd1498Szrj 			       tree decl,
424838fd1498Szrj                                tree vec_oprnd0, tree vec_oprnd1, int op_type,
424938fd1498Szrj 			       tree vec_dest, gimple_stmt_iterator *gsi,
425038fd1498Szrj 			       gimple *stmt)
425138fd1498Szrj {
425238fd1498Szrj   gimple *new_stmt;
425338fd1498Szrj   tree new_temp;
425438fd1498Szrj 
425538fd1498Szrj   /* Generate half of the widened result:  */
425638fd1498Szrj   if (code == CALL_EXPR)
425738fd1498Szrj     {
425838fd1498Szrj       /* Target-specific support.  */
425938fd1498Szrj       if (op_type == binary_op)
426038fd1498Szrj 	new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
426138fd1498Szrj       else
426238fd1498Szrj 	new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
426338fd1498Szrj       new_temp = make_ssa_name (vec_dest, new_stmt);
426438fd1498Szrj       gimple_call_set_lhs (new_stmt, new_temp);
426538fd1498Szrj     }
426638fd1498Szrj   else
426738fd1498Szrj     {
426838fd1498Szrj       /* Generic support.  */
426938fd1498Szrj       gcc_assert (op_type == TREE_CODE_LENGTH (code));
427038fd1498Szrj       if (op_type != binary_op)
427138fd1498Szrj 	vec_oprnd1 = NULL;
427238fd1498Szrj       new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
427338fd1498Szrj       new_temp = make_ssa_name (vec_dest, new_stmt);
427438fd1498Szrj       gimple_assign_set_lhs (new_stmt, new_temp);
427538fd1498Szrj     }
427638fd1498Szrj   vect_finish_stmt_generation (stmt, new_stmt, gsi);
427738fd1498Szrj 
427838fd1498Szrj   return new_stmt;
427938fd1498Szrj }
428038fd1498Szrj 
428138fd1498Szrj 
428238fd1498Szrj /* Get vectorized definitions for loop-based vectorization.  For the first
428338fd1498Szrj    operand we call vect_get_vec_def_for_operand() (with OPRND containing
428438fd1498Szrj    the scalar operand), and for the rest we get a copy with
428538fd1498Szrj    vect_get_vec_def_for_stmt_copy() using the previous vector definition
428638fd1498Szrj    (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
428738fd1498Szrj    The vectors are collected into VEC_OPRNDS.  */
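
/* E.g. with one scalar operand and MULTI_STEP_CVT == 1 this collects
   four vector defs: the def of the operand itself followed by three
   successive stmt-copy defs, each continuing where the previous one
   left off.  */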
428838fd1498Szrj 
428938fd1498Szrj static void
429038fd1498Szrj vect_get_loop_based_defs (tree *oprnd, gimple *stmt, enum vect_def_type dt,
429138fd1498Szrj 			  vec<tree> *vec_oprnds, int multi_step_cvt)
429238fd1498Szrj {
429338fd1498Szrj   tree vec_oprnd;
429438fd1498Szrj 
429538fd1498Szrj   /* Get the first vector operand.  All the vector operands except
429638fd1498Szrj      the very first one (which is the scalar operand) are stmt
429738fd1498Szrj      copies.  */
429838fd1498Szrj   if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
429938fd1498Szrj     vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt);
430038fd1498Szrj   else
430138fd1498Szrj     vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
430238fd1498Szrj 
430338fd1498Szrj   vec_oprnds->quick_push (vec_oprnd);
430438fd1498Szrj 
430538fd1498Szrj   /* Get second vector operand.  */
430638fd1498Szrj   vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
430738fd1498Szrj   vec_oprnds->quick_push (vec_oprnd);
430838fd1498Szrj 
430938fd1498Szrj   *oprnd = vec_oprnd;
431038fd1498Szrj 
431138fd1498Szrj   /* For conversion in multiple steps, continue to get operands
431238fd1498Szrj      recursively.  */
431338fd1498Szrj   if (multi_step_cvt)
431438fd1498Szrj     vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
431538fd1498Szrj }
431638fd1498Szrj 
431738fd1498Szrj 
431838fd1498Szrj /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
431938fd1498Szrj    For multi-step conversions store the resulting vectors and call the function
432038fd1498Szrj    recursively.  */
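
/* For example (a sketch), demoting eight V4SI vectors to V16QI takes
   two rounds: the first packs pairs of V4SI into four V8HI vectors
   using the caller's CODE, and the recursive round packs those into
   two V16QI vectors using VEC_PACK_TRUNC_EXPR.  */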
432138fd1498Szrj 
432238fd1498Szrj static void
432338fd1498Szrj vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
432438fd1498Szrj 				       int multi_step_cvt, gimple *stmt,
432538fd1498Szrj 				       vec<tree> vec_dsts,
432638fd1498Szrj 				       gimple_stmt_iterator *gsi,
432738fd1498Szrj 				       slp_tree slp_node, enum tree_code code,
432838fd1498Szrj 				       stmt_vec_info *prev_stmt_info)
432938fd1498Szrj {
433038fd1498Szrj   unsigned int i;
433138fd1498Szrj   tree vop0, vop1, new_tmp, vec_dest;
433238fd1498Szrj   gimple *new_stmt;
433338fd1498Szrj   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
433438fd1498Szrj 
433538fd1498Szrj   vec_dest = vec_dsts.pop ();
433638fd1498Szrj 
433738fd1498Szrj   for (i = 0; i < vec_oprnds->length (); i += 2)
433838fd1498Szrj     {
433938fd1498Szrj       /* Create demotion operation.  */
434038fd1498Szrj       vop0 = (*vec_oprnds)[i];
434138fd1498Szrj       vop1 = (*vec_oprnds)[i + 1];
434238fd1498Szrj       new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
434338fd1498Szrj       new_tmp = make_ssa_name (vec_dest, new_stmt);
434438fd1498Szrj       gimple_assign_set_lhs (new_stmt, new_tmp);
434538fd1498Szrj       vect_finish_stmt_generation (stmt, new_stmt, gsi);
434638fd1498Szrj 
434738fd1498Szrj       if (multi_step_cvt)
434838fd1498Szrj 	/* Store the resulting vector for next recursive call.  */
434938fd1498Szrj 	(*vec_oprnds)[i/2] = new_tmp;
435038fd1498Szrj       else
435138fd1498Szrj 	{
435238fd1498Szrj 	  /* This is the last step of the conversion sequence.  Store the
435338fd1498Szrj 	     vectors in SLP_NODE or in the vector info of the scalar
435438fd1498Szrj 	     statement (or in the STMT_VINFO_RELATED_STMT chain).  */
435538fd1498Szrj 	  if (slp_node)
435638fd1498Szrj 	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
435738fd1498Szrj 	  else
435838fd1498Szrj 	    {
435938fd1498Szrj 	      if (!*prev_stmt_info)
436038fd1498Szrj 		STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
436138fd1498Szrj 	      else
436238fd1498Szrj 		STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
436338fd1498Szrj 
436438fd1498Szrj 	      *prev_stmt_info = vinfo_for_stmt (new_stmt);
436538fd1498Szrj 	    }
436638fd1498Szrj 	}
436738fd1498Szrj     }
436838fd1498Szrj 
436938fd1498Szrj   /* For multi-step demotion operations we first generate demotion
437038fd1498Szrj      operations from the source type to the intermediate types, and
437138fd1498Szrj      then combine the results (stored in VEC_OPRNDS) with a further
437238fd1498Szrj      demotion operation to the destination type.  */
437338fd1498Szrj   if (multi_step_cvt)
437438fd1498Szrj     {
437538fd1498Szrj       /* At each level of recursion we have half of the operands we had at the
437638fd1498Szrj 	 previous level.  */
437738fd1498Szrj       vec_oprnds->truncate ((i+1)/2);
437838fd1498Szrj       vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
437938fd1498Szrj 					     stmt, vec_dsts, gsi, slp_node,
438038fd1498Szrj 					     VEC_PACK_TRUNC_EXPR,
438138fd1498Szrj 					     prev_stmt_info);
438238fd1498Szrj     }
438338fd1498Szrj 
438438fd1498Szrj   vec_dsts.quick_push (vec_dest);
438538fd1498Szrj }
438638fd1498Szrj 
438738fd1498Szrj 
438838fd1498Szrj /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
438938fd1498Szrj    and VEC_OPRNDS1 (for binary operations).  For multi-step conversions store
439038fd1498Szrj    the resulting vectors and call the function recursively.  */
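
/* For example (a sketch), promoting two V8HI operand vectors to V4SI
   yields four result vectors: each input is widened by one "lo" stmt
   (CODE1) and one "hi" stmt (CODE2), so VEC_OPRNDS0 doubles in length
   at every step.  */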
439138fd1498Szrj 
439238fd1498Szrj static void
439338fd1498Szrj vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
439438fd1498Szrj 					vec<tree> *vec_oprnds1,
439538fd1498Szrj 					gimple *stmt, tree vec_dest,
439638fd1498Szrj 					gimple_stmt_iterator *gsi,
439738fd1498Szrj 					enum tree_code code1,
439838fd1498Szrj 					enum tree_code code2, tree decl1,
439938fd1498Szrj 					tree decl2, int op_type)
440038fd1498Szrj {
440138fd1498Szrj   int i;
440238fd1498Szrj   tree vop0, vop1, new_tmp1, new_tmp2;
440338fd1498Szrj   gimple *new_stmt1, *new_stmt2;
440438fd1498Szrj   vec<tree> vec_tmp = vNULL;
440538fd1498Szrj 
440638fd1498Szrj   vec_tmp.create (vec_oprnds0->length () * 2);
440738fd1498Szrj   FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
440838fd1498Szrj     {
440938fd1498Szrj       if (op_type == binary_op)
441038fd1498Szrj 	vop1 = (*vec_oprnds1)[i];
441138fd1498Szrj       else
441238fd1498Szrj 	vop1 = NULL_TREE;
441338fd1498Szrj 
441438fd1498Szrj       /* Generate the two halves of promotion operation.  */
441538fd1498Szrj       new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
441638fd1498Szrj 						 op_type, vec_dest, gsi, stmt);
441738fd1498Szrj       new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
441838fd1498Szrj 						 op_type, vec_dest, gsi, stmt);
441938fd1498Szrj       if (is_gimple_call (new_stmt1))
442038fd1498Szrj 	{
442138fd1498Szrj 	  new_tmp1 = gimple_call_lhs (new_stmt1);
442238fd1498Szrj 	  new_tmp2 = gimple_call_lhs (new_stmt2);
442338fd1498Szrj 	}
442438fd1498Szrj       else
442538fd1498Szrj 	{
442638fd1498Szrj 	  new_tmp1 = gimple_assign_lhs (new_stmt1);
442738fd1498Szrj 	  new_tmp2 = gimple_assign_lhs (new_stmt2);
442838fd1498Szrj 	}
442938fd1498Szrj 
443038fd1498Szrj       /* Store the results for the next step.  */
443138fd1498Szrj       vec_tmp.quick_push (new_tmp1);
443238fd1498Szrj       vec_tmp.quick_push (new_tmp2);
443338fd1498Szrj     }
443438fd1498Szrj 
443538fd1498Szrj   vec_oprnds0->release ();
443638fd1498Szrj   *vec_oprnds0 = vec_tmp;
443738fd1498Szrj }
443838fd1498Szrj 
443938fd1498Szrj 
444038fd1498Szrj /* Check if STMT performs a conversion operation that can be vectorized.
444138fd1498Szrj    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
444238fd1498Szrj    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
444338fd1498Szrj    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
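
/* For example (a sketch), converting short to double with 128-bit
   vectors is a multi-step WIDEN conversion: a V8HI input is first
   unpacked into two V4SI vectors, each of which is then converted
   into a pair of V2DF vectors via FLOAT_EXPR.  */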
444438fd1498Szrj 
444538fd1498Szrj static bool
444638fd1498Szrj vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
444738fd1498Szrj 			 gimple **vec_stmt, slp_tree slp_node)
444838fd1498Szrj {
444938fd1498Szrj   tree vec_dest;
445038fd1498Szrj   tree scalar_dest;
445138fd1498Szrj   tree op0, op1 = NULL_TREE;
445238fd1498Szrj   tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
445338fd1498Szrj   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
445438fd1498Szrj   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
445538fd1498Szrj   enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
445638fd1498Szrj   enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
445738fd1498Szrj   tree decl1 = NULL_TREE, decl2 = NULL_TREE;
445838fd1498Szrj   tree new_temp;
445938fd1498Szrj   gimple *def_stmt;
446038fd1498Szrj   enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
446138fd1498Szrj   int ndts = 2;
446238fd1498Szrj   gimple *new_stmt = NULL;
446338fd1498Szrj   stmt_vec_info prev_stmt_info;
446438fd1498Szrj   poly_uint64 nunits_in;
446538fd1498Szrj   poly_uint64 nunits_out;
446638fd1498Szrj   tree vectype_out, vectype_in;
446738fd1498Szrj   int ncopies, i, j;
446838fd1498Szrj   tree lhs_type, rhs_type;
446938fd1498Szrj   enum { NARROW, NONE, WIDEN } modifier;
447038fd1498Szrj   vec<tree> vec_oprnds0 = vNULL;
447138fd1498Szrj   vec<tree> vec_oprnds1 = vNULL;
447238fd1498Szrj   tree vop0;
447338fd1498Szrj   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
447438fd1498Szrj   vec_info *vinfo = stmt_info->vinfo;
447538fd1498Szrj   int multi_step_cvt = 0;
447638fd1498Szrj   vec<tree> interm_types = vNULL;
447738fd1498Szrj   tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
447838fd1498Szrj   int op_type;
447938fd1498Szrj   unsigned short fltsz;
448038fd1498Szrj 
448138fd1498Szrj   /* Is STMT a vectorizable conversion?   */
448238fd1498Szrj 
448338fd1498Szrj   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
448438fd1498Szrj     return false;
448538fd1498Szrj 
448638fd1498Szrj   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
448738fd1498Szrj       && ! vec_stmt)
448838fd1498Szrj     return false;
448938fd1498Szrj 
449038fd1498Szrj   if (!is_gimple_assign (stmt))
449138fd1498Szrj     return false;
449238fd1498Szrj 
449338fd1498Szrj   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
449438fd1498Szrj     return false;
449538fd1498Szrj 
449638fd1498Szrj   code = gimple_assign_rhs_code (stmt);
449738fd1498Szrj   if (!CONVERT_EXPR_CODE_P (code)
449838fd1498Szrj       && code != FIX_TRUNC_EXPR
449938fd1498Szrj       && code != FLOAT_EXPR
450038fd1498Szrj       && code != WIDEN_MULT_EXPR
450138fd1498Szrj       && code != WIDEN_LSHIFT_EXPR)
450238fd1498Szrj     return false;
450338fd1498Szrj 
450438fd1498Szrj   op_type = TREE_CODE_LENGTH (code);
450538fd1498Szrj 
450638fd1498Szrj   /* Check types of lhs and rhs.  */
450738fd1498Szrj   scalar_dest = gimple_assign_lhs (stmt);
450838fd1498Szrj   lhs_type = TREE_TYPE (scalar_dest);
450938fd1498Szrj   vectype_out = STMT_VINFO_VECTYPE (stmt_info);
451038fd1498Szrj 
451138fd1498Szrj   op0 = gimple_assign_rhs1 (stmt);
451238fd1498Szrj   rhs_type = TREE_TYPE (op0);
451338fd1498Szrj 
451438fd1498Szrj   if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
451538fd1498Szrj       && !((INTEGRAL_TYPE_P (lhs_type)
451638fd1498Szrj 	    && INTEGRAL_TYPE_P (rhs_type))
451738fd1498Szrj 	   || (SCALAR_FLOAT_TYPE_P (lhs_type)
451838fd1498Szrj 	       && SCALAR_FLOAT_TYPE_P (rhs_type))))
451938fd1498Szrj     return false;
452038fd1498Szrj 
452138fd1498Szrj   if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
452238fd1498Szrj       && ((INTEGRAL_TYPE_P (lhs_type)
452338fd1498Szrj 	   && !type_has_mode_precision_p (lhs_type))
452438fd1498Szrj 	  || (INTEGRAL_TYPE_P (rhs_type)
452538fd1498Szrj 	      && !type_has_mode_precision_p (rhs_type))))
452638fd1498Szrj     {
452738fd1498Szrj       if (dump_enabled_p ())
452838fd1498Szrj 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
452938fd1498Szrj                          "type conversion to/from bit-precision unsupported.\n");
453138fd1498Szrj       return false;
453238fd1498Szrj     }
453338fd1498Szrj 
453438fd1498Szrj   /* Check the operands of the operation.  */
453538fd1498Szrj   if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype_in))
453638fd1498Szrj     {
453738fd1498Szrj       if (dump_enabled_p ())
453838fd1498Szrj 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
453938fd1498Szrj                          "use not simple.\n");
454038fd1498Szrj       return false;
454138fd1498Szrj     }
454238fd1498Szrj   if (op_type == binary_op)
454338fd1498Szrj     {
454438fd1498Szrj       bool ok;
454538fd1498Szrj 
454638fd1498Szrj       op1 = gimple_assign_rhs2 (stmt);
454738fd1498Szrj       gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
454838fd1498Szrj       /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
454938fd1498Szrj 	 OP1.  */
455038fd1498Szrj       if (CONSTANT_CLASS_P (op0))
455138fd1498Szrj 	ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &vectype_in);
455238fd1498Szrj       else
455338fd1498Szrj 	ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]);
455438fd1498Szrj 
455538fd1498Szrj       if (!ok)
455638fd1498Szrj 	{
455738fd1498Szrj           if (dump_enabled_p ())
455838fd1498Szrj             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
455938fd1498Szrj                              "use not simple.\n");
456038fd1498Szrj 	  return false;
456138fd1498Szrj 	}
456238fd1498Szrj     }
456338fd1498Szrj 
456438fd1498Szrj   /* If op0 is an external or constant def, use a vector type of
456538fd1498Szrj      the same size as the output vector type.  */
456638fd1498Szrj   if (!vectype_in)
456738fd1498Szrj     vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
456838fd1498Szrj   if (vec_stmt)
456938fd1498Szrj     gcc_assert (vectype_in);
457038fd1498Szrj   if (!vectype_in)
457138fd1498Szrj     {
457238fd1498Szrj       if (dump_enabled_p ())
457338fd1498Szrj 	{
457438fd1498Szrj 	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
457538fd1498Szrj                            "no vectype for scalar type ");
457638fd1498Szrj 	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
457738fd1498Szrj           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
457838fd1498Szrj 	}
457938fd1498Szrj 
458038fd1498Szrj       return false;
458138fd1498Szrj     }
458238fd1498Szrj 
458338fd1498Szrj   if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
458438fd1498Szrj       && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
458538fd1498Szrj     {
458638fd1498Szrj       if (dump_enabled_p ())
458738fd1498Szrj 	{
458838fd1498Szrj 	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
458938fd1498Szrj                            "can't convert between boolean and "
459038fd1498Szrj 			   "non-boolean vectors ");
459138fd1498Szrj 	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
459238fd1498Szrj           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
459338fd1498Szrj 	}
459438fd1498Szrj 
459538fd1498Szrj       return false;
459638fd1498Szrj     }
459738fd1498Szrj 
459838fd1498Szrj   nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
459938fd1498Szrj   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
460038fd1498Szrj   if (known_eq (nunits_out, nunits_in))
460138fd1498Szrj     modifier = NONE;
460238fd1498Szrj   else if (multiple_p (nunits_out, nunits_in))
460338fd1498Szrj     modifier = NARROW;
460438fd1498Szrj   else
460538fd1498Szrj     {
460638fd1498Szrj       gcc_checking_assert (multiple_p (nunits_in, nunits_out));
460738fd1498Szrj       modifier = WIDEN;
460838fd1498Szrj     }
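  /* Editorial sketch (assumed example, not from the original sources):
     converting V4SI (4 x int) to V2DI (2 x long) gives nunits_in == 4 and
     nunits_out == 2, hence modifier == WIDEN: each input vector yields two
     output vectors.  The reverse, V2DI -> V4SI, gives NARROW, and equal
     subpart counts give NONE.  */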
460938fd1498Szrj 
461038fd1498Szrj   /* Multiple types in SLP are handled by creating the appropriate number of
461138fd1498Szrj      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
461238fd1498Szrj      case of SLP.  */
461338fd1498Szrj   if (slp_node)
461438fd1498Szrj     ncopies = 1;
461538fd1498Szrj   else if (modifier == NARROW)
461638fd1498Szrj     ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
461738fd1498Szrj   else
461838fd1498Szrj     ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
461938fd1498Szrj 
462038fd1498Szrj   /* Sanity check: make sure that at least one copy of the vectorized stmt
462138fd1498Szrj      needs to be generated.  */
462238fd1498Szrj   gcc_assert (ncopies >= 1);
462338fd1498Szrj 
462438fd1498Szrj   bool found_mode = false;
462538fd1498Szrj   scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
462638fd1498Szrj   scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
462738fd1498Szrj   opt_scalar_mode rhs_mode_iter;
462838fd1498Szrj 
462938fd1498Szrj   /* Supportable by target?  */
463038fd1498Szrj   switch (modifier)
463138fd1498Szrj     {
463238fd1498Szrj     case NONE:
463338fd1498Szrj       if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
463438fd1498Szrj 	return false;
463538fd1498Szrj       if (supportable_convert_operation (code, vectype_out, vectype_in,
463638fd1498Szrj 					 &decl1, &code1))
463738fd1498Szrj 	break;
463838fd1498Szrj       /* FALLTHRU */
463938fd1498Szrj     unsupported:
464038fd1498Szrj       if (dump_enabled_p ())
464138fd1498Szrj 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
464238fd1498Szrj                          "conversion not supported by target.\n");
464338fd1498Szrj       return false;
464438fd1498Szrj 
464538fd1498Szrj     case WIDEN:
464638fd1498Szrj       if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
464738fd1498Szrj 					  &code1, &code2, &multi_step_cvt,
464838fd1498Szrj 					  &interm_types))
464938fd1498Szrj 	{
465038fd1498Szrj 	  /* A binary widening operation can only be supported directly
465138fd1498Szrj 	     by the architecture.  */
465238fd1498Szrj 	  gcc_assert (!(multi_step_cvt && op_type == binary_op));
465338fd1498Szrj 	  break;
465438fd1498Szrj 	}
465538fd1498Szrj 
465638fd1498Szrj       if (code != FLOAT_EXPR
465738fd1498Szrj 	  || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
465838fd1498Szrj 	goto unsupported;
465938fd1498Szrj 
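      /* Editorial sketch (assumed example, not from the original sources):
	 for a short -> double FLOAT_EXPR the direct widening failed above,
	 so try successively 2x-wider integer modes as an intermediate type:
	 short is first widened to int (or to long, when that matches the
	 float size) and the intermediate integer is then converted to
	 double.  The search stops once the intermediate mode grows wider
	 than the float type.  */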
466038fd1498Szrj       fltsz = GET_MODE_SIZE (lhs_mode);
466138fd1498Szrj       FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
466238fd1498Szrj 	{
466338fd1498Szrj 	  rhs_mode = rhs_mode_iter.require ();
466438fd1498Szrj 	  if (GET_MODE_SIZE (rhs_mode) > fltsz)
466538fd1498Szrj 	    break;
466638fd1498Szrj 
466738fd1498Szrj 	  cvt_type
466838fd1498Szrj 	    = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
466938fd1498Szrj 	  cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
467038fd1498Szrj 	  if (cvt_type == NULL_TREE)
467138fd1498Szrj 	    goto unsupported;
467238fd1498Szrj 
467338fd1498Szrj 	  if (GET_MODE_SIZE (rhs_mode) == fltsz)
467438fd1498Szrj 	    {
467538fd1498Szrj 	      if (!supportable_convert_operation (code, vectype_out,
467638fd1498Szrj 						  cvt_type, &decl1, &codecvt1))
467738fd1498Szrj 		goto unsupported;
467838fd1498Szrj 	    }
467938fd1498Szrj 	  else if (!supportable_widening_operation (code, stmt, vectype_out,
468038fd1498Szrj 						    cvt_type, &codecvt1,
468138fd1498Szrj 						    &codecvt2, &multi_step_cvt,
468238fd1498Szrj 						    &interm_types))
468338fd1498Szrj 	    continue;
468438fd1498Szrj 	  else
468538fd1498Szrj 	    gcc_assert (multi_step_cvt == 0);
468638fd1498Szrj 
468738fd1498Szrj 	  if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
468838fd1498Szrj 					      vectype_in, &code1, &code2,
468938fd1498Szrj 					      &multi_step_cvt, &interm_types))
469038fd1498Szrj 	    {
469138fd1498Szrj 	      found_mode = true;
469238fd1498Szrj 	      break;
469338fd1498Szrj 	    }
469438fd1498Szrj 	}
469538fd1498Szrj 
469638fd1498Szrj       if (!found_mode)
469738fd1498Szrj 	goto unsupported;
469838fd1498Szrj 
469938fd1498Szrj       if (GET_MODE_SIZE (rhs_mode) == fltsz)
470038fd1498Szrj 	codecvt2 = ERROR_MARK;
470138fd1498Szrj       else
470238fd1498Szrj 	{
470338fd1498Szrj 	  multi_step_cvt++;
470438fd1498Szrj 	  interm_types.safe_push (cvt_type);
470538fd1498Szrj 	  cvt_type = NULL_TREE;
470638fd1498Szrj 	}
470738fd1498Szrj       break;
470838fd1498Szrj 
470938fd1498Szrj     case NARROW:
471038fd1498Szrj       gcc_assert (op_type == unary_op);
471138fd1498Szrj       if (supportable_narrowing_operation (code, vectype_out, vectype_in,
471238fd1498Szrj 					   &code1, &multi_step_cvt,
471338fd1498Szrj 					   &interm_types))
471438fd1498Szrj 	break;
471538fd1498Szrj 
471638fd1498Szrj       if (code != FIX_TRUNC_EXPR
471738fd1498Szrj 	  || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
471838fd1498Szrj 	goto unsupported;
471938fd1498Szrj 
472038fd1498Szrj       cvt_type
472138fd1498Szrj 	= build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
472238fd1498Szrj       cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
472338fd1498Szrj       if (cvt_type == NULL_TREE)
472438fd1498Szrj 	goto unsupported;
472538fd1498Szrj       if (!supportable_convert_operation (code, cvt_type, vectype_in,
472638fd1498Szrj 					  &decl1, &codecvt1))
472738fd1498Szrj 	goto unsupported;
472838fd1498Szrj       if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
472938fd1498Szrj 					   &code1, &multi_step_cvt,
473038fd1498Szrj 					   &interm_types))
473138fd1498Szrj 	break;
473238fd1498Szrj       goto unsupported;
473338fd1498Szrj 
473438fd1498Szrj     default:
473538fd1498Szrj       gcc_unreachable ();
473638fd1498Szrj     }
473738fd1498Szrj 
473838fd1498Szrj   if (!vec_stmt)		/* transformation not required.  */
473938fd1498Szrj     {
474038fd1498Szrj       if (dump_enabled_p ())
474138fd1498Szrj 	dump_printf_loc (MSG_NOTE, vect_location,
474238fd1498Szrj                          "=== vectorizable_conversion ===\n");
474338fd1498Szrj       if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
474438fd1498Szrj         {
474538fd1498Szrj 	  STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
474638fd1498Szrj 	  if (!slp_node)
474738fd1498Szrj 	    vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
474838fd1498Szrj 	}
474938fd1498Szrj       else if (modifier == NARROW)
475038fd1498Szrj 	{
475138fd1498Szrj 	  STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
475238fd1498Szrj 	  if (!slp_node)
475338fd1498Szrj 	    vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
475438fd1498Szrj 	}
475538fd1498Szrj       else
475638fd1498Szrj 	{
475738fd1498Szrj 	  STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
475838fd1498Szrj 	  if (!slp_node)
475938fd1498Szrj 	    vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
476038fd1498Szrj 	}
476138fd1498Szrj       interm_types.release ();
476238fd1498Szrj       return true;
476338fd1498Szrj     }
476438fd1498Szrj 
476538fd1498Szrj   /* Transform.  */
476638fd1498Szrj   if (dump_enabled_p ())
476738fd1498Szrj     dump_printf_loc (MSG_NOTE, vect_location,
476838fd1498Szrj                      "transform conversion. ncopies = %d.\n", ncopies);
476938fd1498Szrj 
477038fd1498Szrj   if (op_type == binary_op)
477138fd1498Szrj     {
477238fd1498Szrj       if (CONSTANT_CLASS_P (op0))
477338fd1498Szrj 	op0 = fold_convert (TREE_TYPE (op1), op0);
477438fd1498Szrj       else if (CONSTANT_CLASS_P (op1))
477538fd1498Szrj 	op1 = fold_convert (TREE_TYPE (op0), op1);
477638fd1498Szrj     }
477738fd1498Szrj 
477838fd1498Szrj   /* In case of multi-step conversion, we first generate conversion operations
477938fd1498Szrj      to the intermediate types, and then from those types to the final one.
478038fd1498Szrj      We create vector destinations for the intermediate types (TYPES) received
478138fd1498Szrj      from supportable_*_operation, and store them in the correct order
478238fd1498Szrj      for future use in vect_create_vectorized_*_stmts ().  */
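  /* Editorial sketch (not from the original sources): the destination for
     the final type is pushed first, so it ends up at vec_dsts[0] with the
     intermediate-type destinations after it; the transform loops below walk
     i = multi_step_cvt .. 0, emitting intermediate results first and the
     final-type result last.  */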
478338fd1498Szrj   auto_vec<tree> vec_dsts (multi_step_cvt + 1);
478438fd1498Szrj   vec_dest = vect_create_destination_var (scalar_dest,
478538fd1498Szrj 					  (cvt_type && modifier == WIDEN)
478638fd1498Szrj 					  ? cvt_type : vectype_out);
478738fd1498Szrj   vec_dsts.quick_push (vec_dest);
478838fd1498Szrj 
478938fd1498Szrj   if (multi_step_cvt)
479038fd1498Szrj     {
479138fd1498Szrj       for (i = interm_types.length () - 1;
479238fd1498Szrj 	   interm_types.iterate (i, &intermediate_type); i--)
479338fd1498Szrj 	{
479438fd1498Szrj 	  vec_dest = vect_create_destination_var (scalar_dest,
479538fd1498Szrj 						  intermediate_type);
479638fd1498Szrj 	  vec_dsts.quick_push (vec_dest);
479738fd1498Szrj 	}
479838fd1498Szrj     }
479938fd1498Szrj 
480038fd1498Szrj   if (cvt_type)
480138fd1498Szrj     vec_dest = vect_create_destination_var (scalar_dest,
480238fd1498Szrj 					    modifier == WIDEN
480338fd1498Szrj 					    ? vectype_out : cvt_type);
480438fd1498Szrj 
480538fd1498Szrj   if (!slp_node)
480638fd1498Szrj     {
480738fd1498Szrj       if (modifier == WIDEN)
480838fd1498Szrj 	{
480938fd1498Szrj 	  vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
481038fd1498Szrj 	  if (op_type == binary_op)
481138fd1498Szrj 	    vec_oprnds1.create (1);
481238fd1498Szrj 	}
481338fd1498Szrj       else if (modifier == NARROW)
481438fd1498Szrj 	vec_oprnds0.create (
481538fd1498Szrj 		   2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
481638fd1498Szrj     }
481738fd1498Szrj   else if (code == WIDEN_LSHIFT_EXPR)
481838fd1498Szrj     vec_oprnds1.create (slp_node->vec_stmts_size);
481938fd1498Szrj 
482038fd1498Szrj   last_oprnd = op0;
482138fd1498Szrj   prev_stmt_info = NULL;
482238fd1498Szrj   switch (modifier)
482338fd1498Szrj     {
482438fd1498Szrj     case NONE:
482538fd1498Szrj       for (j = 0; j < ncopies; j++)
482638fd1498Szrj 	{
482738fd1498Szrj 	  if (j == 0)
482838fd1498Szrj 	    vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
482938fd1498Szrj 	  else
483038fd1498Szrj 	    vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
483138fd1498Szrj 
483238fd1498Szrj 	  FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
483338fd1498Szrj 	    {
483438fd1498Szrj 	      /* Arguments are ready.  Create the new vector stmt.  */
483538fd1498Szrj 	      if (code1 == CALL_EXPR)
483638fd1498Szrj 		{
483738fd1498Szrj 		  new_stmt = gimple_build_call (decl1, 1, vop0);
483838fd1498Szrj 		  new_temp = make_ssa_name (vec_dest, new_stmt);
483938fd1498Szrj 		  gimple_call_set_lhs (new_stmt, new_temp);
484038fd1498Szrj 		}
484138fd1498Szrj 	      else
484238fd1498Szrj 		{
484338fd1498Szrj 		  gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
484438fd1498Szrj 		  new_stmt = gimple_build_assign (vec_dest, code1, vop0);
484538fd1498Szrj 		  new_temp = make_ssa_name (vec_dest, new_stmt);
484638fd1498Szrj 		  gimple_assign_set_lhs (new_stmt, new_temp);
484738fd1498Szrj 		}
484838fd1498Szrj 
484938fd1498Szrj 	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
485038fd1498Szrj 	      if (slp_node)
485138fd1498Szrj 		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
485238fd1498Szrj 	      else
485338fd1498Szrj 		{
485438fd1498Szrj 		  if (!prev_stmt_info)
485538fd1498Szrj 		    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
485638fd1498Szrj 		  else
485738fd1498Szrj 		    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
485838fd1498Szrj 		  prev_stmt_info = vinfo_for_stmt (new_stmt);
485938fd1498Szrj 		}
486038fd1498Szrj 	    }
486138fd1498Szrj 	}
486238fd1498Szrj       break;
486338fd1498Szrj 
486438fd1498Szrj     case WIDEN:
486538fd1498Szrj       /* In case the vectorization factor (VF) is bigger than the number
486638fd1498Szrj 	 of elements that we can fit in a vectype (nunits), we have to
486738fd1498Szrj 	 generate more than one vector stmt, i.e., we need to "unroll"
486838fd1498Szrj 	 the vector stmt by a factor of VF/nunits.  */
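      /* Hedged example (assumed numbers, not from the original sources):
	 with VF == 8 and V8HI inputs widened to V4SI, ncopies == 1, but
	 each copy produces a lo and a hi part via code1/code2, so two
	 V4SI stmts replace eight scalar widenings.  */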
486938fd1498Szrj       for (j = 0; j < ncopies; j++)
487038fd1498Szrj 	{
487138fd1498Szrj 	  /* Handle uses.  */
487238fd1498Szrj 	  if (j == 0)
487338fd1498Szrj 	    {
487438fd1498Szrj 	      if (slp_node)
487538fd1498Szrj 		{
487638fd1498Szrj 		  if (code == WIDEN_LSHIFT_EXPR)
487738fd1498Szrj 		    {
487838fd1498Szrj 		      unsigned int k;
487938fd1498Szrj 
488038fd1498Szrj 		      vec_oprnd1 = op1;
488138fd1498Szrj 		      /* Store vec_oprnd1 for every vector stmt to be created
488238fd1498Szrj 			 for SLP_NODE.  We check during the analysis that all
488338fd1498Szrj 			 the shift arguments are the same.  */
488438fd1498Szrj 		      for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
488538fd1498Szrj 			vec_oprnds1.quick_push (vec_oprnd1);
488638fd1498Szrj 
488738fd1498Szrj 		      vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
488838fd1498Szrj 					 slp_node);
488938fd1498Szrj 		    }
489038fd1498Szrj 		  else
489138fd1498Szrj 		    vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
489238fd1498Szrj 				       &vec_oprnds1, slp_node);
489338fd1498Szrj 		}
489438fd1498Szrj 	      else
489538fd1498Szrj 		{
489638fd1498Szrj 		  vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
489738fd1498Szrj 		  vec_oprnds0.quick_push (vec_oprnd0);
489838fd1498Szrj 		  if (op_type == binary_op)
489938fd1498Szrj 		    {
490038fd1498Szrj 		      if (code == WIDEN_LSHIFT_EXPR)
490138fd1498Szrj 			vec_oprnd1 = op1;
490238fd1498Szrj 		      else
490338fd1498Szrj 			vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
490438fd1498Szrj 		      vec_oprnds1.quick_push (vec_oprnd1);
490538fd1498Szrj 		    }
490638fd1498Szrj 		}
490738fd1498Szrj 	    }
490838fd1498Szrj 	  else
490938fd1498Szrj 	    {
491038fd1498Szrj 	      vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
491138fd1498Szrj 	      vec_oprnds0.truncate (0);
491238fd1498Szrj 	      vec_oprnds0.quick_push (vec_oprnd0);
491338fd1498Szrj 	      if (op_type == binary_op)
491438fd1498Szrj 		{
491538fd1498Szrj 		  if (code == WIDEN_LSHIFT_EXPR)
491638fd1498Szrj 		    vec_oprnd1 = op1;
491738fd1498Szrj 		  else
491838fd1498Szrj 		    vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
491938fd1498Szrj 								 vec_oprnd1);
492038fd1498Szrj 		  vec_oprnds1.truncate (0);
492138fd1498Szrj 		  vec_oprnds1.quick_push (vec_oprnd1);
492238fd1498Szrj 		}
492338fd1498Szrj 	    }
492438fd1498Szrj 
492538fd1498Szrj 	  /* Arguments are ready.  Create the new vector stmts.  */
492638fd1498Szrj 	  for (i = multi_step_cvt; i >= 0; i--)
492738fd1498Szrj 	    {
492838fd1498Szrj 	      tree this_dest = vec_dsts[i];
492938fd1498Szrj 	      enum tree_code c1 = code1, c2 = code2;
493038fd1498Szrj 	      if (i == 0 && codecvt2 != ERROR_MARK)
493138fd1498Szrj 		{
493238fd1498Szrj 		  c1 = codecvt1;
493338fd1498Szrj 		  c2 = codecvt2;
493438fd1498Szrj 		}
493538fd1498Szrj 	      vect_create_vectorized_promotion_stmts (&vec_oprnds0,
493638fd1498Szrj 						      &vec_oprnds1,
493738fd1498Szrj 						      stmt, this_dest, gsi,
493838fd1498Szrj 						      c1, c2, decl1, decl2,
493938fd1498Szrj 						      op_type);
494038fd1498Szrj 	    }
494138fd1498Szrj 
494238fd1498Szrj 	  FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
494338fd1498Szrj 	    {
494438fd1498Szrj 	      if (cvt_type)
494538fd1498Szrj 		{
494638fd1498Szrj 		  if (codecvt1 == CALL_EXPR)
494738fd1498Szrj 		    {
494838fd1498Szrj 		      new_stmt = gimple_build_call (decl1, 1, vop0);
494938fd1498Szrj 		      new_temp = make_ssa_name (vec_dest, new_stmt);
495038fd1498Szrj 		      gimple_call_set_lhs (new_stmt, new_temp);
495138fd1498Szrj 		    }
495238fd1498Szrj 		  else
495338fd1498Szrj 		    {
495438fd1498Szrj 		      gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
495538fd1498Szrj 		      new_temp = make_ssa_name (vec_dest);
495638fd1498Szrj 		      new_stmt = gimple_build_assign (new_temp, codecvt1,
495738fd1498Szrj 						      vop0);
495838fd1498Szrj 		    }
495938fd1498Szrj 
496038fd1498Szrj 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
496138fd1498Szrj 		}
496238fd1498Szrj 	      else
496338fd1498Szrj 		new_stmt = SSA_NAME_DEF_STMT (vop0);
496438fd1498Szrj 
496538fd1498Szrj 	      if (slp_node)
496638fd1498Szrj 		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
496738fd1498Szrj 	      else
496838fd1498Szrj 		{
496938fd1498Szrj 		  if (!prev_stmt_info)
497038fd1498Szrj 		    STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
497138fd1498Szrj 		  else
497238fd1498Szrj 		    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
497338fd1498Szrj 		  prev_stmt_info = vinfo_for_stmt (new_stmt);
497438fd1498Szrj 		}
497538fd1498Szrj 	    }
497638fd1498Szrj 	}
497738fd1498Szrj 
497838fd1498Szrj       *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
497938fd1498Szrj       break;
498038fd1498Szrj 
498138fd1498Szrj     case NARROW:
498238fd1498Szrj       /* In case the vectorization factor (VF) is bigger than the number
498338fd1498Szrj 	 of elements that we can fit in a vectype (nunits), we have to
498438fd1498Szrj 	 generate more than one vector stmt, i.e., we need to "unroll"
498538fd1498Szrj 	 the vector stmt by a factor of VF/nunits.  */
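      /* Hedged example (assumed numbers, not from the original sources):
	 narrowing V4SI -> V8HI with VF == 8 gives ncopies == 1, but each
	 demotion step consumes two input vectors to produce one output
	 vector, which is why vec_oprnds0 was created above with room for
	 two operands per multi-step group.  */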
498638fd1498Szrj       for (j = 0; j < ncopies; j++)
498738fd1498Szrj 	{
498838fd1498Szrj 	  /* Handle uses.  */
498938fd1498Szrj 	  if (slp_node)
499038fd1498Szrj 	    vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
499138fd1498Szrj 			       slp_node);
499238fd1498Szrj 	  else
499338fd1498Szrj 	    {
499438fd1498Szrj 	      vec_oprnds0.truncate (0);
499538fd1498Szrj 	      vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
499638fd1498Szrj 					vect_pow2 (multi_step_cvt) - 1);
499738fd1498Szrj 	    }
499838fd1498Szrj 
499938fd1498Szrj 	  /* Arguments are ready.  Create the new vector stmts.  */
500038fd1498Szrj 	  if (cvt_type)
500138fd1498Szrj 	    FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
500238fd1498Szrj 	      {
500338fd1498Szrj 		if (codecvt1 == CALL_EXPR)
500438fd1498Szrj 		  {
500538fd1498Szrj 		    new_stmt = gimple_build_call (decl1, 1, vop0);
500638fd1498Szrj 		    new_temp = make_ssa_name (vec_dest, new_stmt);
500738fd1498Szrj 		    gimple_call_set_lhs (new_stmt, new_temp);
500838fd1498Szrj 		  }
500938fd1498Szrj 		else
501038fd1498Szrj 		  {
501138fd1498Szrj 		    gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
501238fd1498Szrj 		    new_temp = make_ssa_name (vec_dest);
501338fd1498Szrj 		    new_stmt = gimple_build_assign (new_temp, codecvt1,
501438fd1498Szrj 						    vop0);
501538fd1498Szrj 		  }
501638fd1498Szrj 
501738fd1498Szrj 		vect_finish_stmt_generation (stmt, new_stmt, gsi);
501838fd1498Szrj 		vec_oprnds0[i] = new_temp;
501938fd1498Szrj 	      }
502038fd1498Szrj 
502138fd1498Szrj 	  vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
502238fd1498Szrj 						 stmt, vec_dsts, gsi,
502338fd1498Szrj 						 slp_node, code1,
502438fd1498Szrj 						 &prev_stmt_info);
502538fd1498Szrj 	}
502638fd1498Szrj 
502738fd1498Szrj       *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
502838fd1498Szrj       break;
502938fd1498Szrj     }
503038fd1498Szrj 
503138fd1498Szrj   vec_oprnds0.release ();
503238fd1498Szrj   vec_oprnds1.release ();
503338fd1498Szrj   interm_types.release ();
503438fd1498Szrj 
503538fd1498Szrj   return true;
503638fd1498Szrj }
503738fd1498Szrj 
503838fd1498Szrj 
503938fd1498Szrj /* Function vectorizable_assignment.
504038fd1498Szrj 
504138fd1498Szrj    Check if STMT performs an assignment (copy) that can be vectorized.
504238fd1498Szrj    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
504338fd1498Szrj    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
504438fd1498Szrj    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
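/* Editorial example (a hedged sketch, not from the original sources):
   copies such as

       _2 = _1;
       _2 = VIEW_CONVERT_EXPR<unsigned int>(_1);

   are vectorized as plain vector copies, wrapping the operand in a
   VIEW_CONVERT_EXPR to the destination vector type when the scalar
   code was a conversion (see the transform loop below).  */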
504538fd1498Szrj 
504638fd1498Szrj static bool
504738fd1498Szrj vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi,
504838fd1498Szrj 			 gimple **vec_stmt, slp_tree slp_node)
504938fd1498Szrj {
505038fd1498Szrj   tree vec_dest;
505138fd1498Szrj   tree scalar_dest;
505238fd1498Szrj   tree op;
505338fd1498Szrj   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
505438fd1498Szrj   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
505538fd1498Szrj   tree new_temp;
505638fd1498Szrj   gimple *def_stmt;
505738fd1498Szrj   enum vect_def_type dt[1] = {vect_unknown_def_type};
505838fd1498Szrj   int ndts = 1;
505938fd1498Szrj   int ncopies;
506038fd1498Szrj   int i, j;
506138fd1498Szrj   vec<tree> vec_oprnds = vNULL;
506238fd1498Szrj   tree vop;
506338fd1498Szrj   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
506438fd1498Szrj   vec_info *vinfo = stmt_info->vinfo;
506538fd1498Szrj   gimple *new_stmt = NULL;
506638fd1498Szrj   stmt_vec_info prev_stmt_info = NULL;
506738fd1498Szrj   enum tree_code code;
506838fd1498Szrj   tree vectype_in;
506938fd1498Szrj 
507038fd1498Szrj   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
507138fd1498Szrj     return false;
507238fd1498Szrj 
507338fd1498Szrj   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
507438fd1498Szrj       && ! vec_stmt)
507538fd1498Szrj     return false;
507638fd1498Szrj 
507738fd1498Szrj   /* Is STMT a vectorizable assignment?  */
507838fd1498Szrj   if (!is_gimple_assign (stmt))
507938fd1498Szrj     return false;
508038fd1498Szrj 
508138fd1498Szrj   scalar_dest = gimple_assign_lhs (stmt);
508238fd1498Szrj   if (TREE_CODE (scalar_dest) != SSA_NAME)
508338fd1498Szrj     return false;
508438fd1498Szrj 
508538fd1498Szrj   code = gimple_assign_rhs_code (stmt);
508638fd1498Szrj   if (gimple_assign_single_p (stmt)
508738fd1498Szrj       || code == PAREN_EXPR
508838fd1498Szrj       || CONVERT_EXPR_CODE_P (code))
508938fd1498Szrj     op = gimple_assign_rhs1 (stmt);
509038fd1498Szrj   else
509138fd1498Szrj     return false;
509238fd1498Szrj 
509338fd1498Szrj   if (code == VIEW_CONVERT_EXPR)
509438fd1498Szrj     op = TREE_OPERAND (op, 0);
509538fd1498Szrj 
509638fd1498Szrj   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
509738fd1498Szrj   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
509838fd1498Szrj 
509938fd1498Szrj   /* Multiple types in SLP are handled by creating the appropriate number of
510038fd1498Szrj      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
510138fd1498Szrj      case of SLP.  */
510238fd1498Szrj   if (slp_node)
510338fd1498Szrj     ncopies = 1;
510438fd1498Szrj   else
510538fd1498Szrj     ncopies = vect_get_num_copies (loop_vinfo, vectype);
510638fd1498Szrj 
510738fd1498Szrj   gcc_assert (ncopies >= 1);
510838fd1498Szrj 
510938fd1498Szrj   if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[0], &vectype_in))
511038fd1498Szrj     {
511138fd1498Szrj       if (dump_enabled_p ())
511238fd1498Szrj         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
511338fd1498Szrj                          "use not simple.\n");
511438fd1498Szrj       return false;
511538fd1498Szrj     }
511638fd1498Szrj 
511738fd1498Szrj   /* We can handle NOP_EXPR and VIEW_CONVERT_EXPR conversions that do
511838fd1498Szrj      not change the number of elements or the vector size.  */
511938fd1498Szrj   if ((CONVERT_EXPR_CODE_P (code)
512038fd1498Szrj        || code == VIEW_CONVERT_EXPR)
512138fd1498Szrj       && (!vectype_in
512238fd1498Szrj 	  || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
512338fd1498Szrj 	  || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
512438fd1498Szrj 		       GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
512538fd1498Szrj     return false;
512638fd1498Szrj 
512738fd1498Szrj   /* We do not handle bit-precision changes.  */
512838fd1498Szrj   if ((CONVERT_EXPR_CODE_P (code)
512938fd1498Szrj        || code == VIEW_CONVERT_EXPR)
513038fd1498Szrj       && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
513138fd1498Szrj       && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
513238fd1498Szrj 	  || !type_has_mode_precision_p (TREE_TYPE (op)))
513338fd1498Szrj       /* But a conversion that does not change the bit-pattern is ok.  */
513438fd1498Szrj       && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
513538fd1498Szrj 	    > TYPE_PRECISION (TREE_TYPE (op)))
513638fd1498Szrj 	   && TYPE_UNSIGNED (TREE_TYPE (op)))
513738fd1498Szrj       /* Conversion between boolean types of different sizes is
513838fd1498Szrj 	 a simple assignment in case their vectypes are the same
513938fd1498Szrj 	 boolean vectors.  */
514038fd1498Szrj       && (!VECTOR_BOOLEAN_TYPE_P (vectype)
514138fd1498Szrj 	  || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
514238fd1498Szrj     {
514338fd1498Szrj       if (dump_enabled_p ())
514438fd1498Szrj         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
514538fd1498Szrj                          "type conversion to/from bit-precision "
514638fd1498Szrj                          "unsupported.\n");
514738fd1498Szrj       return false;
514838fd1498Szrj     }
514938fd1498Szrj 
515038fd1498Szrj   if (!vec_stmt) /* transformation not required.  */
515138fd1498Szrj     {
515238fd1498Szrj       STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
515338fd1498Szrj       if (dump_enabled_p ())
515438fd1498Szrj         dump_printf_loc (MSG_NOTE, vect_location,
515538fd1498Szrj                          "=== vectorizable_assignment ===\n");
515638fd1498Szrj       if (!slp_node)
515738fd1498Szrj 	vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
515838fd1498Szrj       return true;
515938fd1498Szrj     }
516038fd1498Szrj 
516138fd1498Szrj   /* Transform.  */
516238fd1498Szrj   if (dump_enabled_p ())
516338fd1498Szrj     dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
516438fd1498Szrj 
516538fd1498Szrj   /* Handle def.  */
516638fd1498Szrj   vec_dest = vect_create_destination_var (scalar_dest, vectype);
516738fd1498Szrj 
516838fd1498Szrj   /* Handle use.  */
516938fd1498Szrj   for (j = 0; j < ncopies; j++)
517038fd1498Szrj     {
517138fd1498Szrj       /* Handle uses.  */
517238fd1498Szrj       if (j == 0)
517338fd1498Szrj         vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
517438fd1498Szrj       else
517538fd1498Szrj         vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
517638fd1498Szrj 
517738fd1498Szrj       /* Arguments are ready.  Create the new vector stmt.  */
517838fd1498Szrj       FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
517938fd1498Szrj        {
518038fd1498Szrj 	 if (CONVERT_EXPR_CODE_P (code)
518138fd1498Szrj 	     || code == VIEW_CONVERT_EXPR)
518238fd1498Szrj 	   vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
518338fd1498Szrj          new_stmt = gimple_build_assign (vec_dest, vop);
518438fd1498Szrj          new_temp = make_ssa_name (vec_dest, new_stmt);
518538fd1498Szrj          gimple_assign_set_lhs (new_stmt, new_temp);
518638fd1498Szrj          vect_finish_stmt_generation (stmt, new_stmt, gsi);
518738fd1498Szrj          if (slp_node)
518838fd1498Szrj            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
518938fd1498Szrj        }
519038fd1498Szrj 
519138fd1498Szrj       if (slp_node)
519238fd1498Szrj         continue;
519338fd1498Szrj 
519438fd1498Szrj       if (j == 0)
519538fd1498Szrj         STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
519638fd1498Szrj       else
519738fd1498Szrj         STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
519838fd1498Szrj 
519938fd1498Szrj       prev_stmt_info = vinfo_for_stmt (new_stmt);
520038fd1498Szrj     }
520138fd1498Szrj 
520238fd1498Szrj   vec_oprnds.release ();
520338fd1498Szrj   return true;
520438fd1498Szrj }
520538fd1498Szrj 
520638fd1498Szrj 
520738fd1498Szrj /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
520838fd1498Szrj    either as shift by a scalar or by a vector.  */
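/* Editorial usage sketch (hypothetical caller, not from the original
   sources):

       if (vect_supportable_shift (LSHIFT_EXPR, TREE_TYPE (oprnd)))
	 ... synthesize a shift-based replacement ...

   i.e. the predicate answers whether either the scalar-amount or the
   vector-amount shift optab exists for the scalar type's vectype.  */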
520938fd1498Szrj 
521038fd1498Szrj bool
521138fd1498Szrj vect_supportable_shift (enum tree_code code, tree scalar_type)
521238fd1498Szrj {
521338fd1498Szrj 
521438fd1498Szrj   machine_mode vec_mode;
521538fd1498Szrj   optab optab;
521638fd1498Szrj   int icode;
521738fd1498Szrj   tree vectype;
521838fd1498Szrj 
521938fd1498Szrj   vectype = get_vectype_for_scalar_type (scalar_type);
522038fd1498Szrj   if (!vectype)
522138fd1498Szrj     return false;
522238fd1498Szrj 
522338fd1498Szrj   optab = optab_for_tree_code (code, vectype, optab_scalar);
522438fd1498Szrj   if (!optab
522538fd1498Szrj       || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
522638fd1498Szrj     {
522738fd1498Szrj       optab = optab_for_tree_code (code, vectype, optab_vector);
522838fd1498Szrj       if (!optab
522938fd1498Szrj           || (optab_handler (optab, TYPE_MODE (vectype))
523038fd1498Szrj                       == CODE_FOR_nothing))
523138fd1498Szrj         return false;
523238fd1498Szrj     }
523338fd1498Szrj 
523438fd1498Szrj   vec_mode = TYPE_MODE (vectype);
523538fd1498Szrj   icode = (int) optab_handler (optab, vec_mode);
523638fd1498Szrj   if (icode == CODE_FOR_nothing)
523738fd1498Szrj     return false;
523838fd1498Szrj 
523938fd1498Szrj   return true;
524038fd1498Szrj }
524138fd1498Szrj 
524238fd1498Szrj 
524338fd1498Szrj /* Function vectorizable_shift.
524438fd1498Szrj 
524538fd1498Szrj    Check if STMT performs a shift operation that can be vectorized.
524638fd1498Szrj    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
524738fd1498Szrj    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
524838fd1498Szrj    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
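/* Editorial example (a hedged sketch, not from the original sources):

       x[i] << 3        invariant amount: vector/scalar shift optab
       x[i] << y[i]     variable amount:  vector/vector shift optab

   The analysis below prefers the scalar-amount optab for invariant
   counts and falls back to the vector-amount optab otherwise.  */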
524938fd1498Szrj 
525038fd1498Szrj static bool
525138fd1498Szrj vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi,
525238fd1498Szrj                     gimple **vec_stmt, slp_tree slp_node)
525338fd1498Szrj {
525438fd1498Szrj   tree vec_dest;
525538fd1498Szrj   tree scalar_dest;
525638fd1498Szrj   tree op0, op1 = NULL;
525738fd1498Szrj   tree vec_oprnd1 = NULL_TREE;
525838fd1498Szrj   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
525938fd1498Szrj   tree vectype;
526038fd1498Szrj   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
526138fd1498Szrj   enum tree_code code;
526238fd1498Szrj   machine_mode vec_mode;
526338fd1498Szrj   tree new_temp;
526438fd1498Szrj   optab optab;
526538fd1498Szrj   int icode;
526638fd1498Szrj   machine_mode optab_op2_mode;
526738fd1498Szrj   gimple *def_stmt;
526838fd1498Szrj   enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
526938fd1498Szrj   int ndts = 2;
527038fd1498Szrj   gimple *new_stmt = NULL;
527138fd1498Szrj   stmt_vec_info prev_stmt_info;
527238fd1498Szrj   poly_uint64 nunits_in;
527338fd1498Szrj   poly_uint64 nunits_out;
527438fd1498Szrj   tree vectype_out;
527538fd1498Szrj   tree op1_vectype;
527638fd1498Szrj   int ncopies;
527738fd1498Szrj   int j, i;
527838fd1498Szrj   vec<tree> vec_oprnds0 = vNULL;
527938fd1498Szrj   vec<tree> vec_oprnds1 = vNULL;
528038fd1498Szrj   tree vop0, vop1;
528138fd1498Szrj   unsigned int k;
528238fd1498Szrj   bool scalar_shift_arg = true;
528338fd1498Szrj   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
528438fd1498Szrj   vec_info *vinfo = stmt_info->vinfo;
528538fd1498Szrj 
528638fd1498Szrj   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
528738fd1498Szrj     return false;
528838fd1498Szrj 
528938fd1498Szrj   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
529038fd1498Szrj       && ! vec_stmt)
529138fd1498Szrj     return false;
529238fd1498Szrj 
529338fd1498Szrj   /* Is STMT a vectorizable shift/rotate operation?  */
529438fd1498Szrj   if (!is_gimple_assign (stmt))
529538fd1498Szrj     return false;
529638fd1498Szrj 
529738fd1498Szrj   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
529838fd1498Szrj     return false;
529938fd1498Szrj 
530038fd1498Szrj   code = gimple_assign_rhs_code (stmt);
530138fd1498Szrj 
530238fd1498Szrj   if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
530338fd1498Szrj       || code == RROTATE_EXPR))
530438fd1498Szrj     return false;
530538fd1498Szrj 
530638fd1498Szrj   scalar_dest = gimple_assign_lhs (stmt);
530738fd1498Szrj   vectype_out = STMT_VINFO_VECTYPE (stmt_info);
530838fd1498Szrj   if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
530938fd1498Szrj     {
531038fd1498Szrj       if (dump_enabled_p ())
531138fd1498Szrj         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
531238fd1498Szrj                          "bit-precision shifts not supported.\n");
531338fd1498Szrj       return false;
531438fd1498Szrj     }
531538fd1498Szrj 
531638fd1498Szrj   op0 = gimple_assign_rhs1 (stmt);
531738fd1498Szrj   if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
531838fd1498Szrj     {
531938fd1498Szrj       if (dump_enabled_p ())
532038fd1498Szrj         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
532138fd1498Szrj                          "use not simple.\n");
532238fd1498Szrj       return false;
532338fd1498Szrj     }
532438fd1498Szrj   /* If op0 is an external or constant def, use a vector type with
532538fd1498Szrj      the same size as the output vector type.  */
532638fd1498Szrj   if (!vectype)
532738fd1498Szrj     vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
532838fd1498Szrj   if (vec_stmt)
532938fd1498Szrj     gcc_assert (vectype);
533038fd1498Szrj   if (!vectype)
533138fd1498Szrj     {
533238fd1498Szrj       if (dump_enabled_p ())
533338fd1498Szrj         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
533438fd1498Szrj                          "no vectype for scalar type\n");
533538fd1498Szrj       return false;
533638fd1498Szrj     }
533738fd1498Szrj 
533838fd1498Szrj   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
533938fd1498Szrj   nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
534038fd1498Szrj   if (maybe_ne (nunits_out, nunits_in))
534138fd1498Szrj     return false;
534238fd1498Szrj 
534338fd1498Szrj   op1 = gimple_assign_rhs2 (stmt);
534438fd1498Szrj   if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &op1_vectype))
534538fd1498Szrj     {
534638fd1498Szrj       if (dump_enabled_p ())
534738fd1498Szrj         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
534838fd1498Szrj                          "use not simple.\n");
534938fd1498Szrj       return false;
535038fd1498Szrj     }
535138fd1498Szrj 
535238fd1498Szrj   /* Multiple types in SLP are handled by creating the appropriate number of
535338fd1498Szrj      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
535438fd1498Szrj      case of SLP.  */
535538fd1498Szrj   if (slp_node)
535638fd1498Szrj     ncopies = 1;
535738fd1498Szrj   else
535838fd1498Szrj     ncopies = vect_get_num_copies (loop_vinfo, vectype);
535938fd1498Szrj 
536038fd1498Szrj   gcc_assert (ncopies >= 1);
536138fd1498Szrj 
536238fd1498Szrj   /* Determine whether the shift amount is a vector or a scalar.  If the
536338fd1498Szrj      shift/rotate amount is a vector, use the vector/vector shift optabs.  */
536438fd1498Szrj 
536538fd1498Szrj   if ((dt[1] == vect_internal_def
536638fd1498Szrj        || dt[1] == vect_induction_def)
536738fd1498Szrj       && !slp_node)
536838fd1498Szrj     scalar_shift_arg = false;
536938fd1498Szrj   else if (dt[1] == vect_constant_def
537038fd1498Szrj 	   || dt[1] == vect_external_def
537138fd1498Szrj 	   || dt[1] == vect_internal_def)
537238fd1498Szrj     {
537338fd1498Szrj       /* In SLP, we need to check whether the shift count is the same
537438fd1498Szrj 	 for all stmts; in loops, a constant or invariant count is
537538fd1498Szrj 	 always a scalar shift.  */
537638fd1498Szrj       if (slp_node)
537738fd1498Szrj 	{
537838fd1498Szrj 	  vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
537938fd1498Szrj 	  gimple *slpstmt;
538038fd1498Szrj 
538138fd1498Szrj 	  FOR_EACH_VEC_ELT (stmts, k, slpstmt)
538238fd1498Szrj 	    if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
538338fd1498Szrj 	      scalar_shift_arg = false;
5384*58e805e6Szrj 
5385*58e805e6Szrj 	  /* For internal SLP defs we have to make sure we see scalar stmts
5386*58e805e6Szrj 	     for all vector elements.
5387*58e805e6Szrj 	     ???  For different vectors we could resort to a different
5388*58e805e6Szrj 	     scalar shift operand but code-generation below simply always
5389*58e805e6Szrj 	     takes the first.  */
5390*58e805e6Szrj 	  if (dt[1] == vect_internal_def
5391*58e805e6Szrj 	      && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node), stmts.length ()))
5392*58e805e6Szrj 	    scalar_shift_arg = false;
539338fd1498Szrj 	}
539438fd1498Szrj 
539538fd1498Szrj       /* If the shift amount is computed by a pattern stmt, we cannot
539638fd1498Szrj          use the scalar amount directly; give up and use a vector
539738fd1498Szrj 	 shift instead.  */
539838fd1498Szrj       if (dt[1] == vect_internal_def)
539938fd1498Szrj 	{
540038fd1498Szrj 	  gimple *def = SSA_NAME_DEF_STMT (op1);
540138fd1498Szrj 	  if (is_pattern_stmt_p (vinfo_for_stmt (def)))
540238fd1498Szrj 	    scalar_shift_arg = false;
540338fd1498Szrj 	}
540438fd1498Szrj     }
540538fd1498Szrj   else
540638fd1498Szrj     {
540738fd1498Szrj       if (dump_enabled_p ())
540838fd1498Szrj         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
540938fd1498Szrj                          "operand mode requires invariant argument.\n");
541038fd1498Szrj       return false;
541138fd1498Szrj     }
541238fd1498Szrj 
541338fd1498Szrj   /* Vector shifted by vector.  */
541438fd1498Szrj   if (!scalar_shift_arg)
541538fd1498Szrj     {
541638fd1498Szrj       optab = optab_for_tree_code (code, vectype, optab_vector);
541738fd1498Szrj       if (dump_enabled_p ())
541838fd1498Szrj         dump_printf_loc (MSG_NOTE, vect_location,
541938fd1498Szrj                          "vector/vector shift/rotate found.\n");
542038fd1498Szrj 
542138fd1498Szrj       if (!op1_vectype)
542238fd1498Szrj 	op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
542338fd1498Szrj       if (op1_vectype == NULL_TREE
542438fd1498Szrj 	  || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
542538fd1498Szrj 	{
542638fd1498Szrj 	  if (dump_enabled_p ())
542738fd1498Szrj 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
542838fd1498Szrj                              "unusable type for last operand in"
542938fd1498Szrj                              " vector/vector shift/rotate.\n");
543038fd1498Szrj 	  return false;
543138fd1498Szrj 	}
543238fd1498Szrj     }
543338fd1498Szrj   /* See if the machine has a vector-shifted-by-scalar insn and, if not,
543438fd1498Szrj      whether it has a vector-shifted-by-vector insn.  */
543538fd1498Szrj   else
543638fd1498Szrj     {
543738fd1498Szrj       optab = optab_for_tree_code (code, vectype, optab_scalar);
543838fd1498Szrj       if (optab
543938fd1498Szrj           && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
544038fd1498Szrj         {
544138fd1498Szrj           if (dump_enabled_p ())
544238fd1498Szrj             dump_printf_loc (MSG_NOTE, vect_location,
544338fd1498Szrj                              "vector/scalar shift/rotate found.\n");
544438fd1498Szrj         }
544538fd1498Szrj       else
544638fd1498Szrj         {
544738fd1498Szrj           optab = optab_for_tree_code (code, vectype, optab_vector);
544838fd1498Szrj           if (optab
544938fd1498Szrj                && (optab_handler (optab, TYPE_MODE (vectype))
545038fd1498Szrj                       != CODE_FOR_nothing))
545138fd1498Szrj             {
545238fd1498Szrj 	      scalar_shift_arg = false;
545338fd1498Szrj 
545438fd1498Szrj               if (dump_enabled_p ())
545538fd1498Szrj                 dump_printf_loc (MSG_NOTE, vect_location,
545638fd1498Szrj                                  "vector/vector shift/rotate found.\n");
545738fd1498Szrj 
545838fd1498Szrj               /* Unlike the other binary operators, shifts/rotates take
545938fd1498Szrj                  an int rhs rather than one of the same type as the lhs,
546038fd1498Szrj                  so make sure the scalar is the right type if we are
546138fd1498Szrj 		 dealing with vectors of long long/long/short/char.  */
546238fd1498Szrj               if (dt[1] == vect_constant_def)
546338fd1498Szrj                 op1 = fold_convert (TREE_TYPE (vectype), op1);
546438fd1498Szrj 	      else if (!useless_type_conversion_p (TREE_TYPE (vectype),
546538fd1498Szrj 						   TREE_TYPE (op1)))
546638fd1498Szrj 		{
546738fd1498Szrj 		  if (slp_node
546838fd1498Szrj 		      && TYPE_MODE (TREE_TYPE (vectype))
546938fd1498Szrj 			 != TYPE_MODE (TREE_TYPE (op1)))
547038fd1498Szrj 		    {
547138fd1498Szrj                       if (dump_enabled_p ())
547238fd1498Szrj                         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
547338fd1498Szrj                                          "unusable type for last operand in"
547438fd1498Szrj                                          " vector/vector shift/rotate.\n");
547538fd1498Szrj 		      return false;
547638fd1498Szrj 		    }
547738fd1498Szrj 		  if (vec_stmt && !slp_node)
547838fd1498Szrj 		    {
547938fd1498Szrj 		      op1 = fold_convert (TREE_TYPE (vectype), op1);
548038fd1498Szrj 		      op1 = vect_init_vector (stmt, op1,
548138fd1498Szrj 					      TREE_TYPE (vectype), NULL);
548238fd1498Szrj 		    }
548338fd1498Szrj 		}
548438fd1498Szrj             }
548538fd1498Szrj         }
548638fd1498Szrj     }
548738fd1498Szrj 
548838fd1498Szrj   /* Supportable by target?  */
548938fd1498Szrj   if (!optab)
549038fd1498Szrj     {
549138fd1498Szrj       if (dump_enabled_p ())
549238fd1498Szrj         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
549338fd1498Szrj                          "no optab.\n");
549438fd1498Szrj       return false;
549538fd1498Szrj     }
549638fd1498Szrj   vec_mode = TYPE_MODE (vectype);
549738fd1498Szrj   icode = (int) optab_handler (optab, vec_mode);
549838fd1498Szrj   if (icode == CODE_FOR_nothing)
549938fd1498Szrj     {
550038fd1498Szrj       if (dump_enabled_p ())
550138fd1498Szrj         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
550238fd1498Szrj                          "op not supported by target.\n");
550338fd1498Szrj       /* Check only during analysis.  */
550438fd1498Szrj       if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
550538fd1498Szrj 	  || (!vec_stmt
550638fd1498Szrj 	      && !vect_worthwhile_without_simd_p (vinfo, code)))
550738fd1498Szrj         return false;
550838fd1498Szrj       if (dump_enabled_p ())
550938fd1498Szrj         dump_printf_loc (MSG_NOTE, vect_location,
551038fd1498Szrj                          "proceeding using word mode.\n");
551138fd1498Szrj     }
551238fd1498Szrj 
551338fd1498Szrj   /* Worthwhile without SIMD support?  Check only during analysis.  */
551438fd1498Szrj   if (!vec_stmt
551538fd1498Szrj       && !VECTOR_MODE_P (TYPE_MODE (vectype))
551638fd1498Szrj       && !vect_worthwhile_without_simd_p (vinfo, code))
551738fd1498Szrj     {
551838fd1498Szrj       if (dump_enabled_p ())
551938fd1498Szrj         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
552038fd1498Szrj                          "not worthwhile without SIMD support.\n");
552138fd1498Szrj       return false;
552238fd1498Szrj     }
552338fd1498Szrj 
552438fd1498Szrj   if (!vec_stmt) /* transformation not required.  */
552538fd1498Szrj     {
552638fd1498Szrj       STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
552738fd1498Szrj       if (dump_enabled_p ())
552838fd1498Szrj         dump_printf_loc (MSG_NOTE, vect_location,
552938fd1498Szrj                          "=== vectorizable_shift ===\n");
553038fd1498Szrj       if (!slp_node)
553138fd1498Szrj 	vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
553238fd1498Szrj       return true;
553338fd1498Szrj     }
553438fd1498Szrj 
553538fd1498Szrj   /* Transform.  */
553638fd1498Szrj 
553738fd1498Szrj   if (dump_enabled_p ())
553838fd1498Szrj     dump_printf_loc (MSG_NOTE, vect_location,
553938fd1498Szrj                      "transform binary/unary operation.\n");
554038fd1498Szrj 
554138fd1498Szrj   /* Handle def.  */
554238fd1498Szrj   vec_dest = vect_create_destination_var (scalar_dest, vectype);
554338fd1498Szrj 
554438fd1498Szrj   prev_stmt_info = NULL;
554538fd1498Szrj   for (j = 0; j < ncopies; j++)
554638fd1498Szrj     {
554738fd1498Szrj       /* Handle uses.  */
554838fd1498Szrj       if (j == 0)
554938fd1498Szrj         {
555038fd1498Szrj           if (scalar_shift_arg)
555138fd1498Szrj             {
555238fd1498Szrj               /* Vector shl and shr insn patterns can be defined with a scalar
555338fd1498Szrj                  operand 2 (the shift amount).  In this case, use the constant
555438fd1498Szrj                  or loop-invariant op1 directly, without extending it to vector
555538fd1498Szrj                  mode first.  */
555638fd1498Szrj               optab_op2_mode = insn_data[icode].operand[2].mode;
555738fd1498Szrj               if (!VECTOR_MODE_P (optab_op2_mode))
555838fd1498Szrj                 {
555938fd1498Szrj                   if (dump_enabled_p ())
556038fd1498Szrj                     dump_printf_loc (MSG_NOTE, vect_location,
556138fd1498Szrj                                      "operand 1 using scalar mode.\n");
556238fd1498Szrj                   vec_oprnd1 = op1;
556338fd1498Szrj                   vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
556438fd1498Szrj                   vec_oprnds1.quick_push (vec_oprnd1);
556538fd1498Szrj                   if (slp_node)
556638fd1498Szrj                     {
556738fd1498Szrj                       /* Store vec_oprnd1 for every vector stmt to be created
556838fd1498Szrj                          for SLP_NODE.  We check during the analysis that all
556938fd1498Szrj                          the shift arguments are the same.
557038fd1498Szrj                          TODO: Allow different constants for different vector
557138fd1498Szrj                          stmts generated for an SLP instance.  */
557238fd1498Szrj                       for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
557338fd1498Szrj                         vec_oprnds1.quick_push (vec_oprnd1);
557438fd1498Szrj                     }
557538fd1498Szrj                 }
557638fd1498Szrj             }
557738fd1498Szrj 
557838fd1498Szrj           /* vec_oprnd1 is available if operand 1 should be of a scalar type
557938fd1498Szrj              (a special case for certain kinds of vector shifts); otherwise,
558038fd1498Szrj              operand 1 should be of a vector type (the usual case).  */
558138fd1498Szrj           if (vec_oprnd1)
558238fd1498Szrj             vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
558338fd1498Szrj                                slp_node);
558438fd1498Szrj           else
558538fd1498Szrj             vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
558638fd1498Szrj                                slp_node);
558738fd1498Szrj         }
558838fd1498Szrj       else
558938fd1498Szrj         vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
559038fd1498Szrj 
559138fd1498Szrj       /* Arguments are ready.  Create the new vector stmt.  */
559238fd1498Szrj       FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
559338fd1498Szrj         {
559438fd1498Szrj           vop1 = vec_oprnds1[i];
559538fd1498Szrj 	  new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
559638fd1498Szrj           new_temp = make_ssa_name (vec_dest, new_stmt);
559738fd1498Szrj           gimple_assign_set_lhs (new_stmt, new_temp);
559838fd1498Szrj           vect_finish_stmt_generation (stmt, new_stmt, gsi);
559938fd1498Szrj           if (slp_node)
560038fd1498Szrj             SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
560138fd1498Szrj         }
560238fd1498Szrj 
560338fd1498Szrj       if (slp_node)
560438fd1498Szrj         continue;
560538fd1498Szrj 
560638fd1498Szrj       if (j == 0)
560738fd1498Szrj         STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
560838fd1498Szrj       else
560938fd1498Szrj         STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
561038fd1498Szrj       prev_stmt_info = vinfo_for_stmt (new_stmt);
561138fd1498Szrj     }
561238fd1498Szrj 
561338fd1498Szrj   vec_oprnds0.release ();
561438fd1498Szrj   vec_oprnds1.release ();
561538fd1498Szrj 
561638fd1498Szrj   return true;
561738fd1498Szrj }
561838fd1498Szrj 
561938fd1498Szrj 
562038fd1498Szrj /* Function vectorizable_operation.
562138fd1498Szrj 
562238fd1498Szrj    Check if STMT performs a binary, unary or ternary operation that can
562338fd1498Szrj    be vectorized.
562438fd1498Szrj    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
562538fd1498Szrj    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
562638fd1498Szrj    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
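/* Editorial example (a hedged sketch, not from the original sources):
   statements such as

       _3 = _1 + _2;             binary
       _2 = ~_1;                 unary
       _4 = FMA (_1, _2, _3);    ternary (assumed dump form)

   are handled here; shifts and conversions are handled by the
   dedicated functions above.  */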
562738fd1498Szrj 
562838fd1498Szrj static bool
562938fd1498Szrj vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
563038fd1498Szrj 			gimple **vec_stmt, slp_tree slp_node)
563138fd1498Szrj {
563238fd1498Szrj   tree vec_dest;
563338fd1498Szrj   tree scalar_dest;
563438fd1498Szrj   tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
563538fd1498Szrj   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
563638fd1498Szrj   tree vectype;
563738fd1498Szrj   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
563838fd1498Szrj   enum tree_code code, orig_code;
563938fd1498Szrj   machine_mode vec_mode;
564038fd1498Szrj   tree new_temp;
564138fd1498Szrj   int op_type;
564238fd1498Szrj   optab optab;
564338fd1498Szrj   bool target_support_p;
564438fd1498Szrj   gimple *def_stmt;
564538fd1498Szrj   enum vect_def_type dt[3]
564638fd1498Szrj     = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
564738fd1498Szrj   int ndts = 3;
564838fd1498Szrj   gimple *new_stmt = NULL;
564938fd1498Szrj   stmt_vec_info prev_stmt_info;
565038fd1498Szrj   poly_uint64 nunits_in;
565138fd1498Szrj   poly_uint64 nunits_out;
565238fd1498Szrj   tree vectype_out;
565338fd1498Szrj   int ncopies;
565438fd1498Szrj   int j, i;
565538fd1498Szrj   vec<tree> vec_oprnds0 = vNULL;
565638fd1498Szrj   vec<tree> vec_oprnds1 = vNULL;
565738fd1498Szrj   vec<tree> vec_oprnds2 = vNULL;
565838fd1498Szrj   tree vop0, vop1, vop2;
565938fd1498Szrj   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
566038fd1498Szrj   vec_info *vinfo = stmt_info->vinfo;
566138fd1498Szrj 
566238fd1498Szrj   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
566338fd1498Szrj     return false;
566438fd1498Szrj 
566538fd1498Szrj   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
566638fd1498Szrj       && ! vec_stmt)
566738fd1498Szrj     return false;
566838fd1498Szrj 
566938fd1498Szrj   /* Is STMT a vectorizable unary/binary/ternary operation?  */
567038fd1498Szrj   if (!is_gimple_assign (stmt))
567138fd1498Szrj     return false;
567238fd1498Szrj 
567338fd1498Szrj   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
567438fd1498Szrj     return false;
567538fd1498Szrj 
567638fd1498Szrj   orig_code = code = gimple_assign_rhs_code (stmt);
567738fd1498Szrj 
567838fd1498Szrj   /* For pointer addition and subtraction, we should use the normal
567938fd1498Szrj      plus and minus for the vector operation.  */
568038fd1498Szrj   if (code == POINTER_PLUS_EXPR)
568138fd1498Szrj     code = PLUS_EXPR;
568238fd1498Szrj   if (code == POINTER_DIFF_EXPR)
568338fd1498Szrj     code = MINUS_EXPR;
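
  /* For instance, a scalar statement such as
       q_1 = p_2 p+ 4;			(POINTER_PLUS_EXPR)
     becomes an ordinary PLUS_EXPR on the vectorized operands, and
       d_1 = p_2 - q_3;			(POINTER_DIFF_EXPR)
     a MINUS_EXPR; only the tree code changes, the operands are handled
     like those of any other binary operation.  (Illustrative names.)  */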
568438fd1498Szrj 
568538fd1498Szrj   /* Support only unary or binary operations.  */
568638fd1498Szrj   op_type = TREE_CODE_LENGTH (code);
568738fd1498Szrj   if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
568838fd1498Szrj     {
568938fd1498Szrj       if (dump_enabled_p ())
569038fd1498Szrj         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
569138fd1498Szrj                          "num. args = %d (not unary/binary/ternary op).\n",
569238fd1498Szrj                          op_type);
569338fd1498Szrj       return false;
569438fd1498Szrj     }
569538fd1498Szrj 
569638fd1498Szrj   scalar_dest = gimple_assign_lhs (stmt);
569738fd1498Szrj   vectype_out = STMT_VINFO_VECTYPE (stmt_info);
569838fd1498Szrj 
569938fd1498Szrj   /* Most operations cannot handle bit-precision types without extra
570038fd1498Szrj      truncations.  */
570138fd1498Szrj   if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
570238fd1498Szrj       && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
570338fd1498Szrj       /* Exceptions are bitwise binary operations.  */
570438fd1498Szrj       && code != BIT_IOR_EXPR
570538fd1498Szrj       && code != BIT_XOR_EXPR
570638fd1498Szrj       && code != BIT_AND_EXPR)
570738fd1498Szrj     {
570838fd1498Szrj       if (dump_enabled_p ())
570938fd1498Szrj         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
571038fd1498Szrj                          "bit-precision arithmetic not supported.\n");
571138fd1498Szrj       return false;
571238fd1498Szrj     }
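  /* E.g. an addition on a 3-bit bit-field type would need its result
     truncated back to 3 bits after each vector operation to keep the
     excess bits clean; the vectorizer does not emit such truncations,
     so those stmts are rejected.  The bitwise AND/IOR/XOR exceptions
     are safe because they never set bits outside the operands'
     precision.  */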
571338fd1498Szrj 
571438fd1498Szrj   op0 = gimple_assign_rhs1 (stmt);
571538fd1498Szrj   if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
571638fd1498Szrj     {
571738fd1498Szrj       if (dump_enabled_p ())
571838fd1498Szrj         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
571938fd1498Szrj                          "use not simple.\n");
572038fd1498Szrj       return false;
572138fd1498Szrj     }
572238fd1498Szrj   /* If op0 is an external or constant def use a vector type with
572338fd1498Szrj      the same size as the output vector type.  */
572438fd1498Szrj   if (!vectype)
572538fd1498Szrj     {
572638fd1498Szrj       /* For boolean types we cannot determine the vectype from an
572738fd1498Szrj 	 invariant value (we don't know whether it is a vector
572838fd1498Szrj 	 of booleans or a vector of integers).  We use the output
572938fd1498Szrj 	 vectype because operations on booleans don't change the
573038fd1498Szrj 	 type.  */
573138fd1498Szrj       if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
573238fd1498Szrj 	{
573338fd1498Szrj 	  if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
573438fd1498Szrj 	    {
573538fd1498Szrj 	      if (dump_enabled_p ())
573638fd1498Szrj 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
573738fd1498Szrj 				 "not supported operation on bool value.\n");
573838fd1498Szrj 	      return false;
573938fd1498Szrj 	    }
574038fd1498Szrj 	  vectype = vectype_out;
574138fd1498Szrj 	}
574238fd1498Szrj       else
574338fd1498Szrj 	vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
574438fd1498Szrj     }
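  /* E.g. an external or constant 'int' operand combined with a V4SF
     output vectype is given the same-sized V4SI vector type here, so
     both vectors end up with the same number of elements.  */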
574538fd1498Szrj   if (vec_stmt)
574638fd1498Szrj     gcc_assert (vectype);
574738fd1498Szrj   if (!vectype)
574838fd1498Szrj     {
574938fd1498Szrj       if (dump_enabled_p ())
575038fd1498Szrj         {
575138fd1498Szrj           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
575238fd1498Szrj                            "no vectype for scalar type ");
575338fd1498Szrj           dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
575438fd1498Szrj                              TREE_TYPE (op0));
575538fd1498Szrj           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
575638fd1498Szrj         }
575738fd1498Szrj 
575838fd1498Szrj       return false;
575938fd1498Szrj     }
576038fd1498Szrj 
576138fd1498Szrj   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
576238fd1498Szrj   nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
576338fd1498Szrj   if (maybe_ne (nunits_out, nunits_in))
576438fd1498Szrj     return false;
576538fd1498Szrj 
576638fd1498Szrj   if (op_type == binary_op || op_type == ternary_op)
576738fd1498Szrj     {
576838fd1498Szrj       op1 = gimple_assign_rhs2 (stmt);
576938fd1498Szrj       if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]))
577038fd1498Szrj 	{
577138fd1498Szrj 	  if (dump_enabled_p ())
577238fd1498Szrj 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
577338fd1498Szrj                              "use not simple.\n");
577438fd1498Szrj 	  return false;
577538fd1498Szrj 	}
577638fd1498Szrj     }
577738fd1498Szrj   if (op_type == ternary_op)
577838fd1498Szrj     {
577938fd1498Szrj       op2 = gimple_assign_rhs3 (stmt);
578038fd1498Szrj       if (!vect_is_simple_use (op2, vinfo, &def_stmt, &dt[2]))
578138fd1498Szrj 	{
578238fd1498Szrj 	  if (dump_enabled_p ())
578338fd1498Szrj 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
578438fd1498Szrj                              "use not simple.\n");
578538fd1498Szrj 	  return false;
578638fd1498Szrj 	}
578738fd1498Szrj     }
578838fd1498Szrj 
578938fd1498Szrj   /* Multiple types in SLP are handled by creating the appropriate number of
579038fd1498Szrj      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
579138fd1498Szrj      case of SLP.  */
579238fd1498Szrj   if (slp_node)
579338fd1498Szrj     ncopies = 1;
579438fd1498Szrj   else
579538fd1498Szrj     ncopies = vect_get_num_copies (loop_vinfo, vectype);
579638fd1498Szrj 
579738fd1498Szrj   gcc_assert (ncopies >= 1);
579838fd1498Szrj 
579938fd1498Szrj   /* Shifts are handled in vectorizable_shift ().  */
580038fd1498Szrj   if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
580138fd1498Szrj       || code == RROTATE_EXPR)
580238fd1498Szrj    return false;
580338fd1498Szrj 
580438fd1498Szrj   /* Supportable by target?  */
580538fd1498Szrj 
580638fd1498Szrj   vec_mode = TYPE_MODE (vectype);
580738fd1498Szrj   if (code == MULT_HIGHPART_EXPR)
580838fd1498Szrj     target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
580938fd1498Szrj   else
581038fd1498Szrj     {
581138fd1498Szrj       optab = optab_for_tree_code (code, vectype, optab_default);
581238fd1498Szrj       if (!optab)
581338fd1498Szrj 	{
581438fd1498Szrj           if (dump_enabled_p ())
581538fd1498Szrj             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
581638fd1498Szrj                              "no optab.\n");
581738fd1498Szrj 	  return false;
581838fd1498Szrj 	}
581938fd1498Szrj       target_support_p = (optab_handler (optab, vec_mode)
582038fd1498Szrj 			  != CODE_FOR_nothing);
582138fd1498Szrj     }
582238fd1498Szrj 
582338fd1498Szrj   if (!target_support_p)
582438fd1498Szrj     {
582538fd1498Szrj       if (dump_enabled_p ())
582638fd1498Szrj 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
582738fd1498Szrj                          "op not supported by target.\n");
582838fd1498Szrj       /* Check only during analysis.  */
582938fd1498Szrj       if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
583038fd1498Szrj 	  || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
583138fd1498Szrj         return false;
583238fd1498Szrj       if (dump_enabled_p ())
583338fd1498Szrj 	dump_printf_loc (MSG_NOTE, vect_location,
583438fd1498Szrj                          "proceeding using word mode.\n");
583538fd1498Szrj     }
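  /* As a rough illustration, a target without vector instructions can
     still perform a bitwise AND of four chars as one word-mode (e.g.
     SImode) AND; the word-size check above and the check below only
     let through operations for which this emulation is cheap.  */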
583638fd1498Szrj 
583738fd1498Szrj   /* Worthwhile without SIMD support?  Check only during analysis.  */
583838fd1498Szrj   if (!VECTOR_MODE_P (vec_mode)
583938fd1498Szrj       && !vec_stmt
584038fd1498Szrj       && !vect_worthwhile_without_simd_p (vinfo, code))
584138fd1498Szrj     {
584238fd1498Szrj       if (dump_enabled_p ())
584338fd1498Szrj         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
584438fd1498Szrj                          "not worthwhile without SIMD support.\n");
584538fd1498Szrj       return false;
584638fd1498Szrj     }
584738fd1498Szrj 
584838fd1498Szrj   if (!vec_stmt) /* transformation not required.  */
584938fd1498Szrj     {
585038fd1498Szrj       STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
585138fd1498Szrj       if (dump_enabled_p ())
585238fd1498Szrj         dump_printf_loc (MSG_NOTE, vect_location,
585338fd1498Szrj                          "=== vectorizable_operation ===\n");
585438fd1498Szrj       if (!slp_node)
585538fd1498Szrj 	vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
585638fd1498Szrj       return true;
585738fd1498Szrj     }
585838fd1498Szrj 
585938fd1498Szrj   /* Transform.  */
586038fd1498Szrj 
586138fd1498Szrj   if (dump_enabled_p ())
586238fd1498Szrj     dump_printf_loc (MSG_NOTE, vect_location,
586338fd1498Szrj                      "transform binary/unary operation.\n");
586438fd1498Szrj 
586538fd1498Szrj   /* Handle def.  */
586638fd1498Szrj   vec_dest = vect_create_destination_var (scalar_dest, vectype);
586738fd1498Szrj 
586838fd1498Szrj   /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
586938fd1498Szrj      vectors with unsigned elements, but the result is signed.  So, we
587038fd1498Szrj      need to compute the MINUS_EXPR into a vectype temporary and
587138fd1498Szrj      VIEW_CONVERT_EXPR it into the final vectype_out result.  */
587238fd1498Szrj   tree vec_cvt_dest = NULL_TREE;
587338fd1498Szrj   if (orig_code == POINTER_DIFF_EXPR)
587438fd1498Szrj     vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
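  /* Illustratively, assuming 4-element vectors, the transformation
     below then emits something like:
       vect_t_1 = vect_p_2 - vect_q_3;		(MINUS_EXPR in VECTYPE)
       vect_d_4 = VIEW_CONVERT_EXPR<vector(4) long int>(vect_t_1);  */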
587538fd1498Szrj 
587638fd1498Szrj   /* In case the vectorization factor (VF) is bigger than the number
587738fd1498Szrj      of elements that we can fit in a vectype (nunits), we have to generate
587838fd1498Szrj      more than one vector stmt - i.e., we need to "unroll" the
587938fd1498Szrj      vector stmt by a factor VF/nunits.  In doing so, we record a pointer
588038fd1498Szrj      from one copy of the vector stmt to the next, in the field
588138fd1498Szrj      STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
588238fd1498Szrj      stages to find the correct vector defs to be used when vectorizing
588338fd1498Szrj      stmts that use the defs of the current stmt.  The example below
588438fd1498Szrj      illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
588538fd1498Szrj      we need to create 4 vectorized stmts):
588638fd1498Szrj 
588738fd1498Szrj      before vectorization:
588838fd1498Szrj                                 RELATED_STMT    VEC_STMT
588938fd1498Szrj         S1:     x = memref      -               -
589038fd1498Szrj         S2:     z = x + 1       -               -
589138fd1498Szrj 
589238fd1498Szrj      step 1: vectorize stmt S1 (done in vectorizable_load. See more details
589338fd1498Szrj              there):
589438fd1498Szrj                                 RELATED_STMT    VEC_STMT
589538fd1498Szrj         VS1_0:  vx0 = memref0   VS1_1           -
589638fd1498Szrj         VS1_1:  vx1 = memref1   VS1_2           -
589738fd1498Szrj         VS1_2:  vx2 = memref2   VS1_3           -
589838fd1498Szrj         VS1_3:  vx3 = memref3   -               -
589938fd1498Szrj         S1:     x = load        -               VS1_0
590038fd1498Szrj         S2:     z = x + 1       -               -
590138fd1498Szrj 
590238fd1498Szrj      step 2: vectorize stmt S2 (done here):
590338fd1498Szrj         To vectorize stmt S2 we first need to find the relevant vector
590438fd1498Szrj         def for the first operand 'x'.  This is, as usual, obtained from
590538fd1498Szrj         the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
590638fd1498Szrj         that defines 'x' (S1).  This way we find the stmt VS1_0, and the
590738fd1498Szrj         relevant vector def 'vx0'.  Having found 'vx0' we can generate
590838fd1498Szrj         the vector stmt VS2_0, and as usual, record it in the
590938fd1498Szrj         STMT_VINFO_VEC_STMT of stmt S2.
591038fd1498Szrj         When creating the second copy (VS2_1), we obtain the relevant vector
591138fd1498Szrj         def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
591238fd1498Szrj         stmt VS1_0.  This way we find the stmt VS1_1 and the relevant
591338fd1498Szrj         vector def 'vx1'.  Using 'vx1' we create stmt VS2_1 and record a
591438fd1498Szrj         pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
591538fd1498Szrj         Similarly when creating stmts VS2_2 and VS2_3.  This is the resulting
591638fd1498Szrj         chain of stmts and pointers:
591738fd1498Szrj                                 RELATED_STMT    VEC_STMT
591838fd1498Szrj         VS1_0:  vx0 = memref0   VS1_1           -
591938fd1498Szrj         VS1_1:  vx1 = memref1   VS1_2           -
592038fd1498Szrj         VS1_2:  vx2 = memref2   VS1_3           -
592138fd1498Szrj         VS1_3:  vx3 = memref3   -               -
592238fd1498Szrj         S1:     x = load        -               VS1_0
592338fd1498Szrj         VS2_0:  vz0 = vx0 + v1  VS2_1           -
592438fd1498Szrj         VS2_1:  vz1 = vx1 + v1  VS2_2           -
592538fd1498Szrj         VS2_2:  vz2 = vx2 + v1  VS2_3           -
592638fd1498Szrj         VS2_3:  vz3 = vx3 + v1  -               -
592738fd1498Szrj         S2:     z = x + 1       -               VS2_0  */
592838fd1498Szrj 
592938fd1498Szrj   prev_stmt_info = NULL;
593038fd1498Szrj   for (j = 0; j < ncopies; j++)
593138fd1498Szrj     {
593238fd1498Szrj       /* Handle uses.  */
593338fd1498Szrj       if (j == 0)
593438fd1498Szrj 	{
5935*58e805e6Szrj 	  if (op_type == binary_op)
593638fd1498Szrj 	    vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
593738fd1498Szrj 			       slp_node);
5938*58e805e6Szrj 	  else if (op_type == ternary_op)
5939*58e805e6Szrj 	    {
5940*58e805e6Szrj 	      if (slp_node)
5941*58e805e6Szrj 		{
5942*58e805e6Szrj 		  auto_vec<tree> ops(3);
5943*58e805e6Szrj 		  ops.quick_push (op0);
5944*58e805e6Szrj 		  ops.quick_push (op1);
5945*58e805e6Szrj 		  ops.quick_push (op2);
5946*58e805e6Szrj 		  auto_vec<vec<tree> > vec_defs(3);
5947*58e805e6Szrj 		  vect_get_slp_defs (ops, slp_node, &vec_defs);
5948*58e805e6Szrj 		  vec_oprnds0 = vec_defs[0];
5949*58e805e6Szrj 		  vec_oprnds1 = vec_defs[1];
5950*58e805e6Szrj 		  vec_oprnds2 = vec_defs[2];
5951*58e805e6Szrj 		}
5952*58e805e6Szrj 	      else
5953*58e805e6Szrj 		{
5954*58e805e6Szrj 		  vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5955*58e805e6Szrj 				     NULL);
5956*58e805e6Szrj 		  vect_get_vec_defs (op2, NULL_TREE, stmt, &vec_oprnds2, NULL,
5957*58e805e6Szrj 				     NULL);
5958*58e805e6Szrj 		}
5959*58e805e6Szrj 	    }
596038fd1498Szrj 	  else
596138fd1498Szrj 	    vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
596238fd1498Szrj 			       slp_node);
596338fd1498Szrj 	}
596438fd1498Szrj       else
596538fd1498Szrj 	{
596638fd1498Szrj 	  vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
596738fd1498Szrj 	  if (op_type == ternary_op)
596838fd1498Szrj 	    {
596938fd1498Szrj 	      tree vec_oprnd = vec_oprnds2.pop ();
597038fd1498Szrj 	      vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
597138fd1498Szrj 							           vec_oprnd));
597238fd1498Szrj 	    }
597338fd1498Szrj 	}
597438fd1498Szrj 
597538fd1498Szrj       /* Arguments are ready.  Create the new vector stmt.  */
597638fd1498Szrj       FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
597738fd1498Szrj         {
597838fd1498Szrj 	  vop1 = ((op_type == binary_op || op_type == ternary_op)
597938fd1498Szrj 		  ? vec_oprnds1[i] : NULL_TREE);
598038fd1498Szrj 	  vop2 = ((op_type == ternary_op)
598138fd1498Szrj 		  ? vec_oprnds2[i] : NULL_TREE);
598238fd1498Szrj 	  new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
598338fd1498Szrj 	  new_temp = make_ssa_name (vec_dest, new_stmt);
598438fd1498Szrj 	  gimple_assign_set_lhs (new_stmt, new_temp);
598538fd1498Szrj 	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
598638fd1498Szrj 	  if (vec_cvt_dest)
598738fd1498Szrj 	    {
598838fd1498Szrj 	      new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
598938fd1498Szrj 	      new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
599038fd1498Szrj 					      new_temp);
599138fd1498Szrj 	      new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
599238fd1498Szrj 	      gimple_assign_set_lhs (new_stmt, new_temp);
599338fd1498Szrj 	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
599438fd1498Szrj 	    }
599538fd1498Szrj           if (slp_node)
599638fd1498Szrj 	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
599738fd1498Szrj         }
599838fd1498Szrj 
599938fd1498Szrj       if (slp_node)
600038fd1498Szrj         continue;
600138fd1498Szrj 
600238fd1498Szrj       if (j == 0)
600338fd1498Szrj 	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
600438fd1498Szrj       else
600538fd1498Szrj 	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
600638fd1498Szrj       prev_stmt_info = vinfo_for_stmt (new_stmt);
600738fd1498Szrj     }
600838fd1498Szrj 
600938fd1498Szrj   vec_oprnds0.release ();
601038fd1498Szrj   vec_oprnds1.release ();
601138fd1498Szrj   vec_oprnds2.release ();
601238fd1498Szrj 
601338fd1498Szrj   return true;
601438fd1498Szrj }
601538fd1498Szrj 
601638fd1498Szrj /* A helper function to ensure data reference DR's base alignment.  */
601738fd1498Szrj 
601838fd1498Szrj static void
601938fd1498Szrj ensure_base_align (struct data_reference *dr)
602038fd1498Szrj {
602138fd1498Szrj   if (!dr->aux)
602238fd1498Szrj     return;
602338fd1498Szrj 
602438fd1498Szrj   if (DR_VECT_AUX (dr)->base_misaligned)
602538fd1498Szrj     {
602638fd1498Szrj       tree base_decl = DR_VECT_AUX (dr)->base_decl;
602738fd1498Szrj 
602838fd1498Szrj       unsigned int align_base_to = DR_TARGET_ALIGNMENT (dr) * BITS_PER_UNIT;
602938fd1498Szrj 
603038fd1498Szrj       if (decl_in_symtab_p (base_decl))
603138fd1498Szrj 	symtab_node::get (base_decl)->increase_alignment (align_base_to);
603238fd1498Szrj       else
603338fd1498Szrj 	{
603438fd1498Szrj 	  SET_DECL_ALIGN (base_decl, align_base_to);
603538fd1498Szrj           DECL_USER_ALIGN (base_decl) = 1;
603638fd1498Szrj 	}
603738fd1498Szrj       DR_VECT_AUX (dr)->base_misaligned = false;
603838fd1498Szrj     }
603938fd1498Szrj }
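
/* For example, if the vectorizer assumed 32-byte alignment for vector
   accesses to 'int a[256]' but the decl only carries a smaller
   alignment, the function above raises DECL_ALIGN (or the symbol table
   entry's alignment) to DR_TARGET_ALIGNMENT so that the assumption
   holds at runtime.  */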
604038fd1498Szrj 
604138fd1498Szrj 
604238fd1498Szrj /* Function get_group_alias_ptr_type.
604338fd1498Szrj 
604438fd1498Szrj    Return the alias type for the group starting at FIRST_STMT.  */
604538fd1498Szrj 
604638fd1498Szrj static tree
604738fd1498Szrj get_group_alias_ptr_type (gimple *first_stmt)
604838fd1498Szrj {
604938fd1498Szrj   struct data_reference *first_dr, *next_dr;
605038fd1498Szrj   gimple *next_stmt;
605138fd1498Szrj 
605238fd1498Szrj   first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
605338fd1498Szrj   next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt));
605438fd1498Szrj   while (next_stmt)
605538fd1498Szrj     {
605638fd1498Szrj       next_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (next_stmt));
605738fd1498Szrj       if (get_alias_set (DR_REF (first_dr))
605838fd1498Szrj 	  != get_alias_set (DR_REF (next_dr)))
605938fd1498Szrj 	{
606038fd1498Szrj 	  if (dump_enabled_p ())
606138fd1498Szrj 	    dump_printf_loc (MSG_NOTE, vect_location,
606238fd1498Szrj 			     "conflicting alias set types.\n");
606338fd1498Szrj 	  return ptr_type_node;
606438fd1498Szrj 	}
606538fd1498Szrj       next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
606638fd1498Szrj     }
606738fd1498Szrj   return reference_alias_ptr_type (DR_REF (first_dr));
606838fd1498Szrj }
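
/* E.g. if the members of an interleaved group access memory with
   different alias sets (say through different union members), the
   function above conservatively returns ptr_type_node, meaning the
   group's accesses may alias anything; otherwise the alias pointer
   type of the first reference is valid for the whole group.  */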
606938fd1498Szrj 
607038fd1498Szrj 
607138fd1498Szrj /* Function vectorizable_store.
607238fd1498Szrj 
607338fd1498Szrj    Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
607438fd1498Szrj    can be vectorized.
607538fd1498Szrj    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
607638fd1498Szrj    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
607738fd1498Szrj    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
607838fd1498Szrj 
607938fd1498Szrj static bool
608038fd1498Szrj vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
608138fd1498Szrj                     slp_tree slp_node)
608238fd1498Szrj {
608338fd1498Szrj   tree data_ref;
608438fd1498Szrj   tree op;
608538fd1498Szrj   tree vec_oprnd = NULL_TREE;
608638fd1498Szrj   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
608738fd1498Szrj   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
608838fd1498Szrj   tree elem_type;
608938fd1498Szrj   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
609038fd1498Szrj   struct loop *loop = NULL;
609138fd1498Szrj   machine_mode vec_mode;
609238fd1498Szrj   tree dummy;
609338fd1498Szrj   enum dr_alignment_support alignment_support_scheme;
609438fd1498Szrj   gimple *def_stmt;
609538fd1498Szrj   enum vect_def_type rhs_dt = vect_unknown_def_type;
609638fd1498Szrj   enum vect_def_type mask_dt = vect_unknown_def_type;
609738fd1498Szrj   stmt_vec_info prev_stmt_info = NULL;
609838fd1498Szrj   tree dataref_ptr = NULL_TREE;
609938fd1498Szrj   tree dataref_offset = NULL_TREE;
610038fd1498Szrj   gimple *ptr_incr = NULL;
610138fd1498Szrj   int ncopies;
610238fd1498Szrj   int j;
610338fd1498Szrj   gimple *next_stmt, *first_stmt;
610438fd1498Szrj   bool grouped_store;
610538fd1498Szrj   unsigned int group_size, i;
610638fd1498Szrj   vec<tree> oprnds = vNULL;
610738fd1498Szrj   vec<tree> result_chain = vNULL;
610838fd1498Szrj   bool inv_p;
610938fd1498Szrj   tree offset = NULL_TREE;
611038fd1498Szrj   vec<tree> vec_oprnds = vNULL;
611138fd1498Szrj   bool slp = (slp_node != NULL);
611238fd1498Szrj   unsigned int vec_num;
611338fd1498Szrj   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
611438fd1498Szrj   vec_info *vinfo = stmt_info->vinfo;
611538fd1498Szrj   tree aggr_type;
611638fd1498Szrj   gather_scatter_info gs_info;
611738fd1498Szrj   gimple *new_stmt;
611838fd1498Szrj   poly_uint64 vf;
611938fd1498Szrj   vec_load_store_type vls_type;
612038fd1498Szrj   tree ref_type;
612138fd1498Szrj 
612238fd1498Szrj   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
612338fd1498Szrj     return false;
612438fd1498Szrj 
612538fd1498Szrj   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
612638fd1498Szrj       && ! vec_stmt)
612738fd1498Szrj     return false;
612838fd1498Szrj 
612938fd1498Szrj   /* Is STMT a vectorizable store?  */
613038fd1498Szrj 
613138fd1498Szrj   tree mask = NULL_TREE, mask_vectype = NULL_TREE;
613238fd1498Szrj   if (is_gimple_assign (stmt))
613338fd1498Szrj     {
613438fd1498Szrj       tree scalar_dest = gimple_assign_lhs (stmt);
613538fd1498Szrj       if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
613638fd1498Szrj 	  && is_pattern_stmt_p (stmt_info))
613738fd1498Szrj 	scalar_dest = TREE_OPERAND (scalar_dest, 0);
613838fd1498Szrj       if (TREE_CODE (scalar_dest) != ARRAY_REF
613938fd1498Szrj 	  && TREE_CODE (scalar_dest) != BIT_FIELD_REF
614038fd1498Szrj 	  && TREE_CODE (scalar_dest) != INDIRECT_REF
614138fd1498Szrj 	  && TREE_CODE (scalar_dest) != COMPONENT_REF
614238fd1498Szrj 	  && TREE_CODE (scalar_dest) != IMAGPART_EXPR
614338fd1498Szrj 	  && TREE_CODE (scalar_dest) != REALPART_EXPR
614438fd1498Szrj 	  && TREE_CODE (scalar_dest) != MEM_REF)
614538fd1498Szrj 	return false;
614638fd1498Szrj     }
614738fd1498Szrj   else
614838fd1498Szrj     {
614938fd1498Szrj       gcall *call = dyn_cast <gcall *> (stmt);
615038fd1498Szrj       if (!call || !gimple_call_internal_p (call))
615138fd1498Szrj 	return false;
615238fd1498Szrj 
615338fd1498Szrj       internal_fn ifn = gimple_call_internal_fn (call);
615438fd1498Szrj       if (!internal_store_fn_p (ifn))
615538fd1498Szrj 	return false;
615638fd1498Szrj 
615738fd1498Szrj       if (slp_node != NULL)
615838fd1498Szrj 	{
615938fd1498Szrj 	  if (dump_enabled_p ())
616038fd1498Szrj 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
616138fd1498Szrj 			     "SLP of masked stores not supported.\n");
616238fd1498Szrj 	  return false;
616338fd1498Szrj 	}
616438fd1498Szrj 
616538fd1498Szrj       int mask_index = internal_fn_mask_index (ifn);
616638fd1498Szrj       if (mask_index >= 0)
616738fd1498Szrj 	{
616838fd1498Szrj 	  mask = gimple_call_arg (call, mask_index);
616938fd1498Szrj 	  if (!vect_check_load_store_mask (stmt, mask, &mask_dt,
617038fd1498Szrj 					   &mask_vectype))
617138fd1498Szrj 	    return false;
617238fd1498Szrj 	}
617338fd1498Szrj     }
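
  /* E.g. a masked store reaches this point as a call along the lines of
       MASK_STORE (addr, align, mask, value);
     where internal_fn_mask_index identifies which argument is the mask
     so that it can be checked against the stored data's vector type.  */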
617438fd1498Szrj 
617538fd1498Szrj   op = vect_get_store_rhs (stmt);
617638fd1498Szrj 
617738fd1498Szrj   /* Cannot have hybrid store SLP -- that would mean storing to the
617838fd1498Szrj      same location twice.  */
617938fd1498Szrj   gcc_assert (slp == PURE_SLP_STMT (stmt_info));
618038fd1498Szrj 
618138fd1498Szrj   tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
618238fd1498Szrj   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
618338fd1498Szrj 
618438fd1498Szrj   if (loop_vinfo)
618538fd1498Szrj     {
618638fd1498Szrj       loop = LOOP_VINFO_LOOP (loop_vinfo);
618738fd1498Szrj       vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
618838fd1498Szrj     }
618938fd1498Szrj   else
619038fd1498Szrj     vf = 1;
619138fd1498Szrj 
619238fd1498Szrj   /* Multiple types in SLP are handled by creating the appropriate number of
619338fd1498Szrj      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
619438fd1498Szrj      case of SLP.  */
619538fd1498Szrj   if (slp)
619638fd1498Szrj     ncopies = 1;
619738fd1498Szrj   else
619838fd1498Szrj     ncopies = vect_get_num_copies (loop_vinfo, vectype);
619938fd1498Szrj 
620038fd1498Szrj   gcc_assert (ncopies >= 1);
620138fd1498Szrj 
620238fd1498Szrj   /* FORNOW.  This restriction should be relaxed.  */
620338fd1498Szrj   if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
620438fd1498Szrj     {
620538fd1498Szrj       if (dump_enabled_p ())
620638fd1498Szrj 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
620738fd1498Szrj 			 "multiple types in nested loop.\n");
620838fd1498Szrj       return false;
620938fd1498Szrj     }
621038fd1498Szrj 
621138fd1498Szrj   if (!vect_check_store_rhs (stmt, op, &rhs_dt, &rhs_vectype, &vls_type))
621238fd1498Szrj     return false;
621338fd1498Szrj 
621438fd1498Szrj   elem_type = TREE_TYPE (vectype);
621538fd1498Szrj   vec_mode = TYPE_MODE (vectype);
621638fd1498Szrj 
621738fd1498Szrj   if (!STMT_VINFO_DATA_REF (stmt_info))
621838fd1498Szrj     return false;
621938fd1498Szrj 
622038fd1498Szrj   vect_memory_access_type memory_access_type;
622138fd1498Szrj   if (!get_load_store_type (stmt, vectype, slp, mask, vls_type, ncopies,
622238fd1498Szrj 			    &memory_access_type, &gs_info))
622338fd1498Szrj     return false;
622438fd1498Szrj 
622538fd1498Szrj   if (mask)
622638fd1498Szrj     {
622738fd1498Szrj       if (memory_access_type == VMAT_CONTIGUOUS)
622838fd1498Szrj 	{
622938fd1498Szrj 	  if (!VECTOR_MODE_P (vec_mode)
623038fd1498Szrj 	      || !can_vec_mask_load_store_p (vec_mode,
623138fd1498Szrj 					     TYPE_MODE (mask_vectype), false))
623238fd1498Szrj 	    return false;
623338fd1498Szrj 	}
623438fd1498Szrj       else if (memory_access_type != VMAT_LOAD_STORE_LANES
623538fd1498Szrj 	       && (memory_access_type != VMAT_GATHER_SCATTER || gs_info.decl))
623638fd1498Szrj 	{
623738fd1498Szrj 	  if (dump_enabled_p ())
623838fd1498Szrj 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
623938fd1498Szrj 			     "unsupported access type for masked store.\n");
624038fd1498Szrj 	  return false;
624138fd1498Szrj 	}
624238fd1498Szrj     }
624338fd1498Szrj   else
624438fd1498Szrj     {
624538fd1498Szrj       /* FORNOW.  In some cases we can vectorize even if the data type is
624638fd1498Szrj 	 not supported (e.g. array initialization with 0).  */
624738fd1498Szrj       if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
624838fd1498Szrj 	return false;
624938fd1498Szrj     }
625038fd1498Szrj 
625138fd1498Szrj   grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
625238fd1498Szrj 		   && memory_access_type != VMAT_GATHER_SCATTER
625338fd1498Szrj 		   && (slp || memory_access_type != VMAT_CONTIGUOUS));
625438fd1498Szrj   if (grouped_store)
625538fd1498Szrj     {
625638fd1498Szrj       first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
625738fd1498Szrj       first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
625838fd1498Szrj       group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
625938fd1498Szrj     }
626038fd1498Szrj   else
626138fd1498Szrj     {
626238fd1498Szrj       first_stmt = stmt;
626338fd1498Szrj       first_dr = dr;
626438fd1498Szrj       group_size = vec_num = 1;
626538fd1498Szrj     }
626638fd1498Szrj 
626738fd1498Szrj   if (!vec_stmt) /* transformation not required.  */
626838fd1498Szrj     {
626938fd1498Szrj       STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
627038fd1498Szrj 
627138fd1498Szrj       if (loop_vinfo
627238fd1498Szrj 	  && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
627338fd1498Szrj 	check_load_store_masking (loop_vinfo, vectype, vls_type, group_size,
627438fd1498Szrj 				  memory_access_type, &gs_info);
627538fd1498Szrj 
627638fd1498Szrj       STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
627738fd1498Szrj       /* The SLP costs are calculated during SLP analysis.  */
627838fd1498Szrj       if (!slp_node)
627938fd1498Szrj 	vect_model_store_cost (stmt_info, ncopies, memory_access_type,
628038fd1498Szrj 			       vls_type, NULL, NULL, NULL);
628138fd1498Szrj       return true;
628238fd1498Szrj     }
628338fd1498Szrj   gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
628438fd1498Szrj 
628538fd1498Szrj   /* Transform.  */
628638fd1498Szrj 
628738fd1498Szrj   ensure_base_align (dr);
628838fd1498Szrj 
628938fd1498Szrj   if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
629038fd1498Szrj     {
629138fd1498Szrj       tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
629238fd1498Szrj       tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
629338fd1498Szrj       tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
629438fd1498Szrj       tree ptr, mask, var, scale, perm_mask = NULL_TREE;
629538fd1498Szrj       edge pe = loop_preheader_edge (loop);
629638fd1498Szrj       gimple_seq seq;
629738fd1498Szrj       basic_block new_bb;
629838fd1498Szrj       enum { NARROW, NONE, WIDEN } modifier;
629938fd1498Szrj       poly_uint64 scatter_off_nunits
630038fd1498Szrj 	= TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
630138fd1498Szrj 
630238fd1498Szrj       if (known_eq (nunits, scatter_off_nunits))
630338fd1498Szrj 	modifier = NONE;
630438fd1498Szrj       else if (known_eq (nunits * 2, scatter_off_nunits))
630538fd1498Szrj 	{
630638fd1498Szrj 	  modifier = WIDEN;
630738fd1498Szrj 
630838fd1498Szrj 	  /* Currently gathers and scatters are only supported for
630938fd1498Szrj 	     fixed-length vectors.  */
631038fd1498Szrj 	  unsigned int count = scatter_off_nunits.to_constant ();
631138fd1498Szrj 	  vec_perm_builder sel (count, count, 1);
631238fd1498Szrj 	  for (i = 0; i < (unsigned int) count; ++i)
631338fd1498Szrj 	    sel.quick_push (i | (count / 2));
631438fd1498Szrj 
631538fd1498Szrj 	  vec_perm_indices indices (sel, 1, count);
631638fd1498Szrj 	  perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
631738fd1498Szrj 						  indices);
631838fd1498Szrj 	  gcc_assert (perm_mask != NULL_TREE);
631938fd1498Szrj 	}
632038fd1498Szrj       else if (known_eq (nunits, scatter_off_nunits * 2))
632138fd1498Szrj 	{
632238fd1498Szrj 	  modifier = NARROW;
632338fd1498Szrj 
632438fd1498Szrj 	  /* Currently gathers and scatters are only supported for
632538fd1498Szrj 	     fixed-length vectors.  */
632638fd1498Szrj 	  unsigned int count = nunits.to_constant ();
632738fd1498Szrj 	  vec_perm_builder sel (count, count, 1);
632838fd1498Szrj 	  for (i = 0; i < (unsigned int) count; ++i)
632938fd1498Szrj 	    sel.quick_push (i | (count / 2));
633038fd1498Szrj 
633138fd1498Szrj 	  vec_perm_indices indices (sel, 2, count);
633238fd1498Szrj 	  perm_mask = vect_gen_perm_mask_checked (vectype, indices);
633338fd1498Szrj 	  gcc_assert (perm_mask != NULL_TREE);
633438fd1498Szrj 	  ncopies *= 2;
633538fd1498Szrj 	}
633638fd1498Szrj       else
633738fd1498Szrj 	gcc_unreachable ();
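      /* E.g. for V4DI data and V8SI offsets (WIDEN, count == 8), the
	 selector built above is {4,5,6,7,4,5,6,7}, so the second
	 scatter of each pair sees the high half of the offset vector;
	 in the NARROW case the analogous permutation is applied to the
	 data vector instead and NCOPIES is doubled.  */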
633838fd1498Szrj 
633938fd1498Szrj       rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
634038fd1498Szrj       ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
634138fd1498Szrj       masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
634238fd1498Szrj       idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
634338fd1498Szrj       srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
634438fd1498Szrj       scaletype = TREE_VALUE (arglist);
634538fd1498Szrj 
634638fd1498Szrj       gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
634738fd1498Szrj 			   && TREE_CODE (rettype) == VOID_TYPE);
634838fd1498Szrj 
634938fd1498Szrj       ptr = fold_convert (ptrtype, gs_info.base);
635038fd1498Szrj       if (!is_gimple_min_invariant (ptr))
635138fd1498Szrj 	{
635238fd1498Szrj 	  ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
635338fd1498Szrj 	  new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
635438fd1498Szrj 	  gcc_assert (!new_bb);
635538fd1498Szrj 	}
635638fd1498Szrj 
635738fd1498Szrj       /* Currently we support only unconditional scatter stores,
635838fd1498Szrj 	 so mask should be all ones.  */
635938fd1498Szrj       mask = build_int_cst (masktype, -1);
636038fd1498Szrj       mask = vect_init_vector (stmt, mask, masktype, NULL);
636138fd1498Szrj 
636238fd1498Szrj       scale = build_int_cst (scaletype, gs_info.scale);
636338fd1498Szrj 
636438fd1498Szrj       prev_stmt_info = NULL;
636538fd1498Szrj       for (j = 0; j < ncopies; ++j)
636638fd1498Szrj 	{
636738fd1498Szrj 	  if (j == 0)
636838fd1498Szrj 	    {
636938fd1498Szrj 	      src = vec_oprnd1
637038fd1498Szrj 		= vect_get_vec_def_for_operand (op, stmt);
637138fd1498Szrj 	      op = vec_oprnd0
637238fd1498Szrj 		= vect_get_vec_def_for_operand (gs_info.offset, stmt);
637338fd1498Szrj 	    }
637438fd1498Szrj 	  else if (modifier != NONE && (j & 1))
637538fd1498Szrj 	    {
637638fd1498Szrj 	      if (modifier == WIDEN)
637738fd1498Szrj 		{
637838fd1498Szrj 		  src = vec_oprnd1
637938fd1498Szrj 		    = vect_get_vec_def_for_stmt_copy (rhs_dt, vec_oprnd1);
638038fd1498Szrj 		  op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
638138fd1498Szrj 					     stmt, gsi);
638238fd1498Szrj 		}
638338fd1498Szrj 	      else if (modifier == NARROW)
638438fd1498Szrj 		{
638538fd1498Szrj 		  src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
638638fd1498Szrj 					      stmt, gsi);
638738fd1498Szrj 		  op = vec_oprnd0
638838fd1498Szrj 		    = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
638938fd1498Szrj 						      vec_oprnd0);
639038fd1498Szrj 		}
639138fd1498Szrj 	      else
639238fd1498Szrj 		gcc_unreachable ();
639338fd1498Szrj 	    }
639438fd1498Szrj 	  else
639538fd1498Szrj 	    {
639638fd1498Szrj 	      src = vec_oprnd1
639738fd1498Szrj 		= vect_get_vec_def_for_stmt_copy (rhs_dt, vec_oprnd1);
639838fd1498Szrj 	      op = vec_oprnd0
639938fd1498Szrj 		= vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
640038fd1498Szrj 						  vec_oprnd0);
640138fd1498Szrj 	    }
640238fd1498Szrj 
640338fd1498Szrj 	  if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
640438fd1498Szrj 	    {
640538fd1498Szrj 	      gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
640638fd1498Szrj 				    TYPE_VECTOR_SUBPARTS (srctype)));
640738fd1498Szrj 	      var = vect_get_new_ssa_name (srctype, vect_simple_var);
640838fd1498Szrj 	      src = build1 (VIEW_CONVERT_EXPR, srctype, src);
640938fd1498Szrj 	      new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
641038fd1498Szrj 	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
641138fd1498Szrj 	      src = var;
641238fd1498Szrj 	    }
641338fd1498Szrj 
641438fd1498Szrj 	  if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
641538fd1498Szrj 	    {
641638fd1498Szrj 	      gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
641738fd1498Szrj 				    TYPE_VECTOR_SUBPARTS (idxtype)));
641838fd1498Szrj 	      var = vect_get_new_ssa_name (idxtype, vect_simple_var);
641938fd1498Szrj 	      op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
642038fd1498Szrj 	      new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
642138fd1498Szrj 	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
642238fd1498Szrj 	      op = var;
642338fd1498Szrj 	    }
642438fd1498Szrj 
642538fd1498Szrj 	  new_stmt
642638fd1498Szrj 	    = gimple_build_call (gs_info.decl, 5, ptr, mask, op, src, scale);
642738fd1498Szrj 
642838fd1498Szrj 	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
642938fd1498Szrj 
643038fd1498Szrj 	  if (prev_stmt_info == NULL)
643138fd1498Szrj 	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
643238fd1498Szrj 	  else
643338fd1498Szrj 	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
643438fd1498Szrj 	  prev_stmt_info = vinfo_for_stmt (new_stmt);
643538fd1498Szrj 	}
643638fd1498Szrj       return true;
643738fd1498Szrj     }
643838fd1498Szrj 
643938fd1498Szrj   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
644038fd1498Szrj     {
644138fd1498Szrj       gimple *group_stmt = GROUP_FIRST_ELEMENT (stmt_info);
644238fd1498Szrj       GROUP_STORE_COUNT (vinfo_for_stmt (group_stmt))++;
644338fd1498Szrj     }
644438fd1498Szrj 
644538fd1498Szrj   if (grouped_store)
644638fd1498Szrj     {
644738fd1498Szrj       /* FORNOW */
644838fd1498Szrj       gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
644938fd1498Szrj 
645038fd1498Szrj       /* We vectorize all the stmts of the interleaving group when we
645138fd1498Szrj 	 reach the last stmt in the group.  */
645238fd1498Szrj       if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
645338fd1498Szrj 	  < GROUP_SIZE (vinfo_for_stmt (first_stmt))
645438fd1498Szrj 	  && !slp)
645538fd1498Szrj 	{
645638fd1498Szrj 	  *vec_stmt = NULL;
645738fd1498Szrj 	  return true;
645838fd1498Szrj 	}
645938fd1498Szrj 
646038fd1498Szrj       if (slp)
646138fd1498Szrj         {
646238fd1498Szrj           grouped_store = false;
646338fd1498Szrj           /* VEC_NUM is the number of vect stmts to be created for this
646438fd1498Szrj              group.  */
646538fd1498Szrj           vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
646638fd1498Szrj           first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
646738fd1498Szrj 	  gcc_assert (GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt)) == first_stmt);
646838fd1498Szrj           first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
646938fd1498Szrj 	  op = vect_get_store_rhs (first_stmt);
647038fd1498Szrj         }
647138fd1498Szrj       else
647238fd1498Szrj         /* VEC_NUM is the number of vect stmts to be created for this
647338fd1498Szrj            group.  */
647438fd1498Szrj 	vec_num = group_size;
647538fd1498Szrj 
647638fd1498Szrj       ref_type = get_group_alias_ptr_type (first_stmt);
647738fd1498Szrj     }
647838fd1498Szrj   else
647938fd1498Szrj     ref_type = reference_alias_ptr_type (DR_REF (first_dr));
648038fd1498Szrj 
648138fd1498Szrj   if (dump_enabled_p ())
648238fd1498Szrj     dump_printf_loc (MSG_NOTE, vect_location,
648338fd1498Szrj                      "transform store. ncopies = %d\n", ncopies);
648438fd1498Szrj 
648538fd1498Szrj   if (memory_access_type == VMAT_ELEMENTWISE
648638fd1498Szrj       || memory_access_type == VMAT_STRIDED_SLP)
648738fd1498Szrj     {
648838fd1498Szrj       gimple_stmt_iterator incr_gsi;
648938fd1498Szrj       bool insert_after;
649038fd1498Szrj       gimple *incr;
649138fd1498Szrj       tree offvar;
649238fd1498Szrj       tree ivstep;
649338fd1498Szrj       tree running_off;
649438fd1498Szrj       tree stride_base, stride_step, alias_off;
649538fd1498Szrj       tree vec_oprnd;
649638fd1498Szrj       unsigned int g;
649738fd1498Szrj       /* Checked by get_load_store_type.  */
649838fd1498Szrj       unsigned int const_nunits = nunits.to_constant ();
649938fd1498Szrj 
650038fd1498Szrj       gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
650138fd1498Szrj       gcc_assert (!nested_in_vect_loop_p (loop, stmt));
650238fd1498Szrj 
650338fd1498Szrj       stride_base
650438fd1498Szrj 	= fold_build_pointer_plus
650538fd1498Szrj 	    (DR_BASE_ADDRESS (first_dr),
650638fd1498Szrj 	     size_binop (PLUS_EXPR,
650738fd1498Szrj 			 convert_to_ptrofftype (DR_OFFSET (first_dr)),
650838fd1498Szrj 			 convert_to_ptrofftype (DR_INIT (first_dr))));
650938fd1498Szrj       stride_step = fold_convert (sizetype, DR_STEP (first_dr));
651038fd1498Szrj 
651138fd1498Szrj       /* For a store with loop-invariant (but other than power-of-2)
651238fd1498Szrj          stride (i.e. not a grouped access) like so:
651338fd1498Szrj 
651438fd1498Szrj 	   for (i = 0; i < n; i += stride)
651538fd1498Szrj 	     array[i] = ...;
651638fd1498Szrj 
651738fd1498Szrj 	 we generate a new induction variable and new stores from
651838fd1498Szrj 	 the components of the (vectorized) rhs:
651938fd1498Szrj 
652038fd1498Szrj 	   for (j = 0; ; j += VF*stride)
652138fd1498Szrj 	     vectemp = ...;
652238fd1498Szrj 	     tmp1 = vectemp[0];
652338fd1498Szrj 	     array[j] = tmp1;
652438fd1498Szrj 	     tmp2 = vectemp[1];
652538fd1498Szrj 	     array[j + stride] = tmp2;
652638fd1498Szrj 	     ...
652738fd1498Szrj          */
652838fd1498Szrj 
652938fd1498Szrj       unsigned nstores = const_nunits;
653038fd1498Szrj       unsigned lnel = 1;
653138fd1498Szrj       tree ltype = elem_type;
653238fd1498Szrj       tree lvectype = vectype;
653338fd1498Szrj       if (slp)
653438fd1498Szrj 	{
653538fd1498Szrj 	  if (group_size < const_nunits
653638fd1498Szrj 	      && const_nunits % group_size == 0)
653738fd1498Szrj 	    {
653838fd1498Szrj 	      nstores = const_nunits / group_size;
653938fd1498Szrj 	      lnel = group_size;
654038fd1498Szrj 	      ltype = build_vector_type (elem_type, group_size);
654138fd1498Szrj 	      lvectype = vectype;
654238fd1498Szrj 
654338fd1498Szrj 	      /* First check whether the target can extract the sub-vector
654438fd1498Szrj 		 elts directly via the vec_extract optab.  */
654538fd1498Szrj 	      scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
654638fd1498Szrj 	      machine_mode vmode;
654738fd1498Szrj 	      if (!mode_for_vector (elmode, group_size).exists (&vmode)
654838fd1498Szrj 		  || !VECTOR_MODE_P (vmode)
654938fd1498Szrj 		  || !targetm.vector_mode_supported_p (vmode)
655038fd1498Szrj 		  || (convert_optab_handler (vec_extract_optab,
655138fd1498Szrj 					     TYPE_MODE (vectype), vmode)
655238fd1498Szrj 		      == CODE_FOR_nothing))
655338fd1498Szrj 		{
655438fd1498Szrj 		  /* Try to avoid emitting an extract of vector elements
655538fd1498Szrj 		     by performing the extracts using an integer type of the
655638fd1498Szrj 		     same size, extracting from a vector of those and then
655738fd1498Szrj 		     re-interpreting it as the original vector type if
655838fd1498Szrj 		     supported.  */
655938fd1498Szrj 		  unsigned lsize
656038fd1498Szrj 		    = group_size * GET_MODE_BITSIZE (elmode);
656138fd1498Szrj 		  elmode = int_mode_for_size (lsize, 0).require ();
656238fd1498Szrj 		  unsigned int lnunits = const_nunits / group_size;
656338fd1498Szrj 		  /* If we can't construct such a vector fall back to
656438fd1498Szrj 		     element extracts from the original vector type and
656538fd1498Szrj 		     element size stores.  */
656638fd1498Szrj 		  if (mode_for_vector (elmode, lnunits).exists (&vmode)
656738fd1498Szrj 		      && VECTOR_MODE_P (vmode)
656838fd1498Szrj 		      && targetm.vector_mode_supported_p (vmode)
656938fd1498Szrj 		      && (convert_optab_handler (vec_extract_optab,
657038fd1498Szrj 						 vmode, elmode)
657138fd1498Szrj 			  != CODE_FOR_nothing))
657238fd1498Szrj 		    {
657338fd1498Szrj 		      nstores = lnunits;
657438fd1498Szrj 		      lnel = group_size;
657538fd1498Szrj 		      ltype = build_nonstandard_integer_type (lsize, 1);
657638fd1498Szrj 		      lvectype = build_vector_type (ltype, nstores);
657738fd1498Szrj 		    }
657838fd1498Szrj 		  /* Else fall back to vector extraction anyway.
657938fd1498Szrj 		     Fewer stores are more important than avoiding spilling
658038fd1498Szrj 		     of the vector we extract from.  Compared to the
658138fd1498Szrj 		     construction case in vectorizable_load no store-forwarding
658238fd1498Szrj 		     issue exists here for reasonable archs.  */
658338fd1498Szrj 		}
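	      /* For instance, with V8SI stores and group_size == 2: if
		 V2SI extraction is unsupported, the value is instead
		 viewed as four 64-bit integers and four DImode
		 extract-and-store operations are emitted, each covering
		 one pair of ints.  */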
658438fd1498Szrj 	    }
658538fd1498Szrj 	  else if (group_size >= const_nunits
658638fd1498Szrj 		   && group_size % const_nunits == 0)
658738fd1498Szrj 	    {
658838fd1498Szrj 	      nstores = 1;
658938fd1498Szrj 	      lnel = const_nunits;
659038fd1498Szrj 	      ltype = vectype;
659138fd1498Szrj 	      lvectype = vectype;
659238fd1498Szrj 	    }
659338fd1498Szrj 	  ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
659438fd1498Szrj 	  ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
659538fd1498Szrj 	}
659638fd1498Szrj 
659738fd1498Szrj       ivstep = stride_step;
659838fd1498Szrj       ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
659938fd1498Szrj 			    build_int_cst (TREE_TYPE (ivstep), vf));
660038fd1498Szrj 
660138fd1498Szrj       standard_iv_increment_position (loop, &incr_gsi, &insert_after);
660238fd1498Szrj 
660338fd1498Szrj       stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
660438fd1498Szrj       ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
660538fd1498Szrj       create_iv (stride_base, ivstep, NULL,
660638fd1498Szrj 		 loop, &incr_gsi, insert_after,
660738fd1498Szrj 		 &offvar, NULL);
660838fd1498Szrj       incr = gsi_stmt (incr_gsi);
660938fd1498Szrj       set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
661038fd1498Szrj 
661138fd1498Szrj       stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
661238fd1498Szrj 
661338fd1498Szrj       prev_stmt_info = NULL;
661438fd1498Szrj       alias_off = build_int_cst (ref_type, 0);
661538fd1498Szrj       next_stmt = first_stmt;
661638fd1498Szrj       for (g = 0; g < group_size; g++)
661738fd1498Szrj 	{
661838fd1498Szrj 	  running_off = offvar;
661938fd1498Szrj 	  if (g)
662038fd1498Szrj 	    {
662138fd1498Szrj 	      tree size = TYPE_SIZE_UNIT (ltype);
662238fd1498Szrj 	      tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
662338fd1498Szrj 				      size);
662438fd1498Szrj 	      tree newoff = copy_ssa_name (running_off, NULL);
662538fd1498Szrj 	      incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
662638fd1498Szrj 					  running_off, pos);
662738fd1498Szrj 	      vect_finish_stmt_generation (stmt, incr, gsi);
662838fd1498Szrj 	      running_off = newoff;
662938fd1498Szrj 	    }
663038fd1498Szrj 	  unsigned int group_el = 0;
663138fd1498Szrj 	  unsigned HOST_WIDE_INT
663238fd1498Szrj 	    elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
663338fd1498Szrj 	  for (j = 0; j < ncopies; j++)
663438fd1498Szrj 	    {
663538fd1498Szrj 	      /* We've set op and dt above, from vect_get_store_rhs,
663638fd1498Szrj 		 and first_stmt == stmt.  */
663738fd1498Szrj 	      if (j == 0)
663838fd1498Szrj 		{
663938fd1498Szrj 		  if (slp)
664038fd1498Szrj 		    {
664138fd1498Szrj 		      vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
664238fd1498Szrj 					 slp_node);
664338fd1498Szrj 		      vec_oprnd = vec_oprnds[0];
664438fd1498Szrj 		    }
664538fd1498Szrj 		  else
664638fd1498Szrj 		    {
664738fd1498Szrj 		      op = vect_get_store_rhs (next_stmt);
664838fd1498Szrj 		      vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
664938fd1498Szrj 		    }
665038fd1498Szrj 		}
665138fd1498Szrj 	      else
665238fd1498Szrj 		{
665338fd1498Szrj 		  if (slp)
665438fd1498Szrj 		    vec_oprnd = vec_oprnds[j];
665538fd1498Szrj 		  else
665638fd1498Szrj 		    {
665738fd1498Szrj 		      vect_is_simple_use (op, vinfo, &def_stmt, &rhs_dt);
665838fd1498Szrj 		      vec_oprnd = vect_get_vec_def_for_stmt_copy (rhs_dt,
665938fd1498Szrj 								  vec_oprnd);
666038fd1498Szrj 		    }
666138fd1498Szrj 		}
666238fd1498Szrj 	      /* Pun the vector to extract from if necessary.  */
666338fd1498Szrj 	      if (lvectype != vectype)
666438fd1498Szrj 		{
666538fd1498Szrj 		  tree tem = make_ssa_name (lvectype);
666638fd1498Szrj 		  gimple *pun
666738fd1498Szrj 		    = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
666838fd1498Szrj 							lvectype, vec_oprnd));
666938fd1498Szrj 		  vect_finish_stmt_generation (stmt, pun, gsi);
667038fd1498Szrj 		  vec_oprnd = tem;
667138fd1498Szrj 		}
667238fd1498Szrj 	      for (i = 0; i < nstores; i++)
667338fd1498Szrj 		{
667438fd1498Szrj 		  tree newref, newoff;
667538fd1498Szrj 		  gimple *incr, *assign;
667638fd1498Szrj 		  tree size = TYPE_SIZE (ltype);
667738fd1498Szrj 		  /* Extract the i'th component.  */
667838fd1498Szrj 		  tree pos = fold_build2 (MULT_EXPR, bitsizetype,
667938fd1498Szrj 					  bitsize_int (i), size);
668038fd1498Szrj 		  tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
668138fd1498Szrj 					   size, pos);
668238fd1498Szrj 
668338fd1498Szrj 		  elem = force_gimple_operand_gsi (gsi, elem, true,
668438fd1498Szrj 						   NULL_TREE, true,
668538fd1498Szrj 						   GSI_SAME_STMT);
668638fd1498Szrj 
668738fd1498Szrj 		  tree this_off = build_int_cst (TREE_TYPE (alias_off),
668838fd1498Szrj 						 group_el * elsz);
668938fd1498Szrj 		  newref = build2 (MEM_REF, ltype,
669038fd1498Szrj 				   running_off, this_off);
669138fd1498Szrj 		  vect_copy_ref_info (newref, DR_REF (first_dr));
669238fd1498Szrj 
669338fd1498Szrj 		  /* And store it to *running_off.  */
669438fd1498Szrj 		  assign = gimple_build_assign (newref, elem);
669538fd1498Szrj 		  vect_finish_stmt_generation (stmt, assign, gsi);
669638fd1498Szrj 
669738fd1498Szrj 		  group_el += lnel;
669838fd1498Szrj 		  if (! slp
669938fd1498Szrj 		      || group_el == group_size)
670038fd1498Szrj 		    {
670138fd1498Szrj 		      newoff = copy_ssa_name (running_off, NULL);
670238fd1498Szrj 		      incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
670338fd1498Szrj 						  running_off, stride_step);
670438fd1498Szrj 		      vect_finish_stmt_generation (stmt, incr, gsi);
670538fd1498Szrj 
670638fd1498Szrj 		      running_off = newoff;
670738fd1498Szrj 		      group_el = 0;
670838fd1498Szrj 		    }
670938fd1498Szrj 		  if (g == group_size - 1
671038fd1498Szrj 		      && !slp)
671138fd1498Szrj 		    {
671238fd1498Szrj 		      if (j == 0 && i == 0)
671338fd1498Szrj 			STMT_VINFO_VEC_STMT (stmt_info)
671438fd1498Szrj 			    = *vec_stmt = assign;
671538fd1498Szrj 		      else
671638fd1498Szrj 			STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
671738fd1498Szrj 		      prev_stmt_info = vinfo_for_stmt (assign);
671838fd1498Szrj 		    }
671938fd1498Szrj 		}
672038fd1498Szrj 	    }
672138fd1498Szrj 	  next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
672238fd1498Szrj 	  if (slp)
672338fd1498Szrj 	    break;
672438fd1498Szrj 	}
672538fd1498Szrj 
672638fd1498Szrj       vec_oprnds.release ();
672738fd1498Szrj       return true;
672838fd1498Szrj     }
672938fd1498Szrj 
673038fd1498Szrj   auto_vec<tree> dr_chain (group_size);
673138fd1498Szrj   oprnds.create (group_size);
673238fd1498Szrj 
673338fd1498Szrj   alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
673438fd1498Szrj   gcc_assert (alignment_support_scheme);
673538fd1498Szrj   vec_loop_masks *loop_masks
673638fd1498Szrj     = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
673738fd1498Szrj        ? &LOOP_VINFO_MASKS (loop_vinfo)
673838fd1498Szrj        : NULL);
673938fd1498Szrj   /* Targets with store-lane instructions must not require explicit
674038fd1498Szrj      realignment.  vect_supportable_dr_alignment always returns either
674138fd1498Szrj      dr_aligned or dr_unaligned_supported for masked operations.  */
674238fd1498Szrj   gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
674338fd1498Szrj 	       && !mask
674438fd1498Szrj 	       && !loop_masks)
674538fd1498Szrj 	      || alignment_support_scheme == dr_aligned
674638fd1498Szrj 	      || alignment_support_scheme == dr_unaligned_supported);
674738fd1498Szrj 
674838fd1498Szrj   if (memory_access_type == VMAT_CONTIGUOUS_DOWN
674938fd1498Szrj       || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
675038fd1498Szrj     offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
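  /* E.g. with 4-element vectors the bias is -3 elements: for a
     negative-step access, the vector covering iterations i, i-1, i-2
     and i-3 must start at the lowest of those addresses.  */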
675138fd1498Szrj 
675238fd1498Szrj   tree bump;
675338fd1498Szrj   tree vec_offset = NULL_TREE;
675438fd1498Szrj   if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
675538fd1498Szrj     {
675638fd1498Szrj       aggr_type = NULL_TREE;
675738fd1498Szrj       bump = NULL_TREE;
675838fd1498Szrj     }
675938fd1498Szrj   else if (memory_access_type == VMAT_GATHER_SCATTER)
676038fd1498Szrj     {
676138fd1498Szrj       aggr_type = elem_type;
676238fd1498Szrj       vect_get_strided_load_store_ops (stmt, loop_vinfo, &gs_info,
676338fd1498Szrj 				       &bump, &vec_offset);
676438fd1498Szrj     }
676538fd1498Szrj   else
676638fd1498Szrj     {
676738fd1498Szrj       if (memory_access_type == VMAT_LOAD_STORE_LANES)
676838fd1498Szrj 	aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
676938fd1498Szrj       else
677038fd1498Szrj 	aggr_type = vectype;
677138fd1498Szrj       bump = vect_get_data_ptr_increment (dr, aggr_type, memory_access_type);
677238fd1498Szrj     }
677338fd1498Szrj 
677438fd1498Szrj   if (mask)
677538fd1498Szrj     LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
677638fd1498Szrj 
677738fd1498Szrj   /* In case the vectorization factor (VF) is bigger than the number
677838fd1498Szrj      of elements that we can fit in a vectype (nunits), we have to generate
677938fd1498Szrj      more than one vector stmt - i.e., we need to "unroll" the
678038fd1498Szrj      vector stmt by a factor VF/nunits.  For more details see documentation in
678138fd1498Szrj      vect_get_vec_def_for_copy_stmt.  */
678238fd1498Szrj 
678338fd1498Szrj   /* In case of interleaving (non-unit grouped access):
678438fd1498Szrj 
678538fd1498Szrj         S1:  &base + 2 = x2
678638fd1498Szrj         S2:  &base = x0
678738fd1498Szrj         S3:  &base + 1 = x1
678838fd1498Szrj         S4:  &base + 3 = x3
678938fd1498Szrj 
679038fd1498Szrj      We create vectorized stores starting from base address (the access of the
679138fd1498Szrj      first stmt in the chain (S2 in the above example), when the last store stmt
679238fd1498Szrj      of the chain (S4) is reached:
679338fd1498Szrj 
679438fd1498Szrj         VS1: &base = vx2
679538fd1498Szrj 	VS2: &base + vec_size*1 = vx0
679638fd1498Szrj 	VS3: &base + vec_size*2 = vx1
679738fd1498Szrj 	VS4: &base + vec_size*3 = vx3
679838fd1498Szrj 
679938fd1498Szrj      Then permutation statements are generated:
680038fd1498Szrj 
680138fd1498Szrj 	VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
680238fd1498Szrj 	VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
680338fd1498Szrj 	...
680438fd1498Szrj 
680538fd1498Szrj      And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
680638fd1498Szrj      (the order of the data-refs in the output of vect_permute_store_chain
680738fd1498Szrj      corresponds to the order of scalar stmts in the interleaving chain - see
680838fd1498Szrj      the documentation of vect_permute_store_chain()).
680938fd1498Szrj 
681038fd1498Szrj      In case of both multiple types and interleaving, above vector stores and
681138fd1498Szrj      permutation stmts are created for every copy.  The result vector stmts are
681238fd1498Szrj      put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
681338fd1498Szrj      STMT_VINFO_RELATED_STMT for the next copies.
681438fd1498Szrj   */
681538fd1498Szrj 
681638fd1498Szrj   prev_stmt_info = NULL;
681738fd1498Szrj   tree vec_mask = NULL_TREE;
681838fd1498Szrj   for (j = 0; j < ncopies; j++)
681938fd1498Szrj     {
682038fd1498Szrj 
682138fd1498Szrj       if (j == 0)
682238fd1498Szrj 	{
682338fd1498Szrj           if (slp)
682438fd1498Szrj             {
682538fd1498Szrj 	      /* Get vectorized arguments for SLP_NODE.  */
682638fd1498Szrj               vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
682738fd1498Szrj                                  NULL, slp_node);
682838fd1498Szrj 
682938fd1498Szrj               vec_oprnd = vec_oprnds[0];
683038fd1498Szrj             }
683138fd1498Szrj           else
683238fd1498Szrj             {
683338fd1498Szrj 	      /* For interleaved stores we collect vectorized defs for all the
683438fd1498Szrj 		 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
683538fd1498Szrj 		 used as an input to vect_permute_store_chain(), and OPRNDS as
683638fd1498Szrj 		 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
683738fd1498Szrj 
683838fd1498Szrj 		 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
683938fd1498Szrj 		 OPRNDS are of size 1.  */
684038fd1498Szrj 	      next_stmt = first_stmt;
684138fd1498Szrj 	      for (i = 0; i < group_size; i++)
684238fd1498Szrj 		{
684338fd1498Szrj 		  /* Since gaps are not supported for interleaved stores,
684438fd1498Szrj 		     GROUP_SIZE is the exact number of stmts in the chain.
684538fd1498Szrj 		     Therefore, NEXT_STMT can't be NULL.  If there is no
684638fd1498Szrj 		     interleaving, GROUP_SIZE is 1, and only one iteration
684738fd1498Szrj 		     of the loop is executed.  */
684838fd1498Szrj 		  op = vect_get_store_rhs (next_stmt);
684938fd1498Szrj 		  vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
685038fd1498Szrj 		  dr_chain.quick_push (vec_oprnd);
685138fd1498Szrj 		  oprnds.quick_push (vec_oprnd);
685238fd1498Szrj 		  next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
685338fd1498Szrj 		}
685438fd1498Szrj 	      if (mask)
685538fd1498Szrj 		vec_mask = vect_get_vec_def_for_operand (mask, stmt,
685638fd1498Szrj 							 mask_vectype);
685738fd1498Szrj 	    }
685838fd1498Szrj 
685938fd1498Szrj 	  /* We should have caught mismatched types earlier.  */
686038fd1498Szrj 	  gcc_assert (useless_type_conversion_p (vectype,
686138fd1498Szrj 						 TREE_TYPE (vec_oprnd)));
686238fd1498Szrj 	  bool simd_lane_access_p
686338fd1498Szrj 	    = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
686438fd1498Szrj 	  if (simd_lane_access_p
686538fd1498Szrj 	      && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
686638fd1498Szrj 	      && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
686738fd1498Szrj 	      && integer_zerop (DR_OFFSET (first_dr))
686838fd1498Szrj 	      && integer_zerop (DR_INIT (first_dr))
686938fd1498Szrj 	      && alias_sets_conflict_p (get_alias_set (aggr_type),
687038fd1498Szrj 					get_alias_set (TREE_TYPE (ref_type))))
687138fd1498Szrj 	    {
687238fd1498Szrj 	      dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
687338fd1498Szrj 	      dataref_offset = build_int_cst (ref_type, 0);
687438fd1498Szrj 	      inv_p = false;
687538fd1498Szrj 	    }
687638fd1498Szrj 	  else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
687738fd1498Szrj 	    {
687838fd1498Szrj 	      vect_get_gather_scatter_ops (loop, stmt, &gs_info,
687938fd1498Szrj 					   &dataref_ptr, &vec_offset);
688038fd1498Szrj 	      inv_p = false;
688138fd1498Szrj 	    }
688238fd1498Szrj 	  else
688338fd1498Szrj 	    dataref_ptr
688438fd1498Szrj 	      = vect_create_data_ref_ptr (first_stmt, aggr_type,
688538fd1498Szrj 					  simd_lane_access_p ? loop : NULL,
688638fd1498Szrj 					  offset, &dummy, gsi, &ptr_incr,
688738fd1498Szrj 					  simd_lane_access_p, &inv_p,
688838fd1498Szrj 					  NULL_TREE, bump);
688938fd1498Szrj 	  gcc_assert (bb_vinfo || !inv_p);
689038fd1498Szrj 	}
689138fd1498Szrj       else
689238fd1498Szrj 	{
689338fd1498Szrj 	  /* For interleaved stores we created vectorized defs for all the
689438fd1498Szrj 	     defs stored in OPRNDS in the previous iteration (previous copy).
689538fd1498Szrj 	     DR_CHAIN is then used as an input to vect_permute_store_chain(),
689638fd1498Szrj 	     and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
689738fd1498Szrj 	     next copy.
689838fd1498Szrj 	     If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
689938fd1498Szrj 	     OPRNDS are of size 1.  */
690038fd1498Szrj 	  for (i = 0; i < group_size; i++)
690138fd1498Szrj 	    {
690238fd1498Szrj 	      op = oprnds[i];
690338fd1498Szrj 	      vect_is_simple_use (op, vinfo, &def_stmt, &rhs_dt);
690438fd1498Szrj 	      vec_oprnd = vect_get_vec_def_for_stmt_copy (rhs_dt, op);
690538fd1498Szrj 	      dr_chain[i] = vec_oprnd;
690638fd1498Szrj 	      oprnds[i] = vec_oprnd;
690738fd1498Szrj 	    }
690838fd1498Szrj 	  if (mask)
690938fd1498Szrj 	    vec_mask = vect_get_vec_def_for_stmt_copy (mask_dt, vec_mask);
691038fd1498Szrj 	  if (dataref_offset)
691138fd1498Szrj 	    dataref_offset
691238fd1498Szrj 	      = int_const_binop (PLUS_EXPR, dataref_offset, bump);
691338fd1498Szrj 	  else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
691438fd1498Szrj 	    vec_offset = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
691538fd1498Szrj 							 vec_offset);
691638fd1498Szrj 	  else
691738fd1498Szrj 	    dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
691838fd1498Szrj 					   bump);
691938fd1498Szrj 	}
692038fd1498Szrj 
692138fd1498Szrj       if (memory_access_type == VMAT_LOAD_STORE_LANES)
692238fd1498Szrj 	{
692338fd1498Szrj 	  tree vec_array;
692438fd1498Szrj 
692538fd1498Szrj 	  /* Combine all the vectors into an array.  */
692638fd1498Szrj 	  vec_array = create_vector_array (vectype, vec_num);
692738fd1498Szrj 	  for (i = 0; i < vec_num; i++)
692838fd1498Szrj 	    {
692938fd1498Szrj 	      vec_oprnd = dr_chain[i];
693038fd1498Szrj 	      write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
693138fd1498Szrj 	    }
693238fd1498Szrj 
693338fd1498Szrj 	  tree final_mask = NULL;
693438fd1498Szrj 	  if (loop_masks)
693538fd1498Szrj 	    final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
693638fd1498Szrj 					     vectype, j);
693738fd1498Szrj 	  if (vec_mask)
693838fd1498Szrj 	    final_mask = prepare_load_store_mask (mask_vectype, final_mask,
693938fd1498Szrj 						  vec_mask, gsi);
694038fd1498Szrj 
694138fd1498Szrj 	  gcall *call;
694238fd1498Szrj 	  if (final_mask)
694338fd1498Szrj 	    {
694438fd1498Szrj 	      /* Emit:
694538fd1498Szrj 		   MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
694638fd1498Szrj 				     VEC_ARRAY).  */
694738fd1498Szrj 	      unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
694838fd1498Szrj 	      tree alias_ptr = build_int_cst (ref_type, align);
694938fd1498Szrj 	      call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
695038fd1498Szrj 						 dataref_ptr, alias_ptr,
695138fd1498Szrj 						 final_mask, vec_array);
695238fd1498Szrj 	    }
695338fd1498Szrj 	  else
695438fd1498Szrj 	    {
695538fd1498Szrj 	      /* Emit:
695638fd1498Szrj 		   MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY).  */
695738fd1498Szrj 	      data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
695838fd1498Szrj 	      call = gimple_build_call_internal (IFN_STORE_LANES, 1,
695938fd1498Szrj 						 vec_array);
696038fd1498Szrj 	      gimple_call_set_lhs (call, data_ref);
696138fd1498Szrj 	    }
696238fd1498Szrj 	  gimple_call_set_nothrow (call, true);
696338fd1498Szrj 	  new_stmt = call;
696438fd1498Szrj 	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
696538fd1498Szrj 	}
696638fd1498Szrj       else
696738fd1498Szrj 	{
696838fd1498Szrj 	  new_stmt = NULL;
696938fd1498Szrj 	  if (grouped_store)
697038fd1498Szrj 	    {
697138fd1498Szrj 	      if (j == 0)
697238fd1498Szrj 		result_chain.create (group_size);
697338fd1498Szrj 	      /* Permute.  */
697438fd1498Szrj 	      vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
697538fd1498Szrj 					&result_chain);
697638fd1498Szrj 	    }
697738fd1498Szrj 
697838fd1498Szrj 	  next_stmt = first_stmt;
697938fd1498Szrj 	  for (i = 0; i < vec_num; i++)
698038fd1498Szrj 	    {
698138fd1498Szrj 	      unsigned align, misalign;
698238fd1498Szrj 
698338fd1498Szrj 	      tree final_mask = NULL_TREE;
698438fd1498Szrj 	      if (loop_masks)
698538fd1498Szrj 		final_mask = vect_get_loop_mask (gsi, loop_masks,
698638fd1498Szrj 						 vec_num * ncopies,
698738fd1498Szrj 						 vectype, vec_num * j + i);
698838fd1498Szrj 	      if (vec_mask)
698938fd1498Szrj 		final_mask = prepare_load_store_mask (mask_vectype, final_mask,
699038fd1498Szrj 						      vec_mask, gsi);
699138fd1498Szrj 
699238fd1498Szrj 	      if (memory_access_type == VMAT_GATHER_SCATTER)
699338fd1498Szrj 		{
699438fd1498Szrj 		  tree scale = size_int (gs_info.scale);
699538fd1498Szrj 		  gcall *call;
699638fd1498Szrj 		  if (loop_masks)
699738fd1498Szrj 		    call = gimple_build_call_internal
699838fd1498Szrj 		      (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
699938fd1498Szrj 		       scale, vec_oprnd, final_mask);
700038fd1498Szrj 		  else
700138fd1498Szrj 		    call = gimple_build_call_internal
700238fd1498Szrj 		      (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
700338fd1498Szrj 		       scale, vec_oprnd);
700438fd1498Szrj 		  gimple_call_set_nothrow (call, true);
700538fd1498Szrj 		  new_stmt = call;
700638fd1498Szrj 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
700738fd1498Szrj 		  break;
700838fd1498Szrj 		}
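	      /* In a GIMPLE dump the calls built above appear roughly as
		 (illustrative):

		   .MASK_SCATTER_STORE (dataref_ptr, vec_offset, scale,
					vec_oprnd, final_mask);

		 or, when unmasked:

		   .SCATTER_STORE (dataref_ptr, vec_offset, scale,
				   vec_oprnd);  */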
700938fd1498Szrj 
701038fd1498Szrj 	      if (i > 0)
701138fd1498Szrj 		/* Bump the vector pointer.  */
701238fd1498Szrj 		dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
701338fd1498Szrj 					       stmt, bump);
701438fd1498Szrj 
701538fd1498Szrj 	      if (slp)
701638fd1498Szrj 		vec_oprnd = vec_oprnds[i];
701738fd1498Szrj 	      else if (grouped_store)
701838fd1498Szrj 		/* For grouped stores vectorized defs are interleaved in
701938fd1498Szrj 		   vect_permute_store_chain().  */
702038fd1498Szrj 		vec_oprnd = result_chain[i];
702138fd1498Szrj 
702238fd1498Szrj 	      align = DR_TARGET_ALIGNMENT (first_dr);
702338fd1498Szrj 	      if (aligned_access_p (first_dr))
702438fd1498Szrj 		misalign = 0;
702538fd1498Szrj 	      else if (DR_MISALIGNMENT (first_dr) == -1)
702638fd1498Szrj 		{
702738fd1498Szrj 		  align = dr_alignment (vect_dr_behavior (first_dr));
702838fd1498Szrj 		  misalign = 0;
702938fd1498Szrj 		}
703038fd1498Szrj 	      else
703138fd1498Szrj 		misalign = DR_MISALIGNMENT (first_dr);
703238fd1498Szrj 	      if (dataref_offset == NULL_TREE
703338fd1498Szrj 		  && TREE_CODE (dataref_ptr) == SSA_NAME)
703438fd1498Szrj 		set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
703538fd1498Szrj 					misalign);
703638fd1498Szrj 
703738fd1498Szrj 	      if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
703838fd1498Szrj 		{
703938fd1498Szrj 		  tree perm_mask = perm_mask_for_reverse (vectype);
704038fd1498Szrj 		  tree perm_dest
704138fd1498Szrj 		    = vect_create_destination_var (vect_get_store_rhs (stmt),
704238fd1498Szrj 						   vectype);
704338fd1498Szrj 		  tree new_temp = make_ssa_name (perm_dest);
704438fd1498Szrj 
704538fd1498Szrj 		  /* Generate the permute statement.  */
704638fd1498Szrj 		  gimple *perm_stmt
704738fd1498Szrj 		    = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
704838fd1498Szrj 					   vec_oprnd, perm_mask);
704938fd1498Szrj 		  vect_finish_stmt_generation (stmt, perm_stmt, gsi);
705038fd1498Szrj 
705138fd1498Szrj 		  perm_stmt = SSA_NAME_DEF_STMT (new_temp);
705238fd1498Szrj 		  vec_oprnd = new_temp;
705338fd1498Szrj 		}
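	      /* Illustrative example: for V4SI, perm_mask_for_reverse
		 yields the selector { 3, 2, 1, 0 }, so the permute stmt
		 built above is

		   vect_x = VEC_PERM_EXPR <vec_oprnd, vec_oprnd,
					   { 3, 2, 1, 0 }>;

		 reversing the element order before the store.  */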
705438fd1498Szrj 
705538fd1498Szrj 	      /* Arguments are ready.  Create the new vector stmt.  */
705638fd1498Szrj 	      if (final_mask)
705738fd1498Szrj 		{
705838fd1498Szrj 		  align = least_bit_hwi (misalign | align);
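		  /* E.g. (illustrative): ALIGN = 16 and MISALIGN = 4 give
		     least_bit_hwi (0b10100) = 4, the strongest alignment
		     guarantee that holds for every access.  */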
705938fd1498Szrj 		  tree ptr = build_int_cst (ref_type, align);
706038fd1498Szrj 		  gcall *call
706138fd1498Szrj 		    = gimple_build_call_internal (IFN_MASK_STORE, 4,
706238fd1498Szrj 						  dataref_ptr, ptr,
706338fd1498Szrj 						  final_mask, vec_oprnd);
706438fd1498Szrj 		  gimple_call_set_nothrow (call, true);
706538fd1498Szrj 		  new_stmt = call;
706638fd1498Szrj 		}
706738fd1498Szrj 	      else
706838fd1498Szrj 		{
706938fd1498Szrj 		  data_ref = fold_build2 (MEM_REF, vectype,
707038fd1498Szrj 					  dataref_ptr,
707138fd1498Szrj 					  dataref_offset
707238fd1498Szrj 					  ? dataref_offset
707338fd1498Szrj 					  : build_int_cst (ref_type, 0));
707438fd1498Szrj 		  if (aligned_access_p (first_dr))
707538fd1498Szrj 		    ;
707638fd1498Szrj 		  else if (DR_MISALIGNMENT (first_dr) == -1)
707738fd1498Szrj 		    TREE_TYPE (data_ref)
707838fd1498Szrj 		      = build_aligned_type (TREE_TYPE (data_ref),
707938fd1498Szrj 					    align * BITS_PER_UNIT);
708038fd1498Szrj 		  else
708138fd1498Szrj 		    TREE_TYPE (data_ref)
708238fd1498Szrj 		      = build_aligned_type (TREE_TYPE (data_ref),
708338fd1498Szrj 					    TYPE_ALIGN (elem_type));
708438fd1498Szrj 		  vect_copy_ref_info (data_ref, DR_REF (first_dr));
708538fd1498Szrj 		  new_stmt = gimple_build_assign (data_ref, vec_oprnd);
708638fd1498Szrj 		}
708738fd1498Szrj 	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
708838fd1498Szrj 
708938fd1498Szrj 	      if (slp)
709038fd1498Szrj 		continue;
709138fd1498Szrj 
709238fd1498Szrj 	      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
709338fd1498Szrj 	      if (!next_stmt)
709438fd1498Szrj 		break;
709538fd1498Szrj 	    }
709638fd1498Szrj 	}
709738fd1498Szrj       if (!slp)
709838fd1498Szrj 	{
709938fd1498Szrj 	  if (j == 0)
710038fd1498Szrj 	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
710138fd1498Szrj 	  else
710238fd1498Szrj 	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
710338fd1498Szrj 	  prev_stmt_info = vinfo_for_stmt (new_stmt);
710438fd1498Szrj 	}
710538fd1498Szrj     }
710638fd1498Szrj 
710738fd1498Szrj   oprnds.release ();
710838fd1498Szrj   result_chain.release ();
710938fd1498Szrj   vec_oprnds.release ();
711038fd1498Szrj 
711138fd1498Szrj   return true;
711238fd1498Szrj }
711338fd1498Szrj 
711438fd1498Szrj /* Given a vector type VECTYPE, turn the permutation SEL into the equivalent
711538fd1498Szrj    VECTOR_CST mask.  No check is made that the target platform supports the
711638fd1498Szrj    mask, so callers may wish to test can_vec_perm_const_p separately, or use
711738fd1498Szrj    vect_gen_perm_mask_checked.  */
711838fd1498Szrj 
711938fd1498Szrj tree
vect_gen_perm_mask_any(tree vectype,const vec_perm_indices & sel)712038fd1498Szrj vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
712138fd1498Szrj {
712238fd1498Szrj   tree mask_type;
712338fd1498Szrj 
712438fd1498Szrj   poly_uint64 nunits = sel.length ();
712538fd1498Szrj   gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
712638fd1498Szrj 
712738fd1498Szrj   mask_type = build_vector_type (ssizetype, nunits);
712838fd1498Szrj   return vec_perm_indices_to_tree (mask_type, sel);
712938fd1498Szrj }
713038fd1498Szrj 
713138fd1498Szrj /* Checked version of vect_gen_perm_mask_any.  Asserts can_vec_perm_const_p,
713238fd1498Szrj    i.e. that the target supports the pattern _for arbitrary input vectors_.  */
713338fd1498Szrj 
713438fd1498Szrj tree
vect_gen_perm_mask_checked(tree vectype,const vec_perm_indices & sel)713538fd1498Szrj vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
713638fd1498Szrj {
713738fd1498Szrj   gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
713838fd1498Szrj   return vect_gen_perm_mask_any (vectype, sel);
713938fd1498Szrj }
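
/* A minimal usage sketch (illustrative, and assuming the number of
   elements NUNITS is constant): build a selector that reverses the
   elements of VECTYPE and turn it into a mask, asserting that the
   target supports the permutation:

     vec_perm_builder sel (nunits, nunits, 1);
     for (unsigned int i = 0; i < nunits; ++i)
       sel.quick_push (nunits - 1 - i);
     vec_perm_indices indices (sel, 1, nunits);
     tree mask = vect_gen_perm_mask_checked (vectype, indices);

   perm_mask_for_reverse in this file follows the same idea, but with a
   stepped encoding that also handles variable-length vectors.  */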
714038fd1498Szrj 
714138fd1498Szrj /* Given vector variables X and Y that were generated for the scalar
714238fd1498Szrj    STMT, generate instructions to permute the vector elements of X and Y
714338fd1498Szrj    using the permutation mask MASK_VEC, insert them at *GSI and return the
714438fd1498Szrj    permuted vector variable.  */
714538fd1498Szrj 
714638fd1498Szrj static tree
permute_vec_elements(tree x,tree y,tree mask_vec,gimple * stmt,gimple_stmt_iterator * gsi)714738fd1498Szrj permute_vec_elements (tree x, tree y, tree mask_vec, gimple *stmt,
714838fd1498Szrj 		      gimple_stmt_iterator *gsi)
714938fd1498Szrj {
715038fd1498Szrj   tree vectype = TREE_TYPE (x);
715138fd1498Szrj   tree perm_dest, data_ref;
715238fd1498Szrj   gimple *perm_stmt;
715338fd1498Szrj 
715438fd1498Szrj   tree scalar_dest = gimple_get_lhs (stmt);
715538fd1498Szrj   if (TREE_CODE (scalar_dest) == SSA_NAME)
715638fd1498Szrj     perm_dest = vect_create_destination_var (scalar_dest, vectype);
715738fd1498Szrj   else
715838fd1498Szrj     perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
715938fd1498Szrj   data_ref = make_ssa_name (perm_dest);
716038fd1498Szrj 
716138fd1498Szrj   /* Generate the permute statement.  */
716238fd1498Szrj   perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
716338fd1498Szrj   vect_finish_stmt_generation (stmt, perm_stmt, gsi);
716438fd1498Szrj 
716538fd1498Szrj   return data_ref;
716638fd1498Szrj }
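
/* For instance (illustrative), with X = vx_1, Y = vy_2 and an
   interleave-low selector for V4SI, the stmt emitted above is

     vect_perm_3 = VEC_PERM_EXPR <vx_1, vy_2, { 0, 4, 1, 5 }>;

   and vect_perm_3 is returned.  */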
716738fd1498Szrj 
716838fd1498Szrj /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
716938fd1498Szrj    inserting them on the loop's preheader edge.  Returns true if we were
717038fd1498Szrj    successful in doing so (and thus STMT can then be moved), otherwise
717138fd1498Szrj    returns false.  */
717238fd1498Szrj 
717338fd1498Szrj static bool
hoist_defs_of_uses(gimple * stmt,struct loop * loop)717438fd1498Szrj hoist_defs_of_uses (gimple *stmt, struct loop *loop)
717538fd1498Szrj {
717638fd1498Szrj   ssa_op_iter i;
717738fd1498Szrj   tree op;
717838fd1498Szrj   bool any = false;
717938fd1498Szrj 
718038fd1498Szrj   FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
718138fd1498Szrj     {
718238fd1498Szrj       gimple *def_stmt = SSA_NAME_DEF_STMT (op);
718338fd1498Szrj       if (!gimple_nop_p (def_stmt)
718438fd1498Szrj 	  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
718538fd1498Szrj 	{
718638fd1498Szrj 	  /* Make sure we don't need to recurse.  While we could do
718738fd1498Szrj 	     so in simple cases, for more complex use webs we don't
718838fd1498Szrj 	     have an easy way to preserve stmt order so as to fulfil
718938fd1498Szrj 	     dependencies within them.  */
719038fd1498Szrj 	  tree op2;
719138fd1498Szrj 	  ssa_op_iter i2;
719238fd1498Szrj 	  if (gimple_code (def_stmt) == GIMPLE_PHI)
719338fd1498Szrj 	    return false;
719438fd1498Szrj 	  FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
719538fd1498Szrj 	    {
719638fd1498Szrj 	      gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
719738fd1498Szrj 	      if (!gimple_nop_p (def_stmt2)
719838fd1498Szrj 		  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
719938fd1498Szrj 		return false;
720038fd1498Szrj 	    }
720138fd1498Szrj 	  any = true;
720238fd1498Szrj 	}
720338fd1498Szrj     }
720438fd1498Szrj 
720538fd1498Szrj   if (!any)
720638fd1498Szrj     return true;
720738fd1498Szrj 
720838fd1498Szrj   FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
720938fd1498Szrj     {
721038fd1498Szrj       gimple *def_stmt = SSA_NAME_DEF_STMT (op);
721138fd1498Szrj       if (!gimple_nop_p (def_stmt)
721238fd1498Szrj 	  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
721338fd1498Szrj 	{
721438fd1498Szrj 	  gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
721538fd1498Szrj 	  gsi_remove (&gsi, false);
721638fd1498Szrj 	  gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
721738fd1498Szrj 	}
721838fd1498Szrj     }
721938fd1498Szrj 
722038fd1498Szrj   return true;
722138fd1498Szrj }
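
/* Illustrative example for hoist_defs_of_uses: given

     loop:
       a_2 = b_1(D) + 1;
       x_3 = array[a_2];     <-- STMT

   where b_1(D) is defined outside LOOP, the def "a_2 = b_1(D) + 1" is
   moved to the loop preheader edge, after which STMT itself can be
   moved (e.g. when vectorizing an invariant load).  */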
722238fd1498Szrj 
722338fd1498Szrj /* vectorizable_load.
722438fd1498Szrj 
722538fd1498Szrj    Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
722638fd1498Szrj    can be vectorized.
722738fd1498Szrj    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
722838fd1498Szrj    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
722938fd1498Szrj    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
723038fd1498Szrj 
723138fd1498Szrj static bool
vectorizable_load(gimple * stmt,gimple_stmt_iterator * gsi,gimple ** vec_stmt,slp_tree slp_node,slp_instance slp_node_instance)723238fd1498Szrj vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
723338fd1498Szrj                    slp_tree slp_node, slp_instance slp_node_instance)
723438fd1498Szrj {
723538fd1498Szrj   tree scalar_dest;
723638fd1498Szrj   tree vec_dest = NULL;
723738fd1498Szrj   tree data_ref = NULL;
723838fd1498Szrj   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
723938fd1498Szrj   stmt_vec_info prev_stmt_info;
724038fd1498Szrj   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
724138fd1498Szrj   struct loop *loop = NULL;
724238fd1498Szrj   struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
724338fd1498Szrj   bool nested_in_vect_loop = false;
724438fd1498Szrj   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
724538fd1498Szrj   tree elem_type;
724638fd1498Szrj   tree new_temp;
724738fd1498Szrj   machine_mode mode;
724838fd1498Szrj   gimple *new_stmt = NULL;
724938fd1498Szrj   tree dummy;
725038fd1498Szrj   enum dr_alignment_support alignment_support_scheme;
725138fd1498Szrj   tree dataref_ptr = NULL_TREE;
725238fd1498Szrj   tree dataref_offset = NULL_TREE;
725338fd1498Szrj   gimple *ptr_incr = NULL;
725438fd1498Szrj   int ncopies;
725538fd1498Szrj   int i, j;
725638fd1498Szrj   unsigned int group_size;
725738fd1498Szrj   poly_uint64 group_gap_adj;
725838fd1498Szrj   tree msq = NULL_TREE, lsq;
725938fd1498Szrj   tree offset = NULL_TREE;
726038fd1498Szrj   tree byte_offset = NULL_TREE;
726138fd1498Szrj   tree realignment_token = NULL_TREE;
726238fd1498Szrj   gphi *phi = NULL;
726338fd1498Szrj   vec<tree> dr_chain = vNULL;
726438fd1498Szrj   bool grouped_load = false;
726538fd1498Szrj   gimple *first_stmt;
726638fd1498Szrj   gimple *first_stmt_for_drptr = NULL;
726738fd1498Szrj   bool inv_p;
726838fd1498Szrj   bool compute_in_loop = false;
726938fd1498Szrj   struct loop *at_loop;
727038fd1498Szrj   int vec_num;
727138fd1498Szrj   bool slp = (slp_node != NULL);
727238fd1498Szrj   bool slp_perm = false;
727338fd1498Szrj   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
727438fd1498Szrj   poly_uint64 vf;
727538fd1498Szrj   tree aggr_type;
727638fd1498Szrj   gather_scatter_info gs_info;
727738fd1498Szrj   vec_info *vinfo = stmt_info->vinfo;
727838fd1498Szrj   tree ref_type;
727938fd1498Szrj   enum vect_def_type mask_dt = vect_unknown_def_type;
728038fd1498Szrj 
728138fd1498Szrj   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
728238fd1498Szrj     return false;
728338fd1498Szrj 
728438fd1498Szrj   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
728538fd1498Szrj       && ! vec_stmt)
728638fd1498Szrj     return false;
728738fd1498Szrj 
728838fd1498Szrj   tree mask = NULL_TREE, mask_vectype = NULL_TREE;
728938fd1498Szrj   if (is_gimple_assign (stmt))
729038fd1498Szrj     {
729138fd1498Szrj       scalar_dest = gimple_assign_lhs (stmt);
729238fd1498Szrj       if (TREE_CODE (scalar_dest) != SSA_NAME)
729338fd1498Szrj 	return false;
729438fd1498Szrj 
729538fd1498Szrj       tree_code code = gimple_assign_rhs_code (stmt);
729638fd1498Szrj       if (code != ARRAY_REF
729738fd1498Szrj 	  && code != BIT_FIELD_REF
729838fd1498Szrj 	  && code != INDIRECT_REF
729938fd1498Szrj 	  && code != COMPONENT_REF
730038fd1498Szrj 	  && code != IMAGPART_EXPR
730138fd1498Szrj 	  && code != REALPART_EXPR
730238fd1498Szrj 	  && code != MEM_REF
730338fd1498Szrj 	  && TREE_CODE_CLASS (code) != tcc_declaration)
730438fd1498Szrj 	return false;
730538fd1498Szrj     }
730638fd1498Szrj   else
730738fd1498Szrj     {
730838fd1498Szrj       gcall *call = dyn_cast <gcall *> (stmt);
730938fd1498Szrj       if (!call || !gimple_call_internal_p (call))
731038fd1498Szrj 	return false;
731138fd1498Szrj 
731238fd1498Szrj       internal_fn ifn = gimple_call_internal_fn (call);
731338fd1498Szrj       if (!internal_load_fn_p (ifn))
731438fd1498Szrj 	return false;
731538fd1498Szrj 
731638fd1498Szrj       scalar_dest = gimple_call_lhs (call);
731738fd1498Szrj       if (!scalar_dest)
731838fd1498Szrj 	return false;
731938fd1498Szrj 
732038fd1498Szrj       if (slp_node != NULL)
732138fd1498Szrj 	{
732238fd1498Szrj 	  if (dump_enabled_p ())
732338fd1498Szrj 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
732438fd1498Szrj 			     "SLP of masked loads not supported.\n");
732538fd1498Szrj 	  return false;
732638fd1498Szrj 	}
732738fd1498Szrj 
732838fd1498Szrj       int mask_index = internal_fn_mask_index (ifn);
732938fd1498Szrj       if (mask_index >= 0)
733038fd1498Szrj 	{
733138fd1498Szrj 	  mask = gimple_call_arg (call, mask_index);
733238fd1498Szrj 	  if (!vect_check_load_store_mask (stmt, mask, &mask_dt,
733338fd1498Szrj 					   &mask_vectype))
733438fd1498Szrj 	    return false;
733538fd1498Szrj 	}
733638fd1498Szrj     }
733738fd1498Szrj 
733838fd1498Szrj   if (!STMT_VINFO_DATA_REF (stmt_info))
733938fd1498Szrj     return false;
734038fd1498Szrj 
734138fd1498Szrj   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
734238fd1498Szrj   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
734338fd1498Szrj 
734438fd1498Szrj   if (loop_vinfo)
734538fd1498Szrj     {
734638fd1498Szrj       loop = LOOP_VINFO_LOOP (loop_vinfo);
734738fd1498Szrj       nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
734838fd1498Szrj       vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
734938fd1498Szrj     }
735038fd1498Szrj   else
735138fd1498Szrj     vf = 1;
735238fd1498Szrj 
735338fd1498Szrj   /* Multiple types in SLP are handled by creating the appropriate number of
735438fd1498Szrj      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
735538fd1498Szrj      case of SLP.  */
735638fd1498Szrj   if (slp)
735738fd1498Szrj     ncopies = 1;
735838fd1498Szrj   else
735938fd1498Szrj     ncopies = vect_get_num_copies (loop_vinfo, vectype);
736038fd1498Szrj 
736138fd1498Szrj   gcc_assert (ncopies >= 1);
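  /* For example (illustrative): with VF = 16 and nunits = 4, as in the
     unrolling example further down, vect_get_num_copies returns
     ncopies = 16 / 4 = 4.  */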
736238fd1498Szrj 
736338fd1498Szrj   /* FORNOW. This restriction should be relaxed.  */
736438fd1498Szrj   if (nested_in_vect_loop && ncopies > 1)
736538fd1498Szrj     {
736638fd1498Szrj       if (dump_enabled_p ())
736738fd1498Szrj         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
736838fd1498Szrj                          "multiple types in nested loop.\n");
736938fd1498Szrj       return false;
737038fd1498Szrj     }
737138fd1498Szrj 
737238fd1498Szrj   /* Invalidate assumptions made by dependence analysis when vectorization
737338fd1498Szrj      on the unrolled body effectively re-orders stmts.  */
737438fd1498Szrj   if (ncopies > 1
737538fd1498Szrj       && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
737638fd1498Szrj       && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
737738fd1498Szrj 		   STMT_VINFO_MIN_NEG_DIST (stmt_info)))
737838fd1498Szrj     {
737938fd1498Szrj       if (dump_enabled_p ())
738038fd1498Szrj 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
738138fd1498Szrj 			 "cannot perform implicit CSE when unrolling "
738238fd1498Szrj 			 "with negative dependence distance\n");
738338fd1498Szrj       return false;
738438fd1498Szrj     }
738538fd1498Szrj 
738638fd1498Szrj   elem_type = TREE_TYPE (vectype);
738738fd1498Szrj   mode = TYPE_MODE (vectype);
738838fd1498Szrj 
738938fd1498Szrj   /* FORNOW. In some cases we can vectorize even if the data type is not
739038fd1498Szrj      supported (e.g. data copies).  */
739138fd1498Szrj   if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
739238fd1498Szrj     {
739338fd1498Szrj       if (dump_enabled_p ())
739438fd1498Szrj         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
739538fd1498Szrj                          "Aligned load, but unsupported type.\n");
739638fd1498Szrj       return false;
739738fd1498Szrj     }
739838fd1498Szrj 
739938fd1498Szrj   /* Check if the load is a part of an interleaving chain.  */
740038fd1498Szrj   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
740138fd1498Szrj     {
740238fd1498Szrj       grouped_load = true;
740338fd1498Szrj       /* FORNOW */
740438fd1498Szrj       gcc_assert (!nested_in_vect_loop);
740538fd1498Szrj       gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
740638fd1498Szrj 
740738fd1498Szrj       first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
740838fd1498Szrj       group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
740938fd1498Szrj 
741038fd1498Szrj       if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
741138fd1498Szrj 	slp_perm = true;
741238fd1498Szrj 
741338fd1498Szrj       /* Invalidate assumptions made by dependence analysis when vectorization
741438fd1498Szrj 	 on the unrolled body effectively re-orders stmts.  */
741538fd1498Szrj       if (!PURE_SLP_STMT (stmt_info)
741638fd1498Szrj 	  && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
741738fd1498Szrj 	  && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
741838fd1498Szrj 		       STMT_VINFO_MIN_NEG_DIST (stmt_info)))
741938fd1498Szrj 	{
742038fd1498Szrj 	  if (dump_enabled_p ())
742138fd1498Szrj 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
742238fd1498Szrj 			     "cannot perform implicit CSE when performing "
742338fd1498Szrj 			     "group loads with negative dependence distance\n");
742438fd1498Szrj 	  return false;
742538fd1498Szrj 	}
742638fd1498Szrj 
742738fd1498Szrj       /* Similarly, when the stmt is a load that is both part of an SLP
742838fd1498Szrj          instance and a loop-vectorized stmt via the same-dr mechanism,
742938fd1498Szrj 	 we have to give up.  */
743038fd1498Szrj       if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)
743138fd1498Szrj 	  && (STMT_SLP_TYPE (stmt_info)
743238fd1498Szrj 	      != STMT_SLP_TYPE (vinfo_for_stmt
743338fd1498Szrj 				 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)))))
743438fd1498Szrj 	{
743538fd1498Szrj 	  if (dump_enabled_p ())
743638fd1498Szrj 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
743738fd1498Szrj 			     "conflicting SLP types for CSEd load\n");
743838fd1498Szrj 	  return false;
743938fd1498Szrj 	}
744038fd1498Szrj     }
744138fd1498Szrj   else
744238fd1498Szrj     group_size = 1;
744338fd1498Szrj 
744438fd1498Szrj   vect_memory_access_type memory_access_type;
744538fd1498Szrj   if (!get_load_store_type (stmt, vectype, slp, mask, VLS_LOAD, ncopies,
744638fd1498Szrj 			    &memory_access_type, &gs_info))
744738fd1498Szrj     return false;
744838fd1498Szrj 
744938fd1498Szrj   if (mask)
745038fd1498Szrj     {
745138fd1498Szrj       if (memory_access_type == VMAT_CONTIGUOUS)
745238fd1498Szrj 	{
745338fd1498Szrj 	  machine_mode vec_mode = TYPE_MODE (vectype);
745438fd1498Szrj 	  if (!VECTOR_MODE_P (vec_mode)
745538fd1498Szrj 	      || !can_vec_mask_load_store_p (vec_mode,
745638fd1498Szrj 					     TYPE_MODE (mask_vectype), true))
745738fd1498Szrj 	    return false;
745838fd1498Szrj 	}
745938fd1498Szrj       else if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
746038fd1498Szrj 	{
746138fd1498Szrj 	  tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
746238fd1498Szrj 	  tree masktype
746338fd1498Szrj 	    = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
746438fd1498Szrj 	  if (TREE_CODE (masktype) == INTEGER_TYPE)
746538fd1498Szrj 	    {
746638fd1498Szrj 	      if (dump_enabled_p ())
746738fd1498Szrj 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
746838fd1498Szrj 				 "masked gather with integer mask not"
746938fd1498Szrj 				 " supported.");
747038fd1498Szrj 	      return false;
747138fd1498Szrj 	    }
747238fd1498Szrj 	}
747338fd1498Szrj       else if (memory_access_type != VMAT_LOAD_STORE_LANES
747438fd1498Szrj 	       && memory_access_type != VMAT_GATHER_SCATTER)
747538fd1498Szrj 	{
747638fd1498Szrj 	  if (dump_enabled_p ())
747738fd1498Szrj 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
747838fd1498Szrj 			     "unsupported access type for masked load.\n");
747938fd1498Szrj 	  return false;
748038fd1498Szrj 	}
748138fd1498Szrj     }
748238fd1498Szrj 
748338fd1498Szrj   if (!vec_stmt) /* transformation not required.  */
748438fd1498Szrj     {
748538fd1498Szrj       if (!slp)
748638fd1498Szrj 	STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
748738fd1498Szrj 
748838fd1498Szrj       if (loop_vinfo
748938fd1498Szrj 	  && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
749038fd1498Szrj 	check_load_store_masking (loop_vinfo, vectype, VLS_LOAD, group_size,
749138fd1498Szrj 				  memory_access_type, &gs_info);
749238fd1498Szrj 
749338fd1498Szrj       STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
749438fd1498Szrj       /* The SLP costs are calculated during SLP analysis.  */
749538fd1498Szrj       if (! slp_node)
749638fd1498Szrj 	vect_model_load_cost (stmt_info, ncopies, memory_access_type,
749738fd1498Szrj 			      NULL, NULL, NULL);
749838fd1498Szrj       return true;
749938fd1498Szrj     }
750038fd1498Szrj 
750138fd1498Szrj   if (!slp)
750238fd1498Szrj     gcc_assert (memory_access_type
750338fd1498Szrj 		== STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
750438fd1498Szrj 
750538fd1498Szrj   if (dump_enabled_p ())
750638fd1498Szrj     dump_printf_loc (MSG_NOTE, vect_location,
750738fd1498Szrj                      "transform load. ncopies = %d\n", ncopies);
750838fd1498Szrj 
750938fd1498Szrj   /* Transform.  */
751038fd1498Szrj 
751138fd1498Szrj   ensure_base_align (dr);
751238fd1498Szrj 
751338fd1498Szrj   if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
751438fd1498Szrj     {
751538fd1498Szrj       vect_build_gather_load_calls (stmt, gsi, vec_stmt, &gs_info, mask,
751638fd1498Szrj 				    mask_dt);
751738fd1498Szrj       return true;
751838fd1498Szrj     }
751938fd1498Szrj 
752038fd1498Szrj   if (memory_access_type == VMAT_ELEMENTWISE
752138fd1498Szrj       || memory_access_type == VMAT_STRIDED_SLP)
752238fd1498Szrj     {
752338fd1498Szrj       gimple_stmt_iterator incr_gsi;
752438fd1498Szrj       bool insert_after;
752538fd1498Szrj       gimple *incr;
752638fd1498Szrj       tree offvar;
752738fd1498Szrj       tree ivstep;
752838fd1498Szrj       tree running_off;
752938fd1498Szrj       vec<constructor_elt, va_gc> *v = NULL;
753038fd1498Szrj       tree stride_base, stride_step, alias_off;
753138fd1498Szrj       /* Checked by get_load_store_type.  */
753238fd1498Szrj       unsigned int const_nunits = nunits.to_constant ();
753338fd1498Szrj       unsigned HOST_WIDE_INT cst_offset = 0;
753438fd1498Szrj 
753538fd1498Szrj       gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
753638fd1498Szrj       gcc_assert (!nested_in_vect_loop);
753738fd1498Szrj 
753838fd1498Szrj       if (grouped_load)
753938fd1498Szrj 	{
754038fd1498Szrj 	  first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
754138fd1498Szrj 	  first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
754238fd1498Szrj 	}
754338fd1498Szrj       else
754438fd1498Szrj 	{
754538fd1498Szrj 	  first_stmt = stmt;
754638fd1498Szrj 	  first_dr = dr;
754738fd1498Szrj 	}
754838fd1498Szrj       if (slp && grouped_load)
754938fd1498Szrj 	{
755038fd1498Szrj 	  group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
755138fd1498Szrj 	  ref_type = get_group_alias_ptr_type (first_stmt);
755238fd1498Szrj 	}
755338fd1498Szrj       else
755438fd1498Szrj 	{
755538fd1498Szrj 	  if (grouped_load)
755638fd1498Szrj 	    cst_offset
755738fd1498Szrj 	      = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
755838fd1498Szrj 		 * vect_get_place_in_interleaving_chain (stmt, first_stmt));
755938fd1498Szrj 	  group_size = 1;
756038fd1498Szrj 	  ref_type = reference_alias_ptr_type (DR_REF (dr));
756138fd1498Szrj 	}
756238fd1498Szrj 
756338fd1498Szrj       stride_base
756438fd1498Szrj 	= fold_build_pointer_plus
756538fd1498Szrj 	    (DR_BASE_ADDRESS (first_dr),
756638fd1498Szrj 	     size_binop (PLUS_EXPR,
756738fd1498Szrj 			 convert_to_ptrofftype (DR_OFFSET (first_dr)),
756838fd1498Szrj 			 convert_to_ptrofftype (DR_INIT (first_dr))));
756938fd1498Szrj       stride_step = fold_convert (sizetype, DR_STEP (first_dr));
757038fd1498Szrj 
757138fd1498Szrj       /* For a load with a loop-invariant stride other than a power of 2
757238fd1498Szrj          (i.e. not a grouped access), like so:
757338fd1498Szrj 
757438fd1498Szrj 	   for (i = 0; i < n; i += stride)
757538fd1498Szrj 	     ... = array[i];
757638fd1498Szrj 
757738fd1498Szrj 	 we generate a new induction variable and new accesses to
757838fd1498Szrj 	 form a new vector (or vectors, depending on ncopies):
757938fd1498Szrj 
758038fd1498Szrj 	   for (j = 0; ; j += VF*stride)
758138fd1498Szrj 	     tmp1 = array[j];
758238fd1498Szrj 	     tmp2 = array[j + stride];
758338fd1498Szrj 	     ...
758438fd1498Szrj 	     vectemp = {tmp1, tmp2, ...}
758538fd1498Szrj          */
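
      /* A concrete instance (illustrative): for stride == 3 and 4-element
	 vectors, one copy loads array[j], array[j + 3], array[j + 6] and
	 array[j + 9] and builds vectemp = {tmp1, tmp2, tmp3, tmp4}, with J
	 advancing by VF * 3 elements per vector iteration.  */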
758638fd1498Szrj 
758738fd1498Szrj       ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
758838fd1498Szrj 			    build_int_cst (TREE_TYPE (stride_step), vf));
758938fd1498Szrj 
759038fd1498Szrj       standard_iv_increment_position (loop, &incr_gsi, &insert_after);
759138fd1498Szrj 
759238fd1498Szrj       stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
759338fd1498Szrj       ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
759438fd1498Szrj       create_iv (stride_base, ivstep, NULL,
759538fd1498Szrj 		 loop, &incr_gsi, insert_after,
759638fd1498Szrj 		 &offvar, NULL);
759738fd1498Szrj       incr = gsi_stmt (incr_gsi);
759838fd1498Szrj       set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
759938fd1498Szrj 
760038fd1498Szrj       stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
760138fd1498Szrj 
760238fd1498Szrj       prev_stmt_info = NULL;
760338fd1498Szrj       running_off = offvar;
760438fd1498Szrj       alias_off = build_int_cst (ref_type, 0);
760538fd1498Szrj       int nloads = const_nunits;
760638fd1498Szrj       int lnel = 1;
760738fd1498Szrj       tree ltype = TREE_TYPE (vectype);
760838fd1498Szrj       tree lvectype = vectype;
760938fd1498Szrj       auto_vec<tree> dr_chain;
761038fd1498Szrj       if (memory_access_type == VMAT_STRIDED_SLP)
761138fd1498Szrj 	{
761238fd1498Szrj 	  if (group_size < const_nunits)
761338fd1498Szrj 	    {
761438fd1498Szrj 	      /* First check if vec_init optab supports construction from
761538fd1498Szrj 		 vector elts directly.  */
761638fd1498Szrj 	      scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
761738fd1498Szrj 	      machine_mode vmode;
761838fd1498Szrj 	      if (mode_for_vector (elmode, group_size).exists (&vmode)
761938fd1498Szrj 		  && VECTOR_MODE_P (vmode)
762038fd1498Szrj 		  && targetm.vector_mode_supported_p (vmode)
762138fd1498Szrj 		  && (convert_optab_handler (vec_init_optab,
762238fd1498Szrj 					     TYPE_MODE (vectype), vmode)
762338fd1498Szrj 		      != CODE_FOR_nothing))
762438fd1498Szrj 		{
762538fd1498Szrj 		  nloads = const_nunits / group_size;
762638fd1498Szrj 		  lnel = group_size;
762738fd1498Szrj 		  ltype = build_vector_type (TREE_TYPE (vectype), group_size);
762838fd1498Szrj 		}
762938fd1498Szrj 	      else
763038fd1498Szrj 		{
763138fd1498Szrj 		  /* Otherwise avoid emitting a constructor of vector elements
763238fd1498Szrj 		     by performing the loads using an integer type of the same
763338fd1498Szrj 		     size, constructing a vector of those and then
763438fd1498Szrj 		     re-interpreting it as the original vector type.
763538fd1498Szrj 		     This avoids a huge runtime penalty due to the general
763638fd1498Szrj 		     inability to perform store forwarding from smaller stores
763738fd1498Szrj 		     to a larger load.  */
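		  /* Worked example (illustrative): for V8HI with
		     group_size == 2 we get lsize = 2 * 16 = 32, so we
		     emit nloads = 8 / 2 = 4 loads of a 32-bit integer
		     type, build a vector(4) of those, and view-convert
		     the result back to V8HI.  */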
763838fd1498Szrj 		  unsigned lsize
763938fd1498Szrj 		    = group_size * TYPE_PRECISION (TREE_TYPE (vectype));
764038fd1498Szrj 		  elmode = int_mode_for_size (lsize, 0).require ();
764138fd1498Szrj 		  unsigned int lnunits = const_nunits / group_size;
764238fd1498Szrj 		  /* If we can't construct such a vector, fall back to
764338fd1498Szrj 		     element loads of the original vector type.  */
764438fd1498Szrj 		  if (mode_for_vector (elmode, lnunits).exists (&vmode)
764538fd1498Szrj 		      && VECTOR_MODE_P (vmode)
764638fd1498Szrj 		      && targetm.vector_mode_supported_p (vmode)
764738fd1498Szrj 		      && (convert_optab_handler (vec_init_optab, vmode, elmode)
764838fd1498Szrj 			  != CODE_FOR_nothing))
764938fd1498Szrj 		    {
765038fd1498Szrj 		      nloads = lnunits;
765138fd1498Szrj 		      lnel = group_size;
765238fd1498Szrj 		      ltype = build_nonstandard_integer_type (lsize, 1);
765338fd1498Szrj 		      lvectype = build_vector_type (ltype, nloads);
765438fd1498Szrj 		    }
765538fd1498Szrj 		}
765638fd1498Szrj 	    }
765738fd1498Szrj 	  else
765838fd1498Szrj 	    {
765938fd1498Szrj 	      nloads = 1;
766038fd1498Szrj 	      lnel = const_nunits;
766138fd1498Szrj 	      ltype = vectype;
766238fd1498Szrj 	    }
766338fd1498Szrj 	  ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
766438fd1498Szrj 	}
7665*58e805e6Szrj       /* If the vectype is a one-element vector, load it directly as vector(1) scalar_type.  */
7666*58e805e6Szrj       else if (nloads == 1)
7667*58e805e6Szrj 	ltype = vectype;
7668*58e805e6Szrj 
766938fd1498Szrj       if (slp)
767038fd1498Szrj 	{
767138fd1498Szrj 	  /* For SLP permutation support we need to load the whole group,
767238fd1498Szrj 	     not only the number of vector stmts the permutation result
767338fd1498Szrj 	     fits in.  */
767438fd1498Szrj 	  if (slp_perm)
767538fd1498Szrj 	    {
767638fd1498Szrj 	      /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
767738fd1498Szrj 		 variable VF.  */
767838fd1498Szrj 	      unsigned int const_vf = vf.to_constant ();
767938fd1498Szrj 	      ncopies = CEIL (group_size * const_vf, const_nunits);
768038fd1498Szrj 	      dr_chain.create (ncopies);
768138fd1498Szrj 	    }
768238fd1498Szrj 	  else
768338fd1498Szrj 	    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
768438fd1498Szrj 	}
768538fd1498Szrj       unsigned int group_el = 0;
768638fd1498Szrj       unsigned HOST_WIDE_INT
768738fd1498Szrj 	elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
768838fd1498Szrj       for (j = 0; j < ncopies; j++)
768938fd1498Szrj 	{
769038fd1498Szrj 	  if (nloads > 1)
769138fd1498Szrj 	    vec_alloc (v, nloads);
769238fd1498Szrj 	  for (i = 0; i < nloads; i++)
769338fd1498Szrj 	    {
769438fd1498Szrj 	      tree this_off = build_int_cst (TREE_TYPE (alias_off),
769538fd1498Szrj 					     group_el * elsz + cst_offset);
769638fd1498Szrj 	      tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
769738fd1498Szrj 	      vect_copy_ref_info (data_ref, DR_REF (first_dr));
769838fd1498Szrj 	      new_stmt = gimple_build_assign (make_ssa_name (ltype), data_ref);
769938fd1498Szrj 	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
770038fd1498Szrj 	      if (nloads > 1)
770138fd1498Szrj 		CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
770238fd1498Szrj 					gimple_assign_lhs (new_stmt));
770338fd1498Szrj 
770438fd1498Szrj 	      group_el += lnel;
770538fd1498Szrj 	      if (! slp
770638fd1498Szrj 		  || group_el == group_size)
770738fd1498Szrj 		{
770838fd1498Szrj 		  tree newoff = copy_ssa_name (running_off);
770938fd1498Szrj 		  gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
771038fd1498Szrj 						      running_off, stride_step);
771138fd1498Szrj 		  vect_finish_stmt_generation (stmt, incr, gsi);
771238fd1498Szrj 
771338fd1498Szrj 		  running_off = newoff;
771438fd1498Szrj 		  group_el = 0;
771538fd1498Szrj 		}
771638fd1498Szrj 	    }
771738fd1498Szrj 	  if (nloads > 1)
771838fd1498Szrj 	    {
771938fd1498Szrj 	      tree vec_inv = build_constructor (lvectype, v);
772038fd1498Szrj 	      new_temp = vect_init_vector (stmt, vec_inv, lvectype, gsi);
772138fd1498Szrj 	      new_stmt = SSA_NAME_DEF_STMT (new_temp);
772238fd1498Szrj 	      if (lvectype != vectype)
772338fd1498Szrj 		{
772438fd1498Szrj 		  new_stmt = gimple_build_assign (make_ssa_name (vectype),
772538fd1498Szrj 						  VIEW_CONVERT_EXPR,
772638fd1498Szrj 						  build1 (VIEW_CONVERT_EXPR,
772738fd1498Szrj 							  vectype, new_temp));
772838fd1498Szrj 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
772938fd1498Szrj 		}
773038fd1498Szrj 	    }
773138fd1498Szrj 
773238fd1498Szrj 	  if (slp)
773338fd1498Szrj 	    {
773438fd1498Szrj 	      if (slp_perm)
773538fd1498Szrj 		dr_chain.quick_push (gimple_assign_lhs (new_stmt));
773638fd1498Szrj 	      else
773738fd1498Szrj 		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
773838fd1498Szrj 	    }
773938fd1498Szrj 	  else
774038fd1498Szrj 	    {
774138fd1498Szrj 	      if (j == 0)
774238fd1498Szrj 		STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
774338fd1498Szrj 	      else
774438fd1498Szrj 		STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
774538fd1498Szrj 	      prev_stmt_info = vinfo_for_stmt (new_stmt);
774638fd1498Szrj 	    }
774738fd1498Szrj 	}
774838fd1498Szrj       if (slp_perm)
774938fd1498Szrj 	{
775038fd1498Szrj 	  unsigned n_perms;
775138fd1498Szrj 	  vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
775238fd1498Szrj 					slp_node_instance, false, &n_perms);
775338fd1498Szrj 	}
775438fd1498Szrj       return true;
775538fd1498Szrj     }
775638fd1498Szrj 
775738fd1498Szrj   if (memory_access_type == VMAT_GATHER_SCATTER
775838fd1498Szrj       || (!slp && memory_access_type == VMAT_CONTIGUOUS))
775938fd1498Szrj     grouped_load = false;
776038fd1498Szrj 
776138fd1498Szrj   if (grouped_load)
776238fd1498Szrj     {
776338fd1498Szrj       first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
776438fd1498Szrj       group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
776538fd1498Szrj       /* For SLP vectorization we directly vectorize a subchain
776638fd1498Szrj          without permutation.  */
776738fd1498Szrj       if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
776838fd1498Szrj 	first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
776938fd1498Szrj       /* For BB vectorization always use the first stmt to base
777038fd1498Szrj 	 the data ref pointer on.  */
777138fd1498Szrj       if (bb_vinfo)
777238fd1498Szrj 	first_stmt_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
777338fd1498Szrj 
777438fd1498Szrj       /* Check if the chain of loads is already vectorized.  */
777538fd1498Szrj       if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
777638fd1498Szrj 	  /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
777738fd1498Szrj 	     ???  But we can only do so if there is exactly one
777838fd1498Szrj 	     as we have no way to get at the rest.  Leave the CSE
777938fd1498Szrj 	     opportunity alone.
778038fd1498Szrj 	     ???  With the group load eventually participating
778138fd1498Szrj 	     in multiple different permutations (having multiple
778238fd1498Szrj 	     slp nodes which refer to the same group) the CSE
778338fd1498Szrj 	     would even generate wrong code.  See PR56270.  */
778438fd1498Szrj 	  && !slp)
778538fd1498Szrj 	{
778638fd1498Szrj 	  *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
778738fd1498Szrj 	  return true;
778838fd1498Szrj 	}
778938fd1498Szrj       first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
779038fd1498Szrj       group_gap_adj = 0;
779138fd1498Szrj 
779238fd1498Szrj       /* VEC_NUM is the number of vect stmts to be created for this group.  */
779338fd1498Szrj       if (slp)
779438fd1498Szrj 	{
779538fd1498Szrj 	  grouped_load = false;
779638fd1498Szrj 	  /* For SLP permutation support we need to load the whole group,
779738fd1498Szrj 	     not only the number of vector stmts the permutation result
779838fd1498Szrj 	     fits in.  */
779938fd1498Szrj 	  if (slp_perm)
780038fd1498Szrj 	    {
780138fd1498Szrj 	      /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
780238fd1498Szrj 		 variable VF.  */
780338fd1498Szrj 	      unsigned int const_vf = vf.to_constant ();
780438fd1498Szrj 	      unsigned int const_nunits = nunits.to_constant ();
780538fd1498Szrj 	      vec_num = CEIL (group_size * const_vf, const_nunits);
780638fd1498Szrj 	      group_gap_adj = vf * group_size - nunits * vec_num;
780738fd1498Szrj 	    }
780838fd1498Szrj 	  else
780938fd1498Szrj 	    {
781038fd1498Szrj 	      vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
781138fd1498Szrj 	      group_gap_adj
781238fd1498Szrj 		= group_size - SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
781338fd1498Szrj 	    }
781438fd1498Szrj 	}
781538fd1498Szrj       else
781638fd1498Szrj 	vec_num = group_size;
781738fd1498Szrj 
781838fd1498Szrj       ref_type = get_group_alias_ptr_type (first_stmt);
781938fd1498Szrj     }
782038fd1498Szrj   else
782138fd1498Szrj     {
782238fd1498Szrj       first_stmt = stmt;
782338fd1498Szrj       first_dr = dr;
782438fd1498Szrj       group_size = vec_num = 1;
782538fd1498Szrj       group_gap_adj = 0;
782638fd1498Szrj       ref_type = reference_alias_ptr_type (DR_REF (first_dr));
782738fd1498Szrj     }
782838fd1498Szrj 
782938fd1498Szrj   alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
783038fd1498Szrj   gcc_assert (alignment_support_scheme);
783138fd1498Szrj   vec_loop_masks *loop_masks
783238fd1498Szrj     = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
783338fd1498Szrj        ? &LOOP_VINFO_MASKS (loop_vinfo)
783438fd1498Szrj        : NULL);
783538fd1498Szrj   /* Targets with load-lanes instructions must not require explicit
783638fd1498Szrj      realignment.  vect_supportable_dr_alignment always returns either
783738fd1498Szrj      dr_aligned or dr_unaligned_supported for masked operations.  */
783838fd1498Szrj   gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
783938fd1498Szrj 	       && !mask
784038fd1498Szrj 	       && !loop_masks)
784138fd1498Szrj 	      || alignment_support_scheme == dr_aligned
784238fd1498Szrj 	      || alignment_support_scheme == dr_unaligned_supported);
784338fd1498Szrj 
784438fd1498Szrj   /* In case the vectorization factor (VF) is bigger than the number
784538fd1498Szrj      of elements that we can fit in a vectype (nunits), we have to generate
784638fd1498Szrj      more than one vector stmt - i.e., we need to "unroll" the
784738fd1498Szrj      vector stmt by a factor VF/nunits.  In doing so, we record a pointer
784838fd1498Szrj      from one copy of the vector stmt to the next, in the field
784938fd1498Szrj      STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
785038fd1498Szrj      stages to find the correct vector defs to be used when vectorizing
785138fd1498Szrj      stmts that use the defs of the current stmt.  The example below
785238fd1498Szrj      illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
785338fd1498Szrj      need to create 4 vectorized stmts):
785438fd1498Szrj 
785538fd1498Szrj      before vectorization:
785638fd1498Szrj                                 RELATED_STMT    VEC_STMT
785738fd1498Szrj         S1:     x = memref      -               -
785838fd1498Szrj         S2:     z = x + 1       -               -
785938fd1498Szrj 
786038fd1498Szrj      step 1: vectorize stmt S1:
786138fd1498Szrj         We first create the vector stmt VS1_0, and, as usual, record a
786238fd1498Szrj         pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
786338fd1498Szrj         Next, we create the vector stmt VS1_1, and record a pointer to
786438fd1498Szrj         it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
786538fd1498Szrj         Similarly, for VS1_2 and VS1_3.  This is the resulting chain of
786638fd1498Szrj         stmts and pointers:
786738fd1498Szrj                                 RELATED_STMT    VEC_STMT
786838fd1498Szrj         VS1_0:  vx0 = memref0   VS1_1           -
786938fd1498Szrj         VS1_1:  vx1 = memref1   VS1_2           -
787038fd1498Szrj         VS1_2:  vx2 = memref2   VS1_3           -
787138fd1498Szrj         VS1_3:  vx3 = memref3   -               -
787238fd1498Szrj         S1:     x = load        -               VS1_0
787338fd1498Szrj         S2:     z = x + 1       -               -
787438fd1498Szrj 
787538fd1498Szrj      See in documentation in vect_get_vec_def_for_stmt_copy for how the
787638fd1498Szrj      information we recorded in RELATED_STMT field is used to vectorize
787738fd1498Szrj      stmt S2.  */
787838fd1498Szrj 
787938fd1498Szrj   /* In case of interleaving (non-unit grouped access):
788038fd1498Szrj 
788138fd1498Szrj      S1:  x2 = &base + 2
788238fd1498Szrj      S2:  x0 = &base
788338fd1498Szrj      S3:  x1 = &base + 1
788438fd1498Szrj      S4:  x3 = &base + 3
788538fd1498Szrj 
788638fd1498Szrj      Vectorized loads are created in the order of memory accesses
788738fd1498Szrj      starting from the access of the first stmt of the chain:
788838fd1498Szrj 
788938fd1498Szrj      VS1: vx0 = &base
789038fd1498Szrj      VS2: vx1 = &base + vec_size*1
789138fd1498Szrj      VS3: vx3 = &base + vec_size*2
789238fd1498Szrj      VS4: vx4 = &base + vec_size*3
789338fd1498Szrj 
789438fd1498Szrj      Then permutation statements are generated:
789538fd1498Szrj 
789638fd1498Szrj      VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
789738fd1498Szrj      VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
789838fd1498Szrj        ...
789938fd1498Szrj 
790038fd1498Szrj      And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
790138fd1498Szrj      (the order of the data-refs in the output of vect_permute_load_chain
790238fd1498Szrj      corresponds to the order of scalar stmts in the interleaving chain - see
790338fd1498Szrj      the documentation of vect_permute_load_chain()).
790438fd1498Szrj      The generation of permutation stmts and recording them in
790538fd1498Szrj      STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
790638fd1498Szrj 
790738fd1498Szrj      In case of both multiple types and interleaving, the vector loads and
790838fd1498Szrj      permutation stmts above are created for every copy.  The result vector
790938fd1498Szrj      stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
791038fd1498Szrj      corresponding STMT_VINFO_RELATED_STMT for the next copies.  */
791138fd1498Szrj 
791238fd1498Szrj   /* If the data reference is aligned (dr_aligned) or potentially unaligned
791338fd1498Szrj      on a target that supports unaligned accesses (dr_unaligned_supported)
791438fd1498Szrj      we generate the following code:
791538fd1498Szrj          p = initial_addr;
791638fd1498Szrj          indx = 0;
791738fd1498Szrj          loop {
791838fd1498Szrj 	   p = p + indx * vectype_size;
791938fd1498Szrj            vec_dest = *(p);
792038fd1498Szrj            indx = indx + 1;
792138fd1498Szrj          }
792238fd1498Szrj 
792338fd1498Szrj      Otherwise, the data reference is potentially unaligned on a target that
792438fd1498Szrj      does not support unaligned accesses (dr_explicit_realign_optimized) -
792538fd1498Szrj      then generate the following code, in which the data in each iteration is
792638fd1498Szrj      obtained by two vector loads, one from the previous iteration, and one
792738fd1498Szrj      from the current iteration:
792838fd1498Szrj          p1 = initial_addr;
792938fd1498Szrj          msq_init = *(floor(p1))
793038fd1498Szrj          p2 = initial_addr + VS - 1;
793138fd1498Szrj          realignment_token = call target_builtin;
793238fd1498Szrj          indx = 0;
793338fd1498Szrj          loop {
793438fd1498Szrj            p2 = p2 + indx * vectype_size
793538fd1498Szrj            lsq = *(floor(p2))
793638fd1498Szrj            vec_dest = realign_load (msq, lsq, realignment_token)
793738fd1498Szrj            indx = indx + 1;
793838fd1498Szrj            msq = lsq;
793938fd1498Szrj          }   */
794038fd1498Szrj 
794138fd1498Szrj   /* If the misalignment remains the same throughout the execution of the
794238fd1498Szrj      loop, we can create the init_addr and permutation mask at the loop
794338fd1498Szrj      preheader.  Otherwise, it needs to be created inside the loop.
794438fd1498Szrj      This can only occur when vectorizing memory accesses in the inner-loop
794538fd1498Szrj      nested within an outer-loop that is being vectorized.  */
794638fd1498Szrj 
794738fd1498Szrj   if (nested_in_vect_loop
794838fd1498Szrj       && !multiple_p (DR_STEP_ALIGNMENT (dr),
794938fd1498Szrj 		      GET_MODE_SIZE (TYPE_MODE (vectype))))
795038fd1498Szrj     {
795138fd1498Szrj       gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
795238fd1498Szrj       compute_in_loop = true;
795338fd1498Szrj     }
795438fd1498Szrj 
795538fd1498Szrj   if ((alignment_support_scheme == dr_explicit_realign_optimized
795638fd1498Szrj        || alignment_support_scheme == dr_explicit_realign)
795738fd1498Szrj       && !compute_in_loop)
795838fd1498Szrj     {
795938fd1498Szrj       msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
796038fd1498Szrj 				    alignment_support_scheme, NULL_TREE,
796138fd1498Szrj 				    &at_loop);
796238fd1498Szrj       if (alignment_support_scheme == dr_explicit_realign_optimized)
796338fd1498Szrj 	{
796438fd1498Szrj 	  phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
796538fd1498Szrj 	  byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
796638fd1498Szrj 				    size_one_node);
796738fd1498Szrj 	}
796838fd1498Szrj     }
796938fd1498Szrj   else
797038fd1498Szrj     at_loop = loop;
797138fd1498Szrj 
797238fd1498Szrj   if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
797338fd1498Szrj     offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
797438fd1498Szrj 
797538fd1498Szrj   tree bump;
797638fd1498Szrj   tree vec_offset = NULL_TREE;
797738fd1498Szrj   if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
797838fd1498Szrj     {
797938fd1498Szrj       aggr_type = NULL_TREE;
798038fd1498Szrj       bump = NULL_TREE;
798138fd1498Szrj     }
798238fd1498Szrj   else if (memory_access_type == VMAT_GATHER_SCATTER)
798338fd1498Szrj     {
798438fd1498Szrj       aggr_type = elem_type;
798538fd1498Szrj       vect_get_strided_load_store_ops (stmt, loop_vinfo, &gs_info,
798638fd1498Szrj 				       &bump, &vec_offset);
798738fd1498Szrj     }
798838fd1498Szrj   else
798938fd1498Szrj     {
799038fd1498Szrj       if (memory_access_type == VMAT_LOAD_STORE_LANES)
799138fd1498Szrj 	aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
799238fd1498Szrj       else
799338fd1498Szrj 	aggr_type = vectype;
799438fd1498Szrj       bump = vect_get_data_ptr_increment (dr, aggr_type, memory_access_type);
799538fd1498Szrj     }
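
  /* E.g. (a sketch): with V4SI vectors (nunits == 4) and vec_num == 2,
     VMAT_LOAD_STORE_LANES uses the array type int[8] so that a single
     load-lanes instruction can fill both vectors, while the contiguous
     cases use V4SI itself and vect_get_data_ptr_increment typically
     yields a 16-byte bump per vector.  */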
799638fd1498Szrj 
799738fd1498Szrj   tree vec_mask = NULL_TREE;
799838fd1498Szrj   prev_stmt_info = NULL;
799938fd1498Szrj   poly_uint64 group_elt = 0;
800038fd1498Szrj   for (j = 0; j < ncopies; j++)
800138fd1498Szrj     {
800238fd1498Szrj       /* 1. Create the vector or array pointer update chain.  */
800338fd1498Szrj       if (j == 0)
800438fd1498Szrj 	{
800538fd1498Szrj 	  bool simd_lane_access_p
800638fd1498Szrj 	    = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
800738fd1498Szrj 	  if (simd_lane_access_p
800838fd1498Szrj 	      && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
800938fd1498Szrj 	      && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
801038fd1498Szrj 	      && integer_zerop (DR_OFFSET (first_dr))
801138fd1498Szrj 	      && integer_zerop (DR_INIT (first_dr))
801238fd1498Szrj 	      && alias_sets_conflict_p (get_alias_set (aggr_type),
801338fd1498Szrj 					get_alias_set (TREE_TYPE (ref_type)))
801438fd1498Szrj 	      && (alignment_support_scheme == dr_aligned
801538fd1498Szrj 		  || alignment_support_scheme == dr_unaligned_supported))
801638fd1498Szrj 	    {
801738fd1498Szrj 	      dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
801838fd1498Szrj 	      dataref_offset = build_int_cst (ref_type, 0);
801938fd1498Szrj 	      inv_p = false;
802038fd1498Szrj 	    }
802138fd1498Szrj 	  else if (first_stmt_for_drptr
802238fd1498Szrj 		   && first_stmt != first_stmt_for_drptr)
802338fd1498Szrj 	    {
802438fd1498Szrj 	      dataref_ptr
802538fd1498Szrj 		= vect_create_data_ref_ptr (first_stmt_for_drptr, aggr_type,
802638fd1498Szrj 					    at_loop, offset, &dummy, gsi,
802738fd1498Szrj 					    &ptr_incr, simd_lane_access_p,
802838fd1498Szrj 					    &inv_p, byte_offset, bump);
802938fd1498Szrj 	      /* Adjust the pointer by the difference to first_stmt.  */
803038fd1498Szrj 	      data_reference_p ptrdr
803138fd1498Szrj 		= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr));
803238fd1498Szrj 	      tree diff = fold_convert (sizetype,
803338fd1498Szrj 					size_binop (MINUS_EXPR,
803438fd1498Szrj 						    DR_INIT (first_dr),
803538fd1498Szrj 						    DR_INIT (ptrdr)));
803638fd1498Szrj 	      dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
803738fd1498Szrj 					     stmt, diff);
803838fd1498Szrj 	    }
803938fd1498Szrj 	  else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
804038fd1498Szrj 	    {
804138fd1498Szrj 	      vect_get_gather_scatter_ops (loop, stmt, &gs_info,
804238fd1498Szrj 					   &dataref_ptr, &vec_offset);
804338fd1498Szrj 	      inv_p = false;
804438fd1498Szrj 	    }
804538fd1498Szrj 	  else
804638fd1498Szrj 	    dataref_ptr
804738fd1498Szrj 	      = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
804838fd1498Szrj 					  offset, &dummy, gsi, &ptr_incr,
804938fd1498Szrj 					  simd_lane_access_p, &inv_p,
805038fd1498Szrj 					  byte_offset, bump);
805138fd1498Szrj 	  if (mask)
805238fd1498Szrj 	    vec_mask = vect_get_vec_def_for_operand (mask, stmt,
805338fd1498Szrj 						     mask_vectype);
805438fd1498Szrj 	}
805538fd1498Szrj       else
805638fd1498Szrj 	{
805738fd1498Szrj 	  if (dataref_offset)
805838fd1498Szrj 	    dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
805938fd1498Szrj 					      bump);
806038fd1498Szrj 	  else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
806138fd1498Szrj 	    vec_offset = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
806238fd1498Szrj 							 vec_offset);
806338fd1498Szrj 	  else
806438fd1498Szrj 	    dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
806538fd1498Szrj 					   stmt, bump);
806638fd1498Szrj 	  if (mask)
806738fd1498Szrj 	    vec_mask = vect_get_vec_def_for_stmt_copy (mask_dt, vec_mask);
806838fd1498Szrj 	}
806938fd1498Szrj 
807038fd1498Szrj       if (grouped_load || slp_perm)
807138fd1498Szrj 	dr_chain.create (vec_num);
807238fd1498Szrj 
807338fd1498Szrj       if (memory_access_type == VMAT_LOAD_STORE_LANES)
807438fd1498Szrj 	{
807538fd1498Szrj 	  tree vec_array;
807638fd1498Szrj 
807738fd1498Szrj 	  vec_array = create_vector_array (vectype, vec_num);
807838fd1498Szrj 
807938fd1498Szrj 	  tree final_mask = NULL_TREE;
808038fd1498Szrj 	  if (loop_masks)
808138fd1498Szrj 	    final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
808238fd1498Szrj 					     vectype, j);
808338fd1498Szrj 	  if (vec_mask)
808438fd1498Szrj 	    final_mask = prepare_load_store_mask (mask_vectype, final_mask,
808538fd1498Szrj 						  vec_mask, gsi);
808638fd1498Szrj 
808738fd1498Szrj 	  gcall *call;
808838fd1498Szrj 	  if (final_mask)
808938fd1498Szrj 	    {
809038fd1498Szrj 	      /* Emit:
809138fd1498Szrj 		   VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
809238fd1498Szrj 		                                VEC_MASK).  */
809338fd1498Szrj 	      unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
809438fd1498Szrj 	      tree alias_ptr = build_int_cst (ref_type, align);
809538fd1498Szrj 	      call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
809638fd1498Szrj 						 dataref_ptr, alias_ptr,
809738fd1498Szrj 						 final_mask);
809838fd1498Szrj 	    }
809938fd1498Szrj 	  else
810038fd1498Szrj 	    {
810138fd1498Szrj 	      /* Emit:
810238fd1498Szrj 		   VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]).  */
810338fd1498Szrj 	      data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
810438fd1498Szrj 	      call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
810538fd1498Szrj 	    }
810638fd1498Szrj 	  gimple_call_set_lhs (call, vec_array);
810738fd1498Szrj 	  gimple_call_set_nothrow (call, true);
810838fd1498Szrj 	  new_stmt = call;
810938fd1498Szrj 	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
811038fd1498Szrj 
811138fd1498Szrj 	  /* Extract each vector into an SSA_NAME.  */
811238fd1498Szrj 	  for (i = 0; i < vec_num; i++)
811338fd1498Szrj 	    {
811438fd1498Szrj 	      new_temp = read_vector_array (stmt, gsi, scalar_dest,
811538fd1498Szrj 					    vec_array, i);
811638fd1498Szrj 	      dr_chain.quick_push (new_temp);
811738fd1498Szrj 	    }
811838fd1498Szrj 
811938fd1498Szrj 	  /* Record the mapping between SSA_NAMEs and statements.  */
812038fd1498Szrj 	  vect_record_grouped_load_vectors (stmt, dr_chain);
812138fd1498Szrj 	}
812238fd1498Szrj       else
812338fd1498Szrj 	{
812438fd1498Szrj 	  for (i = 0; i < vec_num; i++)
812538fd1498Szrj 	    {
812638fd1498Szrj 	      tree final_mask = NULL_TREE;
812738fd1498Szrj 	      if (loop_masks
812838fd1498Szrj 		  && memory_access_type != VMAT_INVARIANT)
812938fd1498Szrj 		final_mask = vect_get_loop_mask (gsi, loop_masks,
813038fd1498Szrj 						 vec_num * ncopies,
813138fd1498Szrj 						 vectype, vec_num * j + i);
813238fd1498Szrj 	      if (vec_mask)
813338fd1498Szrj 		final_mask = prepare_load_store_mask (mask_vectype, final_mask,
813438fd1498Szrj 						      vec_mask, gsi);
813538fd1498Szrj 
813638fd1498Szrj 	      if (i > 0)
813738fd1498Szrj 		dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
813838fd1498Szrj 					       stmt, bump);
813938fd1498Szrj 
814038fd1498Szrj 	      /* 2. Create the vector-load in the loop.  */
814138fd1498Szrj 	      switch (alignment_support_scheme)
814238fd1498Szrj 		{
814338fd1498Szrj 		case dr_aligned:
814438fd1498Szrj 		case dr_unaligned_supported:
814538fd1498Szrj 		  {
814638fd1498Szrj 		    unsigned int align, misalign;
814738fd1498Szrj 
814838fd1498Szrj 		    if (memory_access_type == VMAT_GATHER_SCATTER)
814938fd1498Szrj 		      {
815038fd1498Szrj 			tree scale = size_int (gs_info.scale);
815138fd1498Szrj 			gcall *call;
815238fd1498Szrj 			if (loop_masks)
815338fd1498Szrj 			  call = gimple_build_call_internal
815438fd1498Szrj 			    (IFN_MASK_GATHER_LOAD, 4, dataref_ptr,
815538fd1498Szrj 			     vec_offset, scale, final_mask);
815638fd1498Szrj 			else
815738fd1498Szrj 			  call = gimple_build_call_internal
815838fd1498Szrj 			    (IFN_GATHER_LOAD, 3, dataref_ptr,
815938fd1498Szrj 			     vec_offset, scale);
816038fd1498Szrj 			gimple_call_set_nothrow (call, true);
816138fd1498Szrj 			new_stmt = call;
816238fd1498Szrj 			data_ref = NULL_TREE;
816338fd1498Szrj 			break;
816438fd1498Szrj 		      }
816538fd1498Szrj 
816638fd1498Szrj 		    align = DR_TARGET_ALIGNMENT (dr);
816738fd1498Szrj 		    if (alignment_support_scheme == dr_aligned)
816838fd1498Szrj 		      {
816938fd1498Szrj 			gcc_assert (aligned_access_p (first_dr));
817038fd1498Szrj 			misalign = 0;
817138fd1498Szrj 		      }
817238fd1498Szrj 		    else if (DR_MISALIGNMENT (first_dr) == -1)
817338fd1498Szrj 		      {
817438fd1498Szrj 			align = dr_alignment (vect_dr_behavior (first_dr));
817538fd1498Szrj 			misalign = 0;
817638fd1498Szrj 		      }
817738fd1498Szrj 		    else
817838fd1498Szrj 		      misalign = DR_MISALIGNMENT (first_dr);
817938fd1498Szrj 		    if (dataref_offset == NULL_TREE
818038fd1498Szrj 			&& TREE_CODE (dataref_ptr) == SSA_NAME)
818138fd1498Szrj 		      set_ptr_info_alignment (get_ptr_info (dataref_ptr),
818238fd1498Szrj 					      align, misalign);
818338fd1498Szrj 
818438fd1498Szrj 		    if (final_mask)
818538fd1498Szrj 		      {
818638fd1498Szrj 			align = least_bit_hwi (misalign | align);
818738fd1498Szrj 			tree ptr = build_int_cst (ref_type, align);
818838fd1498Szrj 			gcall *call
818938fd1498Szrj 			  = gimple_build_call_internal (IFN_MASK_LOAD, 3,
819038fd1498Szrj 							dataref_ptr, ptr,
819138fd1498Szrj 							final_mask);
819238fd1498Szrj 			gimple_call_set_nothrow (call, true);
819338fd1498Szrj 			new_stmt = call;
819438fd1498Szrj 			data_ref = NULL_TREE;
819538fd1498Szrj 		      }
819638fd1498Szrj 		    else
819738fd1498Szrj 		      {
819838fd1498Szrj 			data_ref
819938fd1498Szrj 			  = fold_build2 (MEM_REF, vectype, dataref_ptr,
820038fd1498Szrj 					 dataref_offset
820138fd1498Szrj 					 ? dataref_offset
820238fd1498Szrj 					 : build_int_cst (ref_type, 0));
820338fd1498Szrj 			if (alignment_support_scheme == dr_aligned)
820438fd1498Szrj 			  ;
820538fd1498Szrj 			else if (DR_MISALIGNMENT (first_dr) == -1)
820638fd1498Szrj 			  TREE_TYPE (data_ref)
820738fd1498Szrj 			    = build_aligned_type (TREE_TYPE (data_ref),
820838fd1498Szrj 						  align * BITS_PER_UNIT);
820938fd1498Szrj 			else
821038fd1498Szrj 			  TREE_TYPE (data_ref)
821138fd1498Szrj 			    = build_aligned_type (TREE_TYPE (data_ref),
821238fd1498Szrj 						  TYPE_ALIGN (elem_type));
821338fd1498Szrj 		      }
821438fd1498Szrj 		    break;
821538fd1498Szrj 		  }
821638fd1498Szrj 		case dr_explicit_realign:
821738fd1498Szrj 		  {
821838fd1498Szrj 		    tree ptr, bump;
821938fd1498Szrj 
822038fd1498Szrj 		    tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
822138fd1498Szrj 
822238fd1498Szrj 		    if (compute_in_loop)
822338fd1498Szrj 		      msq = vect_setup_realignment (first_stmt, gsi,
822438fd1498Szrj 						    &realignment_token,
822538fd1498Szrj 						    dr_explicit_realign,
822638fd1498Szrj 						    dataref_ptr, NULL);
822738fd1498Szrj 
822838fd1498Szrj 		    if (TREE_CODE (dataref_ptr) == SSA_NAME)
822938fd1498Szrj 		      ptr = copy_ssa_name (dataref_ptr);
823038fd1498Szrj 		    else
823138fd1498Szrj 		      ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
823238fd1498Szrj 		    unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
823338fd1498Szrj 		    new_stmt = gimple_build_assign
823438fd1498Szrj 				 (ptr, BIT_AND_EXPR, dataref_ptr,
823538fd1498Szrj 				  build_int_cst
823638fd1498Szrj 				  (TREE_TYPE (dataref_ptr),
823738fd1498Szrj 				   -(HOST_WIDE_INT) align));
823838fd1498Szrj 		    vect_finish_stmt_generation (stmt, new_stmt, gsi);
823938fd1498Szrj 		    data_ref
824038fd1498Szrj 		      = build2 (MEM_REF, vectype, ptr,
824138fd1498Szrj 				build_int_cst (ref_type, 0));
824238fd1498Szrj 		    vect_copy_ref_info (data_ref, DR_REF (first_dr));
824338fd1498Szrj 		    vec_dest = vect_create_destination_var (scalar_dest,
824438fd1498Szrj 							    vectype);
824538fd1498Szrj 		    new_stmt = gimple_build_assign (vec_dest, data_ref);
824638fd1498Szrj 		    new_temp = make_ssa_name (vec_dest, new_stmt);
824738fd1498Szrj 		    gimple_assign_set_lhs (new_stmt, new_temp);
824838fd1498Szrj 		    gimple_set_vdef (new_stmt, gimple_vdef (stmt));
824938fd1498Szrj 		    gimple_set_vuse (new_stmt, gimple_vuse (stmt));
825038fd1498Szrj 		    vect_finish_stmt_generation (stmt, new_stmt, gsi);
825138fd1498Szrj 		    msq = new_temp;
825238fd1498Szrj 
825338fd1498Szrj 		    bump = size_binop (MULT_EXPR, vs,
825438fd1498Szrj 				       TYPE_SIZE_UNIT (elem_type));
825538fd1498Szrj 		    bump = size_binop (MINUS_EXPR, bump, size_one_node);
825638fd1498Szrj 		    ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
825738fd1498Szrj 		    new_stmt = gimple_build_assign
825838fd1498Szrj 				 (NULL_TREE, BIT_AND_EXPR, ptr,
825938fd1498Szrj 				  build_int_cst
826038fd1498Szrj 				  (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
826138fd1498Szrj 		    ptr = copy_ssa_name (ptr, new_stmt);
826238fd1498Szrj 		    gimple_assign_set_lhs (new_stmt, ptr);
826338fd1498Szrj 		    vect_finish_stmt_generation (stmt, new_stmt, gsi);
826438fd1498Szrj 		    data_ref
826538fd1498Szrj 		      = build2 (MEM_REF, vectype, ptr,
826638fd1498Szrj 				build_int_cst (ref_type, 0));
826738fd1498Szrj 		    break;
826838fd1498Szrj 		  }
826938fd1498Szrj 		case dr_explicit_realign_optimized:
827038fd1498Szrj 		  {
827138fd1498Szrj 		    if (TREE_CODE (dataref_ptr) == SSA_NAME)
827238fd1498Szrj 		      new_temp = copy_ssa_name (dataref_ptr);
827338fd1498Szrj 		    else
827438fd1498Szrj 		      new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
827538fd1498Szrj 		    unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
827638fd1498Szrj 		    new_stmt = gimple_build_assign
827738fd1498Szrj 		      (new_temp, BIT_AND_EXPR, dataref_ptr,
827838fd1498Szrj 		       build_int_cst (TREE_TYPE (dataref_ptr),
827938fd1498Szrj 				     -(HOST_WIDE_INT) align));
828038fd1498Szrj 		    vect_finish_stmt_generation (stmt, new_stmt, gsi);
828138fd1498Szrj 		    data_ref
828238fd1498Szrj 		      = build2 (MEM_REF, vectype, new_temp,
828338fd1498Szrj 				build_int_cst (ref_type, 0));
828438fd1498Szrj 		    break;
828538fd1498Szrj 		  }
828638fd1498Szrj 		default:
828738fd1498Szrj 		  gcc_unreachable ();
828838fd1498Szrj 		}
828938fd1498Szrj 	      vec_dest = vect_create_destination_var (scalar_dest, vectype);
829038fd1498Szrj 	      /* DATA_REF is null if we've already built the statement.  */
829138fd1498Szrj 	      if (data_ref)
829238fd1498Szrj 		{
829338fd1498Szrj 		  vect_copy_ref_info (data_ref, DR_REF (first_dr));
829438fd1498Szrj 		  new_stmt = gimple_build_assign (vec_dest, data_ref);
829538fd1498Szrj 		}
829638fd1498Szrj 	      new_temp = make_ssa_name (vec_dest, new_stmt);
829738fd1498Szrj 	      gimple_set_lhs (new_stmt, new_temp);
829838fd1498Szrj 	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
829938fd1498Szrj 
830038fd1498Szrj 	      /* 3. Handle explicit realignment if necessary/supported.
830138fd1498Szrj 		 Create in loop:
830238fd1498Szrj 		   vec_dest = realign_load (msq, lsq, realignment_token)  */
830338fd1498Szrj 	      if (alignment_support_scheme == dr_explicit_realign_optimized
830438fd1498Szrj 		  || alignment_support_scheme == dr_explicit_realign)
830538fd1498Szrj 		{
830638fd1498Szrj 		  lsq = gimple_assign_lhs (new_stmt);
830738fd1498Szrj 		  if (!realignment_token)
830838fd1498Szrj 		    realignment_token = dataref_ptr;
830938fd1498Szrj 		  vec_dest = vect_create_destination_var (scalar_dest, vectype);
831038fd1498Szrj 		  new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
831138fd1498Szrj 						  msq, lsq, realignment_token);
831238fd1498Szrj 		  new_temp = make_ssa_name (vec_dest, new_stmt);
831338fd1498Szrj 		  gimple_assign_set_lhs (new_stmt, new_temp);
831438fd1498Szrj 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
831538fd1498Szrj 
831638fd1498Szrj 		  if (alignment_support_scheme == dr_explicit_realign_optimized)
831738fd1498Szrj 		    {
831838fd1498Szrj 		      gcc_assert (phi);
831938fd1498Szrj 		      if (i == vec_num - 1 && j == ncopies - 1)
832038fd1498Szrj 			add_phi_arg (phi, lsq,
832138fd1498Szrj 				     loop_latch_edge (containing_loop),
832238fd1498Szrj 				     UNKNOWN_LOCATION);
832338fd1498Szrj 		      msq = lsq;
832438fd1498Szrj 		    }
832538fd1498Szrj 		}
832638fd1498Szrj 
832738fd1498Szrj 	      /* 4. Handle invariant-load.  */
832838fd1498Szrj 	      if (inv_p && !bb_vinfo)
832938fd1498Szrj 		{
833038fd1498Szrj 		  gcc_assert (!grouped_load);
833138fd1498Szrj 		  /* If we have versioned for aliasing or the loop doesn't
833238fd1498Szrj 		     have any data dependencies that would preclude this,
833338fd1498Szrj 		     then we are sure this is a loop invariant load and
833438fd1498Szrj 		     thus we can insert it on the preheader edge.  */
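		  /* E.g. (a sketch): for an invariant load  x = *p_1
		     the scalar load is emitted on the preheader edge
		     and the vector is then built by broadcasting its
		     result via vect_init_vector.  */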
833538fd1498Szrj 		  if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
833638fd1498Szrj 		      && !nested_in_vect_loop
833738fd1498Szrj 		      && hoist_defs_of_uses (stmt, loop))
833838fd1498Szrj 		    {
833938fd1498Szrj 		      if (dump_enabled_p ())
834038fd1498Szrj 			{
834138fd1498Szrj 			  dump_printf_loc (MSG_NOTE, vect_location,
834238fd1498Szrj 					   "hoisting out of the vectorized "
834338fd1498Szrj 					   "loop: ");
834438fd1498Szrj 			  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
834538fd1498Szrj 			}
834638fd1498Szrj 		      tree tem = copy_ssa_name (scalar_dest);
834738fd1498Szrj 		      gsi_insert_on_edge_immediate
834838fd1498Szrj 			(loop_preheader_edge (loop),
834938fd1498Szrj 			 gimple_build_assign (tem,
835038fd1498Szrj 					      unshare_expr
835138fd1498Szrj 					        (gimple_assign_rhs1 (stmt))));
835238fd1498Szrj 		      new_temp = vect_init_vector (stmt, tem, vectype, NULL);
835338fd1498Szrj 		      new_stmt = SSA_NAME_DEF_STMT (new_temp);
835438fd1498Szrj 		      set_vinfo_for_stmt (new_stmt,
835538fd1498Szrj 					  new_stmt_vec_info (new_stmt, vinfo));
835638fd1498Szrj 		    }
835738fd1498Szrj 		  else
835838fd1498Szrj 		    {
835938fd1498Szrj 		      gimple_stmt_iterator gsi2 = *gsi;
836038fd1498Szrj 		      gsi_next (&gsi2);
836138fd1498Szrj 		      new_temp = vect_init_vector (stmt, scalar_dest,
836238fd1498Szrj 						   vectype, &gsi2);
836338fd1498Szrj 		      new_stmt = SSA_NAME_DEF_STMT (new_temp);
836438fd1498Szrj 		    }
836538fd1498Szrj 		}
836638fd1498Szrj 
836738fd1498Szrj 	      if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
836838fd1498Szrj 		{
836938fd1498Szrj 		  tree perm_mask = perm_mask_for_reverse (vectype);
837038fd1498Szrj 		  new_temp = permute_vec_elements (new_temp, new_temp,
837138fd1498Szrj 						   perm_mask, stmt, gsi);
837238fd1498Szrj 		  new_stmt = SSA_NAME_DEF_STMT (new_temp);
837338fd1498Szrj 		}
837438fd1498Szrj 
837538fd1498Szrj 	      /* Collect vector loads and later create their permutation in
837638fd1498Szrj 		 vect_transform_grouped_load ().  */
837738fd1498Szrj 	      if (grouped_load || slp_perm)
837838fd1498Szrj 		dr_chain.quick_push (new_temp);
837938fd1498Szrj 
838038fd1498Szrj 	      /* Store vector loads in the corresponding SLP_NODE.  */
838138fd1498Szrj 	      if (slp && !slp_perm)
838238fd1498Szrj 		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
838338fd1498Szrj 
838438fd1498Szrj 	      /* With an SLP permutation we load the gaps as well; without
838538fd1498Szrj 	         one we need to skip the gaps once we have fully loaded
838638fd1498Szrj 		 all elements.  group_gap_adj is GROUP_SIZE here.  */
838738fd1498Szrj 	      group_elt += nunits;
838838fd1498Szrj 	      if (maybe_ne (group_gap_adj, 0U)
838938fd1498Szrj 		  && !slp_perm
839038fd1498Szrj 		  && known_eq (group_elt, group_size - group_gap_adj))
839138fd1498Szrj 		{
839238fd1498Szrj 		  poly_wide_int bump_val
839338fd1498Szrj 		    = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
839438fd1498Szrj 		       * group_gap_adj);
839538fd1498Szrj 		  tree bump = wide_int_to_tree (sizetype, bump_val);
839638fd1498Szrj 		  dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
839738fd1498Szrj 						 stmt, bump);
839838fd1498Szrj 		  group_elt = 0;
839938fd1498Szrj 		}
840038fd1498Szrj 	    }
840138fd1498Szrj 	  /* Bump the vector pointer to account for a gap or for excess
840238fd1498Szrj 	     elements loaded for a permuted SLP load.  */
840338fd1498Szrj 	  if (maybe_ne (group_gap_adj, 0U) && slp_perm)
840438fd1498Szrj 	    {
840538fd1498Szrj 	      poly_wide_int bump_val
840638fd1498Szrj 		= (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
840738fd1498Szrj 		   * group_gap_adj);
840838fd1498Szrj 	      tree bump = wide_int_to_tree (sizetype, bump_val);
840938fd1498Szrj 	      dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
841038fd1498Szrj 					     stmt, bump);
841138fd1498Szrj 	    }
841238fd1498Szrj 	}
841338fd1498Szrj 
841438fd1498Szrj       if (slp && !slp_perm)
841538fd1498Szrj 	continue;
841638fd1498Szrj 
841738fd1498Szrj       if (slp_perm)
841838fd1498Szrj         {
841938fd1498Szrj 	  unsigned n_perms;
842038fd1498Szrj           if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
842138fd1498Szrj                                              slp_node_instance, false,
842238fd1498Szrj 					     &n_perms))
842338fd1498Szrj             {
842438fd1498Szrj               dr_chain.release ();
842538fd1498Szrj               return false;
842638fd1498Szrj             }
842738fd1498Szrj         }
842838fd1498Szrj       else
842938fd1498Szrj         {
843038fd1498Szrj           if (grouped_load)
843138fd1498Szrj   	    {
843238fd1498Szrj 	      if (memory_access_type != VMAT_LOAD_STORE_LANES)
843338fd1498Szrj 		vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
843438fd1498Szrj 	      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
843538fd1498Szrj 	    }
843638fd1498Szrj           else
843738fd1498Szrj 	    {
843838fd1498Szrj 	      if (j == 0)
843938fd1498Szrj 	        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
844038fd1498Szrj 	      else
844138fd1498Szrj 	        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
844238fd1498Szrj 	      prev_stmt_info = vinfo_for_stmt (new_stmt);
844338fd1498Szrj 	    }
844438fd1498Szrj         }
844538fd1498Szrj       dr_chain.release ();
844638fd1498Szrj     }
844738fd1498Szrj 
844838fd1498Szrj   return true;
844938fd1498Szrj }
845038fd1498Szrj 
845138fd1498Szrj /* Function vect_is_simple_cond.
845238fd1498Szrj 
845338fd1498Szrj    Input:
845438fd1498Szrj    VINFO - the vect info of the loop or basic block that is being vectorized.
845538fd1498Szrj    COND - Condition that is checked for simple use.
845638fd1498Szrj 
845738fd1498Szrj    Output:
845838fd1498Szrj    *COMP_VECTYPE - the vector type for the comparison.
845938fd1498Szrj    *DTS - The def types for the arguments of the comparison.
846038fd1498Szrj 
846138fd1498Szrj    Returns whether a COND can be vectorized.  Checks whether
846238fd1498Szrj    condition operands are supportable using vect_is_simple_use.  */
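
/* For example (a sketch): COND may be a comparison such as  a_1 < b_2,
   or a mask SSA name  m_3  whose scalar type satisfies
   VECT_SCALAR_BOOLEAN_TYPE_P.  Invariant forms such as  x_1 < 5  are also
   accepted, the constant operand being classified as vect_constant_def.  */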
846338fd1498Szrj 
846438fd1498Szrj static bool
846538fd1498Szrj vect_is_simple_cond (tree cond, vec_info *vinfo,
846638fd1498Szrj 		     tree *comp_vectype, enum vect_def_type *dts,
846738fd1498Szrj 		     tree vectype)
846838fd1498Szrj {
846938fd1498Szrj   tree lhs, rhs;
847038fd1498Szrj   tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
847138fd1498Szrj 
847238fd1498Szrj   /* Mask case.  */
847338fd1498Szrj   if (TREE_CODE (cond) == SSA_NAME
847438fd1498Szrj       && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
847538fd1498Szrj     {
847638fd1498Szrj       gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (cond);
847738fd1498Szrj       if (!vect_is_simple_use (cond, vinfo, &lhs_def_stmt,
847838fd1498Szrj 			       &dts[0], comp_vectype)
847938fd1498Szrj 	  || !*comp_vectype
848038fd1498Szrj 	  || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
848138fd1498Szrj 	return false;
848238fd1498Szrj       return true;
848338fd1498Szrj     }
848438fd1498Szrj 
848538fd1498Szrj   if (!COMPARISON_CLASS_P (cond))
848638fd1498Szrj     return false;
848738fd1498Szrj 
848838fd1498Szrj   lhs = TREE_OPERAND (cond, 0);
848938fd1498Szrj   rhs = TREE_OPERAND (cond, 1);
849038fd1498Szrj 
849138fd1498Szrj   if (TREE_CODE (lhs) == SSA_NAME)
849238fd1498Szrj     {
849338fd1498Szrj       gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
849438fd1498Szrj       if (!vect_is_simple_use (lhs, vinfo, &lhs_def_stmt, &dts[0], &vectype1))
849538fd1498Szrj 	return false;
849638fd1498Szrj     }
849738fd1498Szrj   else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
849838fd1498Szrj 	   || TREE_CODE (lhs) == FIXED_CST)
849938fd1498Szrj     dts[0] = vect_constant_def;
850038fd1498Szrj   else
850138fd1498Szrj     return false;
850238fd1498Szrj 
850338fd1498Szrj   if (TREE_CODE (rhs) == SSA_NAME)
850438fd1498Szrj     {
850538fd1498Szrj       gimple *rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
850638fd1498Szrj       if (!vect_is_simple_use (rhs, vinfo, &rhs_def_stmt, &dts[1], &vectype2))
850738fd1498Szrj 	return false;
850838fd1498Szrj     }
850938fd1498Szrj   else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
851038fd1498Szrj 	   || TREE_CODE (rhs) == FIXED_CST)
851138fd1498Szrj     dts[1] = vect_constant_def;
851238fd1498Szrj   else
851338fd1498Szrj     return false;
851438fd1498Szrj 
851538fd1498Szrj   if (vectype1 && vectype2
851638fd1498Szrj       && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
851738fd1498Szrj 		   TYPE_VECTOR_SUBPARTS (vectype2)))
851838fd1498Szrj     return false;
851938fd1498Szrj 
852038fd1498Szrj   *comp_vectype = vectype1 ? vectype1 : vectype2;
852138fd1498Szrj   /* Invariant comparison.  */
8522*58e805e6Szrj   if (! *comp_vectype && vectype)
852338fd1498Szrj     {
852438fd1498Szrj       tree scalar_type = TREE_TYPE (lhs);
852538fd1498Szrj       /* If we can widen the comparison to match vectype do so.  */
852638fd1498Szrj       if (INTEGRAL_TYPE_P (scalar_type)
852738fd1498Szrj 	  && tree_int_cst_lt (TYPE_SIZE (scalar_type),
852838fd1498Szrj 			      TYPE_SIZE (TREE_TYPE (vectype))))
852938fd1498Szrj 	scalar_type = build_nonstandard_integer_type
853038fd1498Szrj 	  (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))),
853138fd1498Szrj 	   TYPE_UNSIGNED (scalar_type));
853238fd1498Szrj       *comp_vectype = get_vectype_for_scalar_type (scalar_type);
853338fd1498Szrj     }
853438fd1498Szrj 
853538fd1498Szrj   return true;
853638fd1498Szrj }
853738fd1498Szrj 
853838fd1498Szrj /* vectorizable_condition.
853938fd1498Szrj 
854038fd1498Szrj    Check if STMT is a conditional modify expression that can be vectorized.
854138fd1498Szrj    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
854238fd1498Szrj    stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
854338fd1498Szrj    at GSI.
854438fd1498Szrj 
854538fd1498Szrj    When STMT is vectorized as a nested cycle, REDUC_DEF is the vector
854638fd1498Szrj    variable to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is
854738fd1498Szrj    1, and in the else clause if it is 2).
854838fd1498Szrj 
854938fd1498Szrj    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
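
/* For example (a sketch): the scalar statement

       x_1 = a_2 < b_3 ? c_4 : d_5;

   becomes, with V4SI operands,

       vx_6 = VEC_COND_EXPR <va_7 < vb_8, vc_9, vd_10>;

   emitted once per copy.  */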
855038fd1498Szrj 
855138fd1498Szrj bool
855238fd1498Szrj vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
855338fd1498Szrj 			gimple **vec_stmt, tree reduc_def, int reduc_index,
855438fd1498Szrj 			slp_tree slp_node)
855538fd1498Szrj {
855638fd1498Szrj   tree scalar_dest = NULL_TREE;
855738fd1498Szrj   tree vec_dest = NULL_TREE;
855838fd1498Szrj   tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
855938fd1498Szrj   tree then_clause, else_clause;
856038fd1498Szrj   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
856138fd1498Szrj   tree comp_vectype = NULL_TREE;
856238fd1498Szrj   tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
856338fd1498Szrj   tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
856438fd1498Szrj   tree vec_compare;
856538fd1498Szrj   tree new_temp;
856638fd1498Szrj   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
856738fd1498Szrj   enum vect_def_type dts[4]
856838fd1498Szrj     = {vect_unknown_def_type, vect_unknown_def_type,
856938fd1498Szrj        vect_unknown_def_type, vect_unknown_def_type};
857038fd1498Szrj   int ndts = 4;
857138fd1498Szrj   int ncopies;
857238fd1498Szrj   enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
857338fd1498Szrj   stmt_vec_info prev_stmt_info = NULL;
857438fd1498Szrj   int i, j;
857538fd1498Szrj   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
857638fd1498Szrj   vec<tree> vec_oprnds0 = vNULL;
857738fd1498Szrj   vec<tree> vec_oprnds1 = vNULL;
857838fd1498Szrj   vec<tree> vec_oprnds2 = vNULL;
857938fd1498Szrj   vec<tree> vec_oprnds3 = vNULL;
858038fd1498Szrj   tree vec_cmp_type;
858138fd1498Szrj   bool masked = false;
858238fd1498Szrj 
858338fd1498Szrj   if (reduc_index && STMT_SLP_TYPE (stmt_info))
858438fd1498Szrj     return false;
858538fd1498Szrj 
858638fd1498Szrj   vect_reduction_type reduction_type
858738fd1498Szrj     = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info);
858838fd1498Szrj   if (reduction_type == TREE_CODE_REDUCTION)
858938fd1498Szrj     {
859038fd1498Szrj       if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
859138fd1498Szrj 	return false;
859238fd1498Szrj 
859338fd1498Szrj       if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
859438fd1498Szrj 	  && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
859538fd1498Szrj 	       && reduc_def))
859638fd1498Szrj 	return false;
859738fd1498Szrj 
859838fd1498Szrj       /* FORNOW: not yet supported.  */
859938fd1498Szrj       if (STMT_VINFO_LIVE_P (stmt_info))
860038fd1498Szrj 	{
860138fd1498Szrj 	  if (dump_enabled_p ())
860238fd1498Szrj 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
860338fd1498Szrj 			     "value used after loop.\n");
860438fd1498Szrj 	  return false;
860538fd1498Szrj 	}
860638fd1498Szrj     }
860738fd1498Szrj 
860838fd1498Szrj   /* Is vectorizable conditional operation?  */
860938fd1498Szrj   if (!is_gimple_assign (stmt))
861038fd1498Szrj     return false;
861138fd1498Szrj 
861238fd1498Szrj   code = gimple_assign_rhs_code (stmt);
861338fd1498Szrj 
861438fd1498Szrj   if (code != COND_EXPR)
861538fd1498Szrj     return false;
861638fd1498Szrj 
861738fd1498Szrj   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
861838fd1498Szrj   tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
861938fd1498Szrj 
862038fd1498Szrj   if (slp_node)
862138fd1498Szrj     ncopies = 1;
862238fd1498Szrj   else
862338fd1498Szrj     ncopies = vect_get_num_copies (loop_vinfo, vectype);
862438fd1498Szrj 
862538fd1498Szrj   gcc_assert (ncopies >= 1);
862638fd1498Szrj   if (reduc_index && ncopies > 1)
862738fd1498Szrj     return false; /* FORNOW */
862838fd1498Szrj 
862938fd1498Szrj   cond_expr = gimple_assign_rhs1 (stmt);
863038fd1498Szrj   then_clause = gimple_assign_rhs2 (stmt);
863138fd1498Szrj   else_clause = gimple_assign_rhs3 (stmt);
863238fd1498Szrj 
863338fd1498Szrj   if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo,
8634*58e805e6Szrj 			    &comp_vectype, &dts[0], slp_node ? NULL : vectype)
863538fd1498Szrj       || !comp_vectype)
863638fd1498Szrj     return false;
863738fd1498Szrj 
863838fd1498Szrj   gimple *def_stmt;
863938fd1498Szrj   if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &def_stmt, &dts[2],
864038fd1498Szrj 			   &vectype1))
864138fd1498Szrj     return false;
864238fd1498Szrj   if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &def_stmt, &dts[3],
864338fd1498Szrj 			   &vectype2))
864438fd1498Szrj     return false;
864538fd1498Szrj 
864638fd1498Szrj   if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
864738fd1498Szrj     return false;
864838fd1498Szrj 
864938fd1498Szrj   if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
865038fd1498Szrj     return false;
865138fd1498Szrj 
865238fd1498Szrj   masked = !COMPARISON_CLASS_P (cond_expr);
865338fd1498Szrj   vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
865438fd1498Szrj 
865538fd1498Szrj   if (vec_cmp_type == NULL_TREE)
865638fd1498Szrj     return false;
865738fd1498Szrj 
865838fd1498Szrj   cond_code = TREE_CODE (cond_expr);
865938fd1498Szrj   if (!masked)
866038fd1498Szrj     {
866138fd1498Szrj       cond_expr0 = TREE_OPERAND (cond_expr, 0);
866238fd1498Szrj       cond_expr1 = TREE_OPERAND (cond_expr, 1);
866338fd1498Szrj     }
866438fd1498Szrj 
866538fd1498Szrj   if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
866638fd1498Szrj     {
866738fd1498Szrj       /* Boolean values may have another representation in vectors
866838fd1498Szrj 	 and therefore we prefer bit operations over comparison for
866938fd1498Szrj 	 them (which also works for scalar masks).  We store opcodes
867038fd1498Szrj 	 to use in bitop1 and bitop2.  Statement is vectorized as
867138fd1498Szrj 	 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
867238fd1498Szrj 	 depending on bitop1 and bitop2 arity.  */
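      /* For example (booleans, a sketch): a > b is lowered to a & ~b
	 (bitop1 = BIT_NOT_EXPR applied to b, bitop2 = BIT_AND_EXPR
	 combining the result with a), and a == b to ~(a ^ b), where the
	 final negation is folded below by swapping the then/else
	 clauses.  */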
867338fd1498Szrj       switch (cond_code)
867438fd1498Szrj 	{
867538fd1498Szrj 	case GT_EXPR:
867638fd1498Szrj 	  bitop1 = BIT_NOT_EXPR;
867738fd1498Szrj 	  bitop2 = BIT_AND_EXPR;
867838fd1498Szrj 	  break;
867938fd1498Szrj 	case GE_EXPR:
868038fd1498Szrj 	  bitop1 = BIT_NOT_EXPR;
868138fd1498Szrj 	  bitop2 = BIT_IOR_EXPR;
868238fd1498Szrj 	  break;
868338fd1498Szrj 	case LT_EXPR:
868438fd1498Szrj 	  bitop1 = BIT_NOT_EXPR;
868538fd1498Szrj 	  bitop2 = BIT_AND_EXPR;
868638fd1498Szrj 	  std::swap (cond_expr0, cond_expr1);
868738fd1498Szrj 	  break;
868838fd1498Szrj 	case LE_EXPR:
868938fd1498Szrj 	  bitop1 = BIT_NOT_EXPR;
869038fd1498Szrj 	  bitop2 = BIT_IOR_EXPR;
869138fd1498Szrj 	  std::swap (cond_expr0, cond_expr1);
869238fd1498Szrj 	  break;
869338fd1498Szrj 	case NE_EXPR:
869438fd1498Szrj 	  bitop1 = BIT_XOR_EXPR;
869538fd1498Szrj 	  break;
869638fd1498Szrj 	case EQ_EXPR:
869738fd1498Szrj 	  bitop1 = BIT_XOR_EXPR;
869838fd1498Szrj 	  bitop2 = BIT_NOT_EXPR;
869938fd1498Szrj 	  break;
870038fd1498Szrj 	default:
870138fd1498Szrj 	  return false;
870238fd1498Szrj 	}
870338fd1498Szrj       cond_code = SSA_NAME;
870438fd1498Szrj     }
870538fd1498Szrj 
870638fd1498Szrj   if (!vec_stmt)
870738fd1498Szrj     {
870838fd1498Szrj       STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
870938fd1498Szrj       if (bitop1 != NOP_EXPR)
871038fd1498Szrj 	{
871138fd1498Szrj 	  machine_mode mode = TYPE_MODE (comp_vectype);
871238fd1498Szrj 	  optab optab;
871338fd1498Szrj 
871438fd1498Szrj 	  optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
871538fd1498Szrj 	  if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
871638fd1498Szrj 	    return false;
871738fd1498Szrj 
871838fd1498Szrj 	  if (bitop2 != NOP_EXPR)
871938fd1498Szrj 	    {
872038fd1498Szrj 	      optab = optab_for_tree_code (bitop2, comp_vectype,
872138fd1498Szrj 					   optab_default);
872238fd1498Szrj 	      if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
872338fd1498Szrj 		return false;
872438fd1498Szrj 	    }
872538fd1498Szrj 	}
872638fd1498Szrj       if (expand_vec_cond_expr_p (vectype, comp_vectype,
872738fd1498Szrj 				     cond_code))
872838fd1498Szrj 	{
872938fd1498Szrj 	  if (!slp_node)
873038fd1498Szrj 	    vect_model_simple_cost (stmt_info, ncopies, dts, ndts, NULL, NULL);
873138fd1498Szrj 	  return true;
873238fd1498Szrj 	}
873338fd1498Szrj       return false;
873438fd1498Szrj     }
873538fd1498Szrj 
873638fd1498Szrj   /* Transform.  */
873738fd1498Szrj 
873838fd1498Szrj   if (!slp_node)
873938fd1498Szrj     {
874038fd1498Szrj       vec_oprnds0.create (1);
874138fd1498Szrj       vec_oprnds1.create (1);
874238fd1498Szrj       vec_oprnds2.create (1);
874338fd1498Szrj       vec_oprnds3.create (1);
874438fd1498Szrj     }
874538fd1498Szrj 
874638fd1498Szrj   /* Handle def.  */
874738fd1498Szrj   scalar_dest = gimple_assign_lhs (stmt);
874838fd1498Szrj   if (reduction_type != EXTRACT_LAST_REDUCTION)
874938fd1498Szrj     vec_dest = vect_create_destination_var (scalar_dest, vectype);
875038fd1498Szrj 
875138fd1498Szrj   /* Handle cond expr.  */
875238fd1498Szrj   for (j = 0; j < ncopies; j++)
875338fd1498Szrj     {
875438fd1498Szrj       gimple *new_stmt = NULL;
875538fd1498Szrj       if (j == 0)
875638fd1498Szrj 	{
875738fd1498Szrj           if (slp_node)
875838fd1498Szrj             {
875938fd1498Szrj               auto_vec<tree, 4> ops;
876038fd1498Szrj 	      auto_vec<vec<tree>, 4> vec_defs;
876138fd1498Szrj 
876238fd1498Szrj 	      if (masked)
876338fd1498Szrj 		ops.safe_push (cond_expr);
876438fd1498Szrj 	      else
876538fd1498Szrj 		{
876638fd1498Szrj 		  ops.safe_push (cond_expr0);
876738fd1498Szrj 		  ops.safe_push (cond_expr1);
876838fd1498Szrj 		}
876938fd1498Szrj               ops.safe_push (then_clause);
877038fd1498Szrj               ops.safe_push (else_clause);
877138fd1498Szrj               vect_get_slp_defs (ops, slp_node, &vec_defs);
877238fd1498Szrj 	      vec_oprnds3 = vec_defs.pop ();
877338fd1498Szrj 	      vec_oprnds2 = vec_defs.pop ();
877438fd1498Szrj 	      if (!masked)
877538fd1498Szrj 		vec_oprnds1 = vec_defs.pop ();
877638fd1498Szrj 	      vec_oprnds0 = vec_defs.pop ();
877738fd1498Szrj             }
877838fd1498Szrj           else
877938fd1498Szrj             {
878038fd1498Szrj 	      gimple *gtemp;
878138fd1498Szrj 	      if (masked)
878238fd1498Szrj 		{
878338fd1498Szrj 		  vec_cond_lhs
878438fd1498Szrj 		    = vect_get_vec_def_for_operand (cond_expr, stmt,
878538fd1498Szrj 						    comp_vectype);
878638fd1498Szrj 		  vect_is_simple_use (cond_expr, stmt_info->vinfo,
878738fd1498Szrj 				      &gtemp, &dts[0]);
878838fd1498Szrj 		}
878938fd1498Szrj 	      else
879038fd1498Szrj 		{
879138fd1498Szrj 		  vec_cond_lhs
879238fd1498Szrj 		    = vect_get_vec_def_for_operand (cond_expr0,
879338fd1498Szrj 						    stmt, comp_vectype);
879438fd1498Szrj 		  vect_is_simple_use (cond_expr0, loop_vinfo, &gtemp, &dts[0]);
879538fd1498Szrj 
879638fd1498Szrj 		  vec_cond_rhs
879738fd1498Szrj 		    = vect_get_vec_def_for_operand (cond_expr1,
879838fd1498Szrj 						    stmt, comp_vectype);
879938fd1498Szrj 		  vect_is_simple_use (cond_expr1, loop_vinfo, &gtemp, &dts[1]);
880038fd1498Szrj 		}
880138fd1498Szrj 	      if (reduc_index == 1)
880238fd1498Szrj 		vec_then_clause = reduc_def;
880338fd1498Szrj 	      else
880438fd1498Szrj 		{
880538fd1498Szrj 		  vec_then_clause = vect_get_vec_def_for_operand (then_clause,
880638fd1498Szrj 								  stmt);
880738fd1498Szrj 	          vect_is_simple_use (then_clause, loop_vinfo,
880838fd1498Szrj 				      &gtemp, &dts[2]);
880938fd1498Szrj 		}
881038fd1498Szrj 	      if (reduc_index == 2)
881138fd1498Szrj 		vec_else_clause = reduc_def;
881238fd1498Szrj 	      else
881338fd1498Szrj 		{
881438fd1498Szrj 		  vec_else_clause = vect_get_vec_def_for_operand (else_clause,
881538fd1498Szrj 								  stmt);
881638fd1498Szrj 		  vect_is_simple_use (else_clause, loop_vinfo, &gtemp, &dts[3]);
881738fd1498Szrj 		}
881838fd1498Szrj 	    }
881938fd1498Szrj 	}
882038fd1498Szrj       else
882138fd1498Szrj 	{
882238fd1498Szrj 	  vec_cond_lhs
882338fd1498Szrj 	    = vect_get_vec_def_for_stmt_copy (dts[0],
882438fd1498Szrj 					      vec_oprnds0.pop ());
882538fd1498Szrj 	  if (!masked)
882638fd1498Szrj 	    vec_cond_rhs
882738fd1498Szrj 	      = vect_get_vec_def_for_stmt_copy (dts[1],
882838fd1498Szrj 						vec_oprnds1.pop ());
882938fd1498Szrj 
883038fd1498Szrj 	  vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
883138fd1498Szrj 							    vec_oprnds2.pop ());
883238fd1498Szrj 	  vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
883338fd1498Szrj 							    vec_oprnds3.pop ());
883438fd1498Szrj 	}
883538fd1498Szrj 
883638fd1498Szrj       if (!slp_node)
883738fd1498Szrj         {
883838fd1498Szrj 	  vec_oprnds0.quick_push (vec_cond_lhs);
883938fd1498Szrj 	  if (!masked)
884038fd1498Szrj 	    vec_oprnds1.quick_push (vec_cond_rhs);
884138fd1498Szrj 	  vec_oprnds2.quick_push (vec_then_clause);
884238fd1498Szrj 	  vec_oprnds3.quick_push (vec_else_clause);
884338fd1498Szrj 	}
884438fd1498Szrj 
884538fd1498Szrj       /* Arguments are ready.  Create the new vector stmt.  */
884638fd1498Szrj       FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
884738fd1498Szrj         {
884838fd1498Szrj           vec_then_clause = vec_oprnds2[i];
884938fd1498Szrj           vec_else_clause = vec_oprnds3[i];
885038fd1498Szrj 
885138fd1498Szrj 	  if (masked)
885238fd1498Szrj 	    vec_compare = vec_cond_lhs;
885338fd1498Szrj 	  else
885438fd1498Szrj 	    {
885538fd1498Szrj 	      vec_cond_rhs = vec_oprnds1[i];
885638fd1498Szrj 	      if (bitop1 == NOP_EXPR)
885738fd1498Szrj 		vec_compare = build2 (cond_code, vec_cmp_type,
885838fd1498Szrj 				      vec_cond_lhs, vec_cond_rhs);
885938fd1498Szrj 	      else
886038fd1498Szrj 		{
886138fd1498Szrj 		  new_temp = make_ssa_name (vec_cmp_type);
886238fd1498Szrj 		  if (bitop1 == BIT_NOT_EXPR)
886338fd1498Szrj 		    new_stmt = gimple_build_assign (new_temp, bitop1,
886438fd1498Szrj 						    vec_cond_rhs);
886538fd1498Szrj 		  else
886638fd1498Szrj 		    new_stmt
886738fd1498Szrj 		      = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
886838fd1498Szrj 					     vec_cond_rhs);
886938fd1498Szrj 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
887038fd1498Szrj 		  if (bitop2 == NOP_EXPR)
887138fd1498Szrj 		    vec_compare = new_temp;
887238fd1498Szrj 		  else if (bitop2 == BIT_NOT_EXPR)
887338fd1498Szrj 		    {
887438fd1498Szrj 		      /* Instead of doing ~x ? y : z do x ? z : y.  */
887538fd1498Szrj 		      vec_compare = new_temp;
887638fd1498Szrj 		      std::swap (vec_then_clause, vec_else_clause);
887738fd1498Szrj 		    }
887838fd1498Szrj 		  else
887938fd1498Szrj 		    {
888038fd1498Szrj 		      vec_compare = make_ssa_name (vec_cmp_type);
888138fd1498Szrj 		      new_stmt
888238fd1498Szrj 			= gimple_build_assign (vec_compare, bitop2,
888338fd1498Szrj 					       vec_cond_lhs, new_temp);
888438fd1498Szrj 		      vect_finish_stmt_generation (stmt, new_stmt, gsi);
888538fd1498Szrj 		    }
888638fd1498Szrj 		}
888738fd1498Szrj 	    }
888838fd1498Szrj 	  if (reduction_type == EXTRACT_LAST_REDUCTION)
888938fd1498Szrj 	    {
889038fd1498Szrj 	      if (!is_gimple_val (vec_compare))
889138fd1498Szrj 		{
889238fd1498Szrj 		  tree vec_compare_name = make_ssa_name (vec_cmp_type);
889338fd1498Szrj 		  new_stmt = gimple_build_assign (vec_compare_name,
889438fd1498Szrj 						  vec_compare);
889538fd1498Szrj 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
889638fd1498Szrj 		  vec_compare = vec_compare_name;
889738fd1498Szrj 		}
889838fd1498Szrj 	      gcc_assert (reduc_index == 2);
889938fd1498Szrj 	      new_stmt = gimple_build_call_internal
890038fd1498Szrj 		(IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
890138fd1498Szrj 		 vec_then_clause);
890238fd1498Szrj 	      gimple_call_set_lhs (new_stmt, scalar_dest);
890338fd1498Szrj 	      SSA_NAME_DEF_STMT (scalar_dest) = new_stmt;
890438fd1498Szrj 	      if (stmt == gsi_stmt (*gsi))
890538fd1498Szrj 		vect_finish_replace_stmt (stmt, new_stmt);
890638fd1498Szrj 	      else
890738fd1498Szrj 		{
890838fd1498Szrj 		  /* In this case we're moving the definition to later in the
890938fd1498Szrj 		     block.  That doesn't matter because the only uses of the
891038fd1498Szrj 		     lhs are in phi statements.  */
891138fd1498Szrj 		  gimple_stmt_iterator old_gsi = gsi_for_stmt (stmt);
891238fd1498Szrj 		  gsi_remove (&old_gsi, true);
891338fd1498Szrj 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
891438fd1498Szrj 		}
891538fd1498Szrj 	    }
891638fd1498Szrj 	  else
891738fd1498Szrj 	    {
891838fd1498Szrj 	      new_temp = make_ssa_name (vec_dest);
891938fd1498Szrj 	      new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR,
892038fd1498Szrj 					      vec_compare, vec_then_clause,
892138fd1498Szrj 					      vec_else_clause);
892238fd1498Szrj 	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
892338fd1498Szrj 	    }
892438fd1498Szrj           if (slp_node)
892538fd1498Szrj             SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
892638fd1498Szrj         }
892738fd1498Szrj 
892838fd1498Szrj         if (slp_node)
892938fd1498Szrj           continue;
893038fd1498Szrj 
893138fd1498Szrj         if (j == 0)
893238fd1498Szrj           STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
893338fd1498Szrj         else
893438fd1498Szrj           STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
893538fd1498Szrj 
893638fd1498Szrj         prev_stmt_info = vinfo_for_stmt (new_stmt);
893738fd1498Szrj     }
893838fd1498Szrj 
893938fd1498Szrj   vec_oprnds0.release ();
894038fd1498Szrj   vec_oprnds1.release ();
894138fd1498Szrj   vec_oprnds2.release ();
894238fd1498Szrj   vec_oprnds3.release ();
894338fd1498Szrj 
894438fd1498Szrj   return true;
894538fd1498Szrj }
894638fd1498Szrj 
894738fd1498Szrj /* vectorizable_comparison.
894838fd1498Szrj 
894938fd1498Szrj    Check if STMT is a comparison expression that can be vectorized.
895038fd1498Szrj    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
895138fd1498Szrj    comparison, put it in VEC_STMT, and insert it at GSI.
895238fd1498Szrj 
895338fd1498Szrj    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
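
/* For example (a sketch): the scalar mask definition

       m_1 = a_2 < b_3;

   becomes a vector comparison producing a boolean vector,

       vm_4 = va_5 < vb_6;

   where the type of vm_4 is the mask type for the operand vectors.  */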
895438fd1498Szrj 
895538fd1498Szrj static bool
895638fd1498Szrj vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
895738fd1498Szrj 			 gimple **vec_stmt, tree reduc_def,
895838fd1498Szrj 			 slp_tree slp_node)
895938fd1498Szrj {
896038fd1498Szrj   tree lhs, rhs1, rhs2;
896138fd1498Szrj   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
896238fd1498Szrj   tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
896338fd1498Szrj   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
896438fd1498Szrj   tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
896538fd1498Szrj   tree new_temp;
896638fd1498Szrj   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
896738fd1498Szrj   enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
896838fd1498Szrj   int ndts = 2;
896938fd1498Szrj   poly_uint64 nunits;
897038fd1498Szrj   int ncopies;
897138fd1498Szrj   enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
897238fd1498Szrj   stmt_vec_info prev_stmt_info = NULL;
897338fd1498Szrj   int i, j;
897438fd1498Szrj   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
897538fd1498Szrj   vec<tree> vec_oprnds0 = vNULL;
897638fd1498Szrj   vec<tree> vec_oprnds1 = vNULL;
897738fd1498Szrj   gimple *def_stmt;
897838fd1498Szrj   tree mask_type;
897938fd1498Szrj   tree mask;
898038fd1498Szrj 
898138fd1498Szrj   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
898238fd1498Szrj     return false;
898338fd1498Szrj 
898438fd1498Szrj   if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
898538fd1498Szrj     return false;
898638fd1498Szrj 
898738fd1498Szrj   mask_type = vectype;
898838fd1498Szrj   nunits = TYPE_VECTOR_SUBPARTS (vectype);
898938fd1498Szrj 
899038fd1498Szrj   if (slp_node)
899138fd1498Szrj     ncopies = 1;
899238fd1498Szrj   else
899338fd1498Szrj     ncopies = vect_get_num_copies (loop_vinfo, vectype);
899438fd1498Szrj 
899538fd1498Szrj   gcc_assert (ncopies >= 1);
899638fd1498Szrj   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
899738fd1498Szrj       && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
899838fd1498Szrj 	   && reduc_def))
899938fd1498Szrj     return false;
900038fd1498Szrj 
900138fd1498Szrj   if (STMT_VINFO_LIVE_P (stmt_info))
900238fd1498Szrj     {
900338fd1498Szrj       if (dump_enabled_p ())
900438fd1498Szrj 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
900538fd1498Szrj 			 "value used after loop.\n");
900638fd1498Szrj       return false;
900738fd1498Szrj     }
900838fd1498Szrj 
900938fd1498Szrj   if (!is_gimple_assign (stmt))
901038fd1498Szrj     return false;
901138fd1498Szrj 
901238fd1498Szrj   code = gimple_assign_rhs_code (stmt);
901338fd1498Szrj 
901438fd1498Szrj   if (TREE_CODE_CLASS (code) != tcc_comparison)
901538fd1498Szrj     return false;
901638fd1498Szrj 
901738fd1498Szrj   rhs1 = gimple_assign_rhs1 (stmt);
901838fd1498Szrj   rhs2 = gimple_assign_rhs2 (stmt);
901938fd1498Szrj 
902038fd1498Szrj   if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &def_stmt,
902138fd1498Szrj 			   &dts[0], &vectype1))
902238fd1498Szrj     return false;
902338fd1498Szrj 
902438fd1498Szrj   if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &def_stmt,
902538fd1498Szrj 			   &dts[1], &vectype2))
902638fd1498Szrj     return false;
902738fd1498Szrj 
902838fd1498Szrj   if (vectype1 && vectype2
902938fd1498Szrj       && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
903038fd1498Szrj 		   TYPE_VECTOR_SUBPARTS (vectype2)))
903138fd1498Szrj     return false;
903238fd1498Szrj 
903338fd1498Szrj   vectype = vectype1 ? vectype1 : vectype2;
903438fd1498Szrj 
903538fd1498Szrj   /* Invariant comparison.  */
903638fd1498Szrj   if (!vectype)
903738fd1498Szrj     {
903838fd1498Szrj       vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
903938fd1498Szrj       if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
904038fd1498Szrj 	return false;
904138fd1498Szrj     }
904238fd1498Szrj   else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
904338fd1498Szrj     return false;
904438fd1498Szrj 
904538fd1498Szrj   /* Can't compare mask and non-mask types.  */
904638fd1498Szrj   if (vectype1 && vectype2
904738fd1498Szrj       && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
904838fd1498Szrj     return false;
904938fd1498Szrj 
905038fd1498Szrj   /* Boolean values may have another representation in vectors
905138fd1498Szrj      and therefore we prefer bit operations over comparison for
905238fd1498Szrj      them (which also works for scalar masks).  We store opcodes
905338fd1498Szrj      to use in bitop1 and bitop2.  Statement is vectorized as
905438fd1498Szrj        BITOP2 (rhs1 BITOP1 rhs2) or
905538fd1498Szrj        rhs1 BITOP2 (BITOP1 rhs2)
905638fd1498Szrj      depending on bitop1 and bitop2 arity.  */
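  /* For example (booleans, a sketch): a < b is handled by swapping the
     operands and lowering to  b & ~a,  while a != b needs only bitop1
     and becomes  a ^ b.  */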
905738fd1498Szrj   if (VECTOR_BOOLEAN_TYPE_P (vectype))
905838fd1498Szrj     {
905938fd1498Szrj       if (code == GT_EXPR)
906038fd1498Szrj 	{
906138fd1498Szrj 	  bitop1 = BIT_NOT_EXPR;
906238fd1498Szrj 	  bitop2 = BIT_AND_EXPR;
906338fd1498Szrj 	}
906438fd1498Szrj       else if (code == GE_EXPR)
906538fd1498Szrj 	{
906638fd1498Szrj 	  bitop1 = BIT_NOT_EXPR;
906738fd1498Szrj 	  bitop2 = BIT_IOR_EXPR;
906838fd1498Szrj 	}
906938fd1498Szrj       else if (code == LT_EXPR)
907038fd1498Szrj 	{
907138fd1498Szrj 	  bitop1 = BIT_NOT_EXPR;
907238fd1498Szrj 	  bitop2 = BIT_AND_EXPR;
907338fd1498Szrj 	  std::swap (rhs1, rhs2);
907438fd1498Szrj 	  std::swap (dts[0], dts[1]);
907538fd1498Szrj 	}
907638fd1498Szrj       else if (code == LE_EXPR)
907738fd1498Szrj 	{
907838fd1498Szrj 	  bitop1 = BIT_NOT_EXPR;
907938fd1498Szrj 	  bitop2 = BIT_IOR_EXPR;
908038fd1498Szrj 	  std::swap (rhs1, rhs2);
908138fd1498Szrj 	  std::swap (dts[0], dts[1]);
908238fd1498Szrj 	}
908338fd1498Szrj       else
908438fd1498Szrj 	{
908538fd1498Szrj 	  bitop1 = BIT_XOR_EXPR;
908638fd1498Szrj 	  if (code == EQ_EXPR)
908738fd1498Szrj 	    bitop2 = BIT_NOT_EXPR;
908838fd1498Szrj 	}
908938fd1498Szrj     }
909038fd1498Szrj 
909138fd1498Szrj   if (!vec_stmt)
909238fd1498Szrj     {
909338fd1498Szrj       STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
909438fd1498Szrj       if (!slp_node)
909538fd1498Szrj 	vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
909638fd1498Szrj 				dts, ndts, NULL, NULL);
909738fd1498Szrj       if (bitop1 == NOP_EXPR)
909838fd1498Szrj 	return expand_vec_cmp_expr_p (vectype, mask_type, code);
909938fd1498Szrj       else
910038fd1498Szrj 	{
910138fd1498Szrj 	  machine_mode mode = TYPE_MODE (vectype);
910238fd1498Szrj 	  optab optab;
910338fd1498Szrj 
910438fd1498Szrj 	  optab = optab_for_tree_code (bitop1, vectype, optab_default);
910538fd1498Szrj 	  if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
910638fd1498Szrj 	    return false;
910738fd1498Szrj 
910838fd1498Szrj 	  if (bitop2 != NOP_EXPR)
910938fd1498Szrj 	    {
911038fd1498Szrj 	      optab = optab_for_tree_code (bitop2, vectype, optab_default);
911138fd1498Szrj 	      if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
911238fd1498Szrj 		return false;
911338fd1498Szrj 	    }
911438fd1498Szrj 	  return true;
911538fd1498Szrj 	}
911638fd1498Szrj     }
911738fd1498Szrj 
911838fd1498Szrj   /* Transform.  */
911938fd1498Szrj   if (!slp_node)
912038fd1498Szrj     {
912138fd1498Szrj       vec_oprnds0.create (1);
912238fd1498Szrj       vec_oprnds1.create (1);
912338fd1498Szrj     }
912438fd1498Szrj 
912538fd1498Szrj   /* Handle def.  */
912638fd1498Szrj   lhs = gimple_assign_lhs (stmt);
912738fd1498Szrj   mask = vect_create_destination_var (lhs, mask_type);
912838fd1498Szrj 
912938fd1498Szrj   /* Handle cmp expr.  */
913038fd1498Szrj   for (j = 0; j < ncopies; j++)
913138fd1498Szrj     {
913238fd1498Szrj       gassign *new_stmt = NULL;
913338fd1498Szrj       if (j == 0)
913438fd1498Szrj 	{
913538fd1498Szrj 	  if (slp_node)
913638fd1498Szrj 	    {
913738fd1498Szrj 	      auto_vec<tree, 2> ops;
913838fd1498Szrj 	      auto_vec<vec<tree>, 2> vec_defs;
913938fd1498Szrj 
914038fd1498Szrj 	      ops.safe_push (rhs1);
914138fd1498Szrj 	      ops.safe_push (rhs2);
914238fd1498Szrj 	      vect_get_slp_defs (ops, slp_node, &vec_defs);
914338fd1498Szrj 	      vec_oprnds1 = vec_defs.pop ();
914438fd1498Szrj 	      vec_oprnds0 = vec_defs.pop ();
914538fd1498Szrj 	    }
914638fd1498Szrj 	  else
914738fd1498Szrj 	    {
914838fd1498Szrj 	      vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt, vectype);
914938fd1498Szrj 	      vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt, vectype);
915038fd1498Szrj 	    }
915138fd1498Szrj 	}
915238fd1498Szrj       else
915338fd1498Szrj 	{
915438fd1498Szrj 	  vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0],
915538fd1498Szrj 						     vec_oprnds0.pop ());
915638fd1498Szrj 	  vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1],
915738fd1498Szrj 						     vec_oprnds1.pop ());
915838fd1498Szrj 	}
915938fd1498Szrj 
916038fd1498Szrj       if (!slp_node)
916138fd1498Szrj 	{
916238fd1498Szrj 	  vec_oprnds0.quick_push (vec_rhs1);
916338fd1498Szrj 	  vec_oprnds1.quick_push (vec_rhs2);
916438fd1498Szrj 	}
916538fd1498Szrj 
916638fd1498Szrj       /* Arguments are ready.  Create the new vector stmt.  */
916738fd1498Szrj       FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
916838fd1498Szrj 	{
916938fd1498Szrj 	  vec_rhs2 = vec_oprnds1[i];
917038fd1498Szrj 
917138fd1498Szrj 	  new_temp = make_ssa_name (mask);
917238fd1498Szrj 	  if (bitop1 == NOP_EXPR)
917338fd1498Szrj 	    {
917438fd1498Szrj 	      new_stmt = gimple_build_assign (new_temp, code,
917538fd1498Szrj 					      vec_rhs1, vec_rhs2);
917638fd1498Szrj 	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
917738fd1498Szrj 	    }
917838fd1498Szrj 	  else
917938fd1498Szrj 	    {
918038fd1498Szrj 	      if (bitop1 == BIT_NOT_EXPR)
918138fd1498Szrj 		new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
918238fd1498Szrj 	      else
918338fd1498Szrj 		new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
918438fd1498Szrj 						vec_rhs2);
918538fd1498Szrj 	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
918638fd1498Szrj 	      if (bitop2 != NOP_EXPR)
918738fd1498Szrj 		{
918838fd1498Szrj 		  tree res = make_ssa_name (mask);
918938fd1498Szrj 		  if (bitop2 == BIT_NOT_EXPR)
919038fd1498Szrj 		    new_stmt = gimple_build_assign (res, bitop2, new_temp);
919138fd1498Szrj 		  else
919238fd1498Szrj 		    new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
919338fd1498Szrj 						    new_temp);
919438fd1498Szrj 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
919538fd1498Szrj 		}
919638fd1498Szrj 	    }
919738fd1498Szrj 	  if (slp_node)
919838fd1498Szrj 	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
919938fd1498Szrj 	}
920038fd1498Szrj 
920138fd1498Szrj       if (slp_node)
920238fd1498Szrj 	continue;
920338fd1498Szrj 
920438fd1498Szrj       if (j == 0)
920538fd1498Szrj 	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
920638fd1498Szrj       else
920738fd1498Szrj 	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
920838fd1498Szrj 
920938fd1498Szrj       prev_stmt_info = vinfo_for_stmt (new_stmt);
921038fd1498Szrj     }
921138fd1498Szrj 
921238fd1498Szrj   vec_oprnds0.release ();
921338fd1498Szrj   vec_oprnds1.release ();
921438fd1498Szrj 
921538fd1498Szrj   return true;
921638fd1498Szrj }
921738fd1498Szrj 
921838fd1498Szrj /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
921938fd1498Szrj    can handle all live statements in the node.  Otherwise return true
922038fd1498Szrj    if STMT is not live or if vectorizable_live_operation can handle it.
922138fd1498Szrj    GSI and VEC_STMT are as for vectorizable_live_operation.  */
922238fd1498Szrj 
922338fd1498Szrj static bool
922438fd1498Szrj can_vectorize_live_stmts (gimple *stmt, gimple_stmt_iterator *gsi,
922538fd1498Szrj 			  slp_tree slp_node, gimple **vec_stmt)
922638fd1498Szrj {
922738fd1498Szrj   if (slp_node)
922838fd1498Szrj     {
922938fd1498Szrj       gimple *slp_stmt;
923038fd1498Szrj       unsigned int i;
923138fd1498Szrj       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt)
923238fd1498Szrj 	{
923338fd1498Szrj 	  stmt_vec_info slp_stmt_info = vinfo_for_stmt (slp_stmt);
923438fd1498Szrj 	  if (STMT_VINFO_LIVE_P (slp_stmt_info)
923538fd1498Szrj 	      && !vectorizable_live_operation (slp_stmt, gsi, slp_node, i,
923638fd1498Szrj 					       vec_stmt))
923738fd1498Szrj 	    return false;
923838fd1498Szrj 	}
923938fd1498Szrj     }
924038fd1498Szrj   else if (STMT_VINFO_LIVE_P (vinfo_for_stmt (stmt))
924138fd1498Szrj 	   && !vectorizable_live_operation (stmt, gsi, slp_node, -1, vec_stmt))
924238fd1498Szrj     return false;
924338fd1498Szrj 
924438fd1498Szrj   return true;
924538fd1498Szrj }
924638fd1498Szrj 
924738fd1498Szrj /* Make sure the statement is vectorizable.  */
924838fd1498Szrj 
924938fd1498Szrj bool
925038fd1498Szrj vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node,
925138fd1498Szrj 		   slp_instance node_instance)
925238fd1498Szrj {
925338fd1498Szrj   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
925438fd1498Szrj   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
925538fd1498Szrj   enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
925638fd1498Szrj   bool ok;
925738fd1498Szrj   gimple *pattern_stmt;
925838fd1498Szrj   gimple_seq pattern_def_seq;
925938fd1498Szrj 
926038fd1498Szrj   if (dump_enabled_p ())
926138fd1498Szrj     {
926238fd1498Szrj       dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
926338fd1498Szrj       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
926438fd1498Szrj     }
926538fd1498Szrj 
926638fd1498Szrj   if (gimple_has_volatile_ops (stmt))
926738fd1498Szrj     {
926838fd1498Szrj       if (dump_enabled_p ())
926938fd1498Szrj         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
927038fd1498Szrj                          "not vectorized: stmt has volatile operands\n");
927138fd1498Szrj 
927238fd1498Szrj       return false;
927338fd1498Szrj     }
927438fd1498Szrj 
927538fd1498Szrj   /* Skip stmts that do not need to be vectorized.  In loops this is expected
927638fd1498Szrj      to include:
927738fd1498Szrj      - the COND_EXPR which is the loop exit condition
927838fd1498Szrj      - any LABEL_EXPRs in the loop
927938fd1498Szrj      - computations that are used only for array indexing or loop control.
928038fd1498Szrj      In basic blocks we only analyze statements that are a part of some SLP
928138fd1498Szrj      instance, therefore, all the statements are relevant.
928238fd1498Szrj 
928338fd1498Szrj      A pattern statement needs to be analyzed instead of the original statement
928438fd1498Szrj      if the original statement is not relevant.  Otherwise, we analyze both
928538fd1498Szrj      statements.  In basic blocks we are called from some SLP instance
928638fd1498Szrj      traversal, so don't analyze pattern stmts instead; the pattern stmts
928738fd1498Szrj      will already be part of the SLP instance.  */
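  /* For example, in

	for (i = 0; i < n; i++)
	  a[i] = b[i] + 1;

     the increment of i and the exit test i < n only control the loop,
     so they are marked irrelevant and skipped here.  */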
928838fd1498Szrj 
928938fd1498Szrj   pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
929038fd1498Szrj   if (!STMT_VINFO_RELEVANT_P (stmt_info)
929138fd1498Szrj       && !STMT_VINFO_LIVE_P (stmt_info))
929238fd1498Szrj     {
929338fd1498Szrj       if (STMT_VINFO_IN_PATTERN_P (stmt_info)
929438fd1498Szrj           && pattern_stmt
929538fd1498Szrj           && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
929638fd1498Szrj               || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
929738fd1498Szrj         {
929838fd1498Szrj           /* Analyze PATTERN_STMT instead of the original stmt.  */
929938fd1498Szrj           stmt = pattern_stmt;
930038fd1498Szrj           stmt_info = vinfo_for_stmt (pattern_stmt);
930138fd1498Szrj           if (dump_enabled_p ())
930238fd1498Szrj             {
930338fd1498Szrj               dump_printf_loc (MSG_NOTE, vect_location,
930438fd1498Szrj                                "==> examining pattern statement: ");
930538fd1498Szrj               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
930638fd1498Szrj             }
930738fd1498Szrj         }
930838fd1498Szrj       else
930938fd1498Szrj         {
931038fd1498Szrj           if (dump_enabled_p ())
931138fd1498Szrj             dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
931238fd1498Szrj 
931338fd1498Szrj           return true;
931438fd1498Szrj         }
931538fd1498Szrj     }
931638fd1498Szrj   else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
931738fd1498Szrj 	   && node == NULL
931838fd1498Szrj            && pattern_stmt
931938fd1498Szrj            && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
932038fd1498Szrj                || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
932138fd1498Szrj     {
932238fd1498Szrj       /* Analyze PATTERN_STMT too.  */
932338fd1498Szrj       if (dump_enabled_p ())
932438fd1498Szrj         {
932538fd1498Szrj           dump_printf_loc (MSG_NOTE, vect_location,
932638fd1498Szrj                            "==> examining pattern statement: ");
932738fd1498Szrj           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
932838fd1498Szrj         }
932938fd1498Szrj 
933038fd1498Szrj       if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node,
933138fd1498Szrj 			      node_instance))
933238fd1498Szrj         return false;
933338fd1498Szrj    }
933438fd1498Szrj 
933538fd1498Szrj   if (is_pattern_stmt_p (stmt_info)
933638fd1498Szrj       && node == NULL
933738fd1498Szrj       && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
933838fd1498Szrj     {
933938fd1498Szrj       gimple_stmt_iterator si;
934038fd1498Szrj 
934138fd1498Szrj       for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
934238fd1498Szrj 	{
934338fd1498Szrj 	  gimple *pattern_def_stmt = gsi_stmt (si);
934438fd1498Szrj 	  if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
934538fd1498Szrj 	      || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
934638fd1498Szrj 	    {
934738fd1498Szrj 	      /* Analyze def stmt of STMT if it's a pattern stmt.  */
934838fd1498Szrj 	      if (dump_enabled_p ())
934938fd1498Szrj 		{
935038fd1498Szrj 		  dump_printf_loc (MSG_NOTE, vect_location,
935138fd1498Szrj                                    "==> examining pattern def statement: ");
935238fd1498Szrj 		  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
935338fd1498Szrj 		}
935438fd1498Szrj 
935538fd1498Szrj 	      if (!vect_analyze_stmt (pattern_def_stmt,
935638fd1498Szrj 				      need_to_vectorize, node, node_instance))
935738fd1498Szrj 		return false;
935838fd1498Szrj 	    }
935938fd1498Szrj 	}
936038fd1498Szrj     }
936138fd1498Szrj 
936238fd1498Szrj   switch (STMT_VINFO_DEF_TYPE (stmt_info))
936338fd1498Szrj     {
936438fd1498Szrj       case vect_internal_def:
936538fd1498Szrj         break;
936638fd1498Szrj 
936738fd1498Szrj       case vect_reduction_def:
936838fd1498Szrj       case vect_nested_cycle:
936938fd1498Szrj          gcc_assert (!bb_vinfo
937038fd1498Szrj 		     && (relevance == vect_used_in_outer
937138fd1498Szrj 			 || relevance == vect_used_in_outer_by_reduction
937238fd1498Szrj 			 || relevance == vect_used_by_reduction
937338fd1498Szrj 			 || relevance == vect_unused_in_scope
937438fd1498Szrj 			 || relevance == vect_used_only_live));
937538fd1498Szrj          break;
937638fd1498Szrj 
937738fd1498Szrj       case vect_induction_def:
937838fd1498Szrj 	gcc_assert (!bb_vinfo);
937938fd1498Szrj 	break;
938038fd1498Szrj 
938138fd1498Szrj       case vect_constant_def:
938238fd1498Szrj       case vect_external_def:
938338fd1498Szrj       case vect_unknown_def_type:
938438fd1498Szrj       default:
938538fd1498Szrj         gcc_unreachable ();
938638fd1498Szrj     }
938738fd1498Szrj 
938838fd1498Szrj   if (STMT_VINFO_RELEVANT_P (stmt_info))
938938fd1498Szrj     {
939038fd1498Szrj       gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
939138fd1498Szrj       gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
939238fd1498Szrj 		  || (is_gimple_call (stmt)
939338fd1498Szrj 		      && gimple_call_lhs (stmt) == NULL_TREE));
939438fd1498Szrj       *need_to_vectorize = true;
939538fd1498Szrj     }
939638fd1498Szrj 
939738fd1498Szrj   if (PURE_SLP_STMT (stmt_info) && !node)
939838fd1498Szrj     {
939938fd1498Szrj       dump_printf_loc (MSG_NOTE, vect_location,
940038fd1498Szrj 		       "handled only by SLP analysis\n");
940138fd1498Szrj       return true;
940238fd1498Szrj     }
940338fd1498Szrj 
940438fd1498Szrj   ok = true;
940538fd1498Szrj   if (!bb_vinfo
940638fd1498Szrj       && (STMT_VINFO_RELEVANT_P (stmt_info)
940738fd1498Szrj 	  || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
940838fd1498Szrj     ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
940938fd1498Szrj 	  || vectorizable_conversion (stmt, NULL, NULL, node)
941038fd1498Szrj 	  || vectorizable_shift (stmt, NULL, NULL, node)
941138fd1498Szrj 	  || vectorizable_operation (stmt, NULL, NULL, node)
941238fd1498Szrj 	  || vectorizable_assignment (stmt, NULL, NULL, node)
941338fd1498Szrj 	  || vectorizable_load (stmt, NULL, NULL, node, NULL)
941438fd1498Szrj 	  || vectorizable_call (stmt, NULL, NULL, node)
941538fd1498Szrj 	  || vectorizable_store (stmt, NULL, NULL, node)
941638fd1498Szrj 	  || vectorizable_reduction (stmt, NULL, NULL, node, node_instance)
941738fd1498Szrj 	  || vectorizable_induction (stmt, NULL, NULL, node)
941838fd1498Szrj 	  || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
941938fd1498Szrj 	  || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
942038fd1498Szrj   else
942138fd1498Szrj     {
942238fd1498Szrj       if (bb_vinfo)
942338fd1498Szrj 	ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
942438fd1498Szrj 	      || vectorizable_conversion (stmt, NULL, NULL, node)
942538fd1498Szrj 	      || vectorizable_shift (stmt, NULL, NULL, node)
942638fd1498Szrj 	      || vectorizable_operation (stmt, NULL, NULL, node)
942738fd1498Szrj 	      || vectorizable_assignment (stmt, NULL, NULL, node)
942838fd1498Szrj 	      || vectorizable_load (stmt, NULL, NULL, node, NULL)
942938fd1498Szrj 	      || vectorizable_call (stmt, NULL, NULL, node)
943038fd1498Szrj 	      || vectorizable_store (stmt, NULL, NULL, node)
943138fd1498Szrj 	      || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
943238fd1498Szrj 	      || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
943338fd1498Szrj     }
943438fd1498Szrj 
943538fd1498Szrj   if (!ok)
943638fd1498Szrj     {
943738fd1498Szrj       if (dump_enabled_p ())
943838fd1498Szrj         {
943938fd1498Szrj           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
944038fd1498Szrj                            "not vectorized: relevant stmt not ");
944138fd1498Szrj           dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
944238fd1498Szrj           dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
944338fd1498Szrj         }
944438fd1498Szrj 
944538fd1498Szrj       return false;
944638fd1498Szrj     }
944738fd1498Szrj 
944838fd1498Szrj   if (bb_vinfo)
944938fd1498Szrj     return true;
945038fd1498Szrj 
945138fd1498Szrj   /* Stmts that are (also) "live" (i.e. that are used outside the loop)
945238fd1498Szrj      need extra handling, except for vectorizable reductions.  */
945338fd1498Szrj   if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
945438fd1498Szrj       && !can_vectorize_live_stmts (stmt, NULL, node, NULL))
945538fd1498Szrj     {
945638fd1498Szrj       if (dump_enabled_p ())
945738fd1498Szrj         {
945838fd1498Szrj           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
945938fd1498Szrj                            "not vectorized: live stmt not supported: ");
946038fd1498Szrj           dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
946138fd1498Szrj         }
946238fd1498Szrj 
946338fd1498Szrj        return false;
946438fd1498Szrj     }
946538fd1498Szrj 
946638fd1498Szrj   return true;
946738fd1498Szrj }
946838fd1498Szrj 
946938fd1498Szrj 
947038fd1498Szrj /* Function vect_transform_stmt.
947138fd1498Szrj 
947238fd1498Szrj    Create a vectorized stmt to replace STMT, and insert it at GSI.  */
947338fd1498Szrj 
947438fd1498Szrj bool
947538fd1498Szrj vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
947638fd1498Szrj 		     bool *grouped_store, slp_tree slp_node,
947738fd1498Szrj                      slp_instance slp_node_instance)
947838fd1498Szrj {
947938fd1498Szrj   bool is_store = false;
948038fd1498Szrj   gimple *vec_stmt = NULL;
948138fd1498Szrj   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
948238fd1498Szrj   bool done;
948338fd1498Szrj 
948438fd1498Szrj   gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
948538fd1498Szrj   gimple *old_vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
948638fd1498Szrj 
948738fd1498Szrj   bool nested_p = (STMT_VINFO_LOOP_VINFO (stmt_info)
948838fd1498Szrj 		   && nested_in_vect_loop_p
948938fd1498Szrj 		        (LOOP_VINFO_LOOP (STMT_VINFO_LOOP_VINFO (stmt_info)),
949038fd1498Szrj 			 stmt));
949138fd1498Szrj 
949238fd1498Szrj   switch (STMT_VINFO_TYPE (stmt_info))
949338fd1498Szrj     {
949438fd1498Szrj     case type_demotion_vec_info_type:
949538fd1498Szrj     case type_promotion_vec_info_type:
949638fd1498Szrj     case type_conversion_vec_info_type:
949738fd1498Szrj       done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
949838fd1498Szrj       gcc_assert (done);
949938fd1498Szrj       break;
950038fd1498Szrj 
950138fd1498Szrj     case induc_vec_info_type:
950238fd1498Szrj       done = vectorizable_induction (stmt, gsi, &vec_stmt, slp_node);
950338fd1498Szrj       gcc_assert (done);
950438fd1498Szrj       break;
950538fd1498Szrj 
950638fd1498Szrj     case shift_vec_info_type:
950738fd1498Szrj       done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
950838fd1498Szrj       gcc_assert (done);
950938fd1498Szrj       break;
951038fd1498Szrj 
951138fd1498Szrj     case op_vec_info_type:
951238fd1498Szrj       done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
951338fd1498Szrj       gcc_assert (done);
951438fd1498Szrj       break;
951538fd1498Szrj 
951638fd1498Szrj     case assignment_vec_info_type:
951738fd1498Szrj       done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
951838fd1498Szrj       gcc_assert (done);
951938fd1498Szrj       break;
952038fd1498Szrj 
952138fd1498Szrj     case load_vec_info_type:
952238fd1498Szrj       done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
952338fd1498Szrj                                 slp_node_instance);
952438fd1498Szrj       gcc_assert (done);
952538fd1498Szrj       break;
952638fd1498Szrj 
952738fd1498Szrj     case store_vec_info_type:
952838fd1498Szrj       done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
952938fd1498Szrj       gcc_assert (done);
953038fd1498Szrj       if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
953138fd1498Szrj 	{
953238fd1498Szrj 	  /* In case of interleaving, the whole chain is vectorized when the
953338fd1498Szrj 	     last store in the chain is reached.  Store stmts before the last
953438fd1498Szrj 	     one are skipped, and their stmt_vec_info must not be freed
953538fd1498Szrj 	     in the meantime.  */
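	  /* E.g. for the interleaved group

	       a[2*i]   = x;
	       a[2*i+1] = y;

	     only the second (last) store triggers generation of the
	     vector stores for the whole chain.  */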
953638fd1498Szrj 	  *grouped_store = true;
953738fd1498Szrj 	  stmt_vec_info group_info
953838fd1498Szrj 	    = vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info));
953938fd1498Szrj 	  if (GROUP_STORE_COUNT (group_info) == GROUP_SIZE (group_info))
954038fd1498Szrj 	    is_store = true;
954138fd1498Szrj 	}
954238fd1498Szrj       else
954338fd1498Szrj 	is_store = true;
954438fd1498Szrj       break;
954538fd1498Szrj 
954638fd1498Szrj     case condition_vec_info_type:
954738fd1498Szrj       done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
954838fd1498Szrj       gcc_assert (done);
954938fd1498Szrj       break;
955038fd1498Szrj 
955138fd1498Szrj     case comparison_vec_info_type:
955238fd1498Szrj       done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node);
955338fd1498Szrj       gcc_assert (done);
955438fd1498Szrj       break;
955538fd1498Szrj 
955638fd1498Szrj     case call_vec_info_type:
955738fd1498Szrj       done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
955838fd1498Szrj       stmt = gsi_stmt (*gsi);
955938fd1498Szrj       break;
956038fd1498Szrj 
956138fd1498Szrj     case call_simd_clone_vec_info_type:
956238fd1498Szrj       done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
956338fd1498Szrj       stmt = gsi_stmt (*gsi);
956438fd1498Szrj       break;
956538fd1498Szrj 
956638fd1498Szrj     case reduc_vec_info_type:
956738fd1498Szrj       done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node,
956838fd1498Szrj 				     slp_node_instance);
956938fd1498Szrj       gcc_assert (done);
957038fd1498Szrj       break;
957138fd1498Szrj 
957238fd1498Szrj     default:
957338fd1498Szrj       if (!STMT_VINFO_LIVE_P (stmt_info))
957438fd1498Szrj 	{
957538fd1498Szrj 	  if (dump_enabled_p ())
957638fd1498Szrj 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
957738fd1498Szrj                              "stmt not supported.\n");
957838fd1498Szrj 	  gcc_unreachable ();
957938fd1498Szrj 	}
958038fd1498Szrj     }
958138fd1498Szrj 
958238fd1498Szrj   /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
958338fd1498Szrj      This would break hybrid SLP vectorization.  */
958438fd1498Szrj   if (slp_node)
958538fd1498Szrj     gcc_assert (!vec_stmt
958638fd1498Szrj 		&& STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt);
958738fd1498Szrj 
958838fd1498Szrj   /* Handle inner-loop stmts whose DEF is used in the loop-nest that
958938fd1498Szrj      is being vectorized, but outside the immediately enclosing loop.  */
959038fd1498Szrj   if (vec_stmt
959138fd1498Szrj       && nested_p
959238fd1498Szrj       && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
959338fd1498Szrj       && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
959438fd1498Szrj           || STMT_VINFO_RELEVANT (stmt_info) ==
959538fd1498Szrj                                            vect_used_in_outer_by_reduction))
959638fd1498Szrj     {
959738fd1498Szrj       struct loop *innerloop = LOOP_VINFO_LOOP (
959838fd1498Szrj                                 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
959938fd1498Szrj       imm_use_iterator imm_iter;
960038fd1498Szrj       use_operand_p use_p;
960138fd1498Szrj       tree scalar_dest;
960238fd1498Szrj       gimple *exit_phi;
960338fd1498Szrj 
960438fd1498Szrj       if (dump_enabled_p ())
960538fd1498Szrj         dump_printf_loc (MSG_NOTE, vect_location,
960638fd1498Szrj                          "Record the vdef for outer-loop vectorization.\n");
960738fd1498Szrj 
960838fd1498Szrj       /* Find the relevant loop-exit phi-node, and record the vec_stmt there
960938fd1498Szrj         (to be used when vectorizing outer-loop stmts that use the DEF of
961038fd1498Szrj         STMT).  */
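      /* E.g. for an inner-loop def x.1 used in the outer loop:

	   inner loop:  x.1 = ...
	   exit phi:    x.2 = PHI <x.1>   <-- VEC_STMT recorded here
	   outer loop:  ... = x.2

	 so outer-loop stmts that use x.2 can find the vectorized def.  */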
961138fd1498Szrj       if (gimple_code (stmt) == GIMPLE_PHI)
961238fd1498Szrj         scalar_dest = PHI_RESULT (stmt);
961338fd1498Szrj       else
9614*58e805e6Szrj         scalar_dest = gimple_get_lhs (stmt);
961538fd1498Szrj 
961638fd1498Szrj       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
961738fd1498Szrj        {
961838fd1498Szrj          if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
961938fd1498Szrj            {
962038fd1498Szrj              exit_phi = USE_STMT (use_p);
962138fd1498Szrj              STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
962238fd1498Szrj            }
962338fd1498Szrj        }
962438fd1498Szrj     }
962538fd1498Szrj 
962638fd1498Szrj   /* Handle stmts whose DEF is used outside the loop-nest that is
962738fd1498Szrj      being vectorized.  */
962838fd1498Szrj   if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
962938fd1498Szrj     {
963038fd1498Szrj       done = can_vectorize_live_stmts (stmt, gsi, slp_node, &vec_stmt);
963138fd1498Szrj       gcc_assert (done);
963238fd1498Szrj     }
963338fd1498Szrj 
963438fd1498Szrj   if (vec_stmt)
963538fd1498Szrj     STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
963638fd1498Szrj 
963738fd1498Szrj   return is_store;
963838fd1498Szrj }
963938fd1498Szrj 
964038fd1498Szrj 
964138fd1498Szrj /* Remove a group of stores (for SLP or interleaving), free their
964238fd1498Szrj    stmt_vec_info.  */
964338fd1498Szrj 
964438fd1498Szrj void
964538fd1498Szrj vect_remove_stores (gimple *first_stmt)
964638fd1498Szrj {
964738fd1498Szrj   gimple *next = first_stmt;
964838fd1498Szrj   gimple *tmp;
964938fd1498Szrj   gimple_stmt_iterator next_si;
965038fd1498Szrj 
965138fd1498Szrj   while (next)
965238fd1498Szrj     {
965338fd1498Szrj       stmt_vec_info stmt_info = vinfo_for_stmt (next);
965438fd1498Szrj 
965538fd1498Szrj       tmp = GROUP_NEXT_ELEMENT (stmt_info);
965638fd1498Szrj       if (is_pattern_stmt_p (stmt_info))
965738fd1498Szrj 	next = STMT_VINFO_RELATED_STMT (stmt_info);
965838fd1498Szrj       /* Free the attached stmt_vec_info and remove the stmt.  */
965938fd1498Szrj       next_si = gsi_for_stmt (next);
966038fd1498Szrj       unlink_stmt_vdef (next);
966138fd1498Szrj       gsi_remove (&next_si, true);
966238fd1498Szrj       release_defs (next);
966338fd1498Szrj       free_stmt_vec_info (next);
966438fd1498Szrj       next = tmp;
966538fd1498Szrj     }
966638fd1498Szrj }
966738fd1498Szrj 
966838fd1498Szrj 
966938fd1498Szrj /* Function new_stmt_vec_info.
967038fd1498Szrj 
967138fd1498Szrj    Create and initialize a new stmt_vec_info struct for STMT.  */
967238fd1498Szrj 
967338fd1498Szrj stmt_vec_info
967438fd1498Szrj new_stmt_vec_info (gimple *stmt, vec_info *vinfo)
967538fd1498Szrj {
967638fd1498Szrj   stmt_vec_info res;
967738fd1498Szrj   res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
967838fd1498Szrj 
967938fd1498Szrj   STMT_VINFO_TYPE (res) = undef_vec_info_type;
968038fd1498Szrj   STMT_VINFO_STMT (res) = stmt;
968138fd1498Szrj   res->vinfo = vinfo;
968238fd1498Szrj   STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
968338fd1498Szrj   STMT_VINFO_LIVE_P (res) = false;
968438fd1498Szrj   STMT_VINFO_VECTYPE (res) = NULL;
968538fd1498Szrj   STMT_VINFO_VEC_STMT (res) = NULL;
968638fd1498Szrj   STMT_VINFO_VECTORIZABLE (res) = true;
968738fd1498Szrj   STMT_VINFO_IN_PATTERN_P (res) = false;
968838fd1498Szrj   STMT_VINFO_RELATED_STMT (res) = NULL;
968938fd1498Szrj   STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
969038fd1498Szrj   STMT_VINFO_DATA_REF (res) = NULL;
969138fd1498Szrj   STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;
969238fd1498Szrj   STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res) = ERROR_MARK;
969338fd1498Szrj 
969438fd1498Szrj   if (gimple_code (stmt) == GIMPLE_PHI
969538fd1498Szrj       && is_loop_header_bb_p (gimple_bb (stmt)))
969638fd1498Szrj     STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
969738fd1498Szrj   else
969838fd1498Szrj     STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
969938fd1498Szrj 
970038fd1498Szrj   STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
970138fd1498Szrj   STMT_SLP_TYPE (res) = loop_vect;
970238fd1498Szrj   STMT_VINFO_NUM_SLP_USES (res) = 0;
970338fd1498Szrj 
970438fd1498Szrj   GROUP_FIRST_ELEMENT (res) = NULL;
970538fd1498Szrj   GROUP_NEXT_ELEMENT (res) = NULL;
970638fd1498Szrj   GROUP_SIZE (res) = 0;
970738fd1498Szrj   GROUP_STORE_COUNT (res) = 0;
970838fd1498Szrj   GROUP_GAP (res) = 0;
970938fd1498Szrj   GROUP_SAME_DR_STMT (res) = NULL;
971038fd1498Szrj 
971138fd1498Szrj   return res;
971238fd1498Szrj }
971338fd1498Szrj 
971438fd1498Szrj 
971538fd1498Szrj /* Create a hash table for stmt_vec_info. */
971638fd1498Szrj 
971738fd1498Szrj void
971838fd1498Szrj init_stmt_vec_info_vec (void)
971938fd1498Szrj {
972038fd1498Szrj   gcc_assert (!stmt_vec_info_vec.exists ());
972138fd1498Szrj   stmt_vec_info_vec.create (50);
972238fd1498Szrj }
972338fd1498Szrj 
972438fd1498Szrj 
972538fd1498Szrj /* Free hash table for stmt_vec_info. */
972638fd1498Szrj 
972738fd1498Szrj void
972838fd1498Szrj free_stmt_vec_info_vec (void)
972938fd1498Szrj {
973038fd1498Szrj   unsigned int i;
973138fd1498Szrj   stmt_vec_info info;
973238fd1498Szrj   FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
973338fd1498Szrj     if (info != NULL)
973438fd1498Szrj       free_stmt_vec_info (STMT_VINFO_STMT (info));
973538fd1498Szrj   gcc_assert (stmt_vec_info_vec.exists ());
973638fd1498Szrj   stmt_vec_info_vec.release ();
973738fd1498Szrj }
973838fd1498Szrj 
973938fd1498Szrj 
974038fd1498Szrj /* Free stmt vectorization related info.  */
974138fd1498Szrj 
974238fd1498Szrj void
974338fd1498Szrj free_stmt_vec_info (gimple *stmt)
974438fd1498Szrj {
974538fd1498Szrj   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
974638fd1498Szrj 
974738fd1498Szrj   if (!stmt_info)
974838fd1498Szrj     return;
974938fd1498Szrj 
975038fd1498Szrj   /* Check if this statement has a related "pattern stmt"
975138fd1498Szrj      (introduced by the vectorizer during the pattern recognition
975238fd1498Szrj      pass).  Free the pattern's stmt_vec_info and the def stmts'
975338fd1498Szrj      stmt_vec_infos too.  */
975438fd1498Szrj   if (STMT_VINFO_IN_PATTERN_P (stmt_info))
975538fd1498Szrj     {
975638fd1498Szrj       stmt_vec_info patt_info
975738fd1498Szrj 	= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
975838fd1498Szrj       if (patt_info)
975938fd1498Szrj 	{
976038fd1498Szrj 	  gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
976138fd1498Szrj 	  gimple *patt_stmt = STMT_VINFO_STMT (patt_info);
976238fd1498Szrj 	  gimple_set_bb (patt_stmt, NULL);
976338fd1498Szrj 	  tree lhs = gimple_get_lhs (patt_stmt);
976438fd1498Szrj 	  if (lhs && TREE_CODE (lhs) == SSA_NAME)
976538fd1498Szrj 	    release_ssa_name (lhs);
976638fd1498Szrj 	  if (seq)
976738fd1498Szrj 	    {
976838fd1498Szrj 	      gimple_stmt_iterator si;
976938fd1498Szrj 	      for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
977038fd1498Szrj 		{
977138fd1498Szrj 		  gimple *seq_stmt = gsi_stmt (si);
977238fd1498Szrj 		  gimple_set_bb (seq_stmt, NULL);
977338fd1498Szrj 		  lhs = gimple_get_lhs (seq_stmt);
977438fd1498Szrj 		  if (lhs && TREE_CODE (lhs) == SSA_NAME)
977538fd1498Szrj 		    release_ssa_name (lhs);
977638fd1498Szrj 		  free_stmt_vec_info (seq_stmt);
977738fd1498Szrj 		}
977838fd1498Szrj 	    }
977938fd1498Szrj 	  free_stmt_vec_info (patt_stmt);
978038fd1498Szrj 	}
978138fd1498Szrj     }
978238fd1498Szrj 
978338fd1498Szrj   STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
978438fd1498Szrj   STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
978538fd1498Szrj   set_vinfo_for_stmt (stmt, NULL);
978638fd1498Szrj   free (stmt_info);
978738fd1498Szrj }
978838fd1498Szrj 
978938fd1498Szrj 
979038fd1498Szrj /* Function get_vectype_for_scalar_type_and_size.
979138fd1498Szrj 
979238fd1498Szrj    Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
979338fd1498Szrj    by the target.  */
979438fd1498Szrj 
979538fd1498Szrj tree
979638fd1498Szrj get_vectype_for_scalar_type_and_size (tree scalar_type, poly_uint64 size)
979738fd1498Szrj {
979838fd1498Szrj   tree orig_scalar_type = scalar_type;
979938fd1498Szrj   scalar_mode inner_mode;
980038fd1498Szrj   machine_mode simd_mode;
980138fd1498Szrj   poly_uint64 nunits;
980238fd1498Szrj   tree vectype;
980338fd1498Szrj 
980438fd1498Szrj   if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
980538fd1498Szrj       && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
980638fd1498Szrj     return NULL_TREE;
980738fd1498Szrj 
980838fd1498Szrj   unsigned int nbytes = GET_MODE_SIZE (inner_mode);
980938fd1498Szrj 
981038fd1498Szrj   /* For vector types of elements whose mode precision doesn't
981138fd1498Szrj      match their type's precision we use an element type of mode
981238fd1498Szrj      precision.  The vectorization routines will have to make sure
981338fd1498Szrj      they support the proper result truncation/extension.
981438fd1498Szrj      We also make sure to build vector types with INTEGER_TYPE
981538fd1498Szrj      component type only.  */
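  /* For instance, a bit-field type of precision 3 has QImode, so the
     vector is built from an 8-bit INTEGER_TYPE element instead.  */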
981638fd1498Szrj   if (INTEGRAL_TYPE_P (scalar_type)
981738fd1498Szrj       && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
981838fd1498Szrj 	  || TREE_CODE (scalar_type) != INTEGER_TYPE))
981938fd1498Szrj     scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
982038fd1498Szrj 						  TYPE_UNSIGNED (scalar_type));
982138fd1498Szrj 
982238fd1498Szrj   /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
982338fd1498Szrj      When the component mode passes the above test simply use a type
982438fd1498Szrj      corresponding to that mode.  The theory is that any use that
982538fd1498Szrj      would cause problems with this will disable vectorization anyway.  */
982638fd1498Szrj   else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
982738fd1498Szrj 	   && !INTEGRAL_TYPE_P (scalar_type))
982838fd1498Szrj     scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
982938fd1498Szrj 
983038fd1498Szrj   /* We can't build a vector type of elements with alignment bigger than
983138fd1498Szrj      their size.  */
983238fd1498Szrj   else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
983338fd1498Szrj     scalar_type = lang_hooks.types.type_for_mode (inner_mode,
983438fd1498Szrj 						  TYPE_UNSIGNED (scalar_type));
983538fd1498Szrj 
983638fd1498Szrj   /* If we fell back to using the mode, fail if there was
983738fd1498Szrj      no scalar type for it.  */
983838fd1498Szrj   if (scalar_type == NULL_TREE)
983938fd1498Szrj     return NULL_TREE;
984038fd1498Szrj 
984138fd1498Szrj   /* If no size was supplied, use the mode the target prefers.  Otherwise
984238fd1498Szrj      look up a vector mode of the specified size.  */
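  /* E.g. for SIZE == 16 and a 4-byte inner mode this looks for a
     4-element vector mode such as V4SImode.  */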
984338fd1498Szrj   if (known_eq (size, 0U))
984438fd1498Szrj     simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
984538fd1498Szrj   else if (!multiple_p (size, nbytes, &nunits)
984638fd1498Szrj 	   || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
984738fd1498Szrj     return NULL_TREE;
984838fd1498Szrj   /* NOTE: nunits == 1 is allowed to support single element vector types.  */
984938fd1498Szrj   if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits))
985038fd1498Szrj     return NULL_TREE;
985138fd1498Szrj 
985238fd1498Szrj   vectype = build_vector_type (scalar_type, nunits);
985338fd1498Szrj 
985438fd1498Szrj   if (!VECTOR_MODE_P (TYPE_MODE (vectype))
985538fd1498Szrj       && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
985638fd1498Szrj     return NULL_TREE;
985738fd1498Szrj 
985838fd1498Szrj   /* Re-attach the address-space qualifier if we canonicalized the scalar
985938fd1498Szrj      type.  */
986038fd1498Szrj   if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
986138fd1498Szrj     return build_qualified_type
986238fd1498Szrj 	     (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
986338fd1498Szrj 
986438fd1498Szrj   return vectype;
986538fd1498Szrj }
986638fd1498Szrj 
986738fd1498Szrj poly_uint64 current_vector_size;
986838fd1498Szrj 
986938fd1498Szrj /* Function get_vectype_for_scalar_type.
987038fd1498Szrj 
987138fd1498Szrj    Returns the vector type corresponding to SCALAR_TYPE as supported
987238fd1498Szrj    by the target.  */
987338fd1498Szrj 
987438fd1498Szrj tree
987538fd1498Szrj get_vectype_for_scalar_type (tree scalar_type)
987638fd1498Szrj {
987738fd1498Szrj   tree vectype;
987838fd1498Szrj   vectype = get_vectype_for_scalar_type_and_size (scalar_type,
987938fd1498Szrj 						  current_vector_size);
988038fd1498Szrj   if (vectype
988138fd1498Szrj       && known_eq (current_vector_size, 0U))
988238fd1498Szrj     current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
988338fd1498Szrj   return vectype;
988438fd1498Szrj }
988538fd1498Szrj 
988638fd1498Szrj /* Function get_mask_type_for_scalar_type.
988738fd1498Szrj 
988838fd1498Szrj    Returns the mask type corresponding to a result of comparison
988938fd1498Szrj    of vectors of specified SCALAR_TYPE as supported by target.  */
989038fd1498Szrj 
989138fd1498Szrj tree
989238fd1498Szrj get_mask_type_for_scalar_type (tree scalar_type)
989338fd1498Szrj {
989438fd1498Szrj   tree vectype = get_vectype_for_scalar_type (scalar_type);
989538fd1498Szrj 
989638fd1498Szrj   if (!vectype)
989738fd1498Szrj     return NULL;
989838fd1498Szrj 
989938fd1498Szrj   return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
990038fd1498Szrj 				  current_vector_size);
990138fd1498Szrj }
990238fd1498Szrj 
990338fd1498Szrj /* Function get_same_sized_vectype
990438fd1498Szrj 
990538fd1498Szrj    Returns a vector type corresponding to SCALAR_TYPE of size
990638fd1498Szrj    VECTOR_TYPE if supported by the target.  */
990738fd1498Szrj 
990838fd1498Szrj tree
990938fd1498Szrj get_same_sized_vectype (tree scalar_type, tree vector_type)
991038fd1498Szrj {
991138fd1498Szrj   if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
991238fd1498Szrj     return build_same_sized_truth_vector_type (vector_type);
991338fd1498Szrj 
991438fd1498Szrj   return get_vectype_for_scalar_type_and_size
991538fd1498Szrj 	   (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
991638fd1498Szrj }
991738fd1498Szrj 
991838fd1498Szrj /* Function vect_is_simple_use.
991938fd1498Szrj 
992038fd1498Szrj    Input:
992138fd1498Szrj    VINFO - the vect info of the loop or basic block that is being vectorized.
992238fd1498Szrj    OPERAND - operand in the loop or bb.
992338fd1498Szrj    Output:
992438fd1498Szrj    DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
992538fd1498Szrj    DT - the type of definition
992638fd1498Szrj 
992738fd1498Szrj    Returns whether a stmt with OPERAND can be vectorized.
992838fd1498Szrj    For loops, supportable operands are constants, loop invariants, and operands
992938fd1498Szrj    that are defined by the current iteration of the loop.  Unsupportable
993038fd1498Szrj    operands are those that are defined by a previous iteration of the loop (as
993138fd1498Szrj    is the case in reduction/induction computations).
993238fd1498Szrj    For basic blocks, supportable operands are constants and bb invariants.
993338fd1498Szrj    For now, operands defined outside the basic block are not supported.  */
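/* A typical use from a vectorizable_* routine is a sketch like this
   (OP naming an operand of the statement being analyzed):

     gimple *def_stmt;
     enum vect_def_type dt;
     if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
       return false;  */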
993438fd1498Szrj 
993538fd1498Szrj bool
993638fd1498Szrj vect_is_simple_use (tree operand, vec_info *vinfo,
993738fd1498Szrj                     gimple **def_stmt, enum vect_def_type *dt)
993838fd1498Szrj {
993938fd1498Szrj   *def_stmt = NULL;
994038fd1498Szrj   *dt = vect_unknown_def_type;
994138fd1498Szrj 
994238fd1498Szrj   if (dump_enabled_p ())
994338fd1498Szrj     {
994438fd1498Szrj       dump_printf_loc (MSG_NOTE, vect_location,
994538fd1498Szrj                        "vect_is_simple_use: operand ");
994638fd1498Szrj       dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
994738fd1498Szrj       dump_printf (MSG_NOTE, "\n");
994838fd1498Szrj     }
994938fd1498Szrj 
995038fd1498Szrj   if (CONSTANT_CLASS_P (operand))
995138fd1498Szrj     {
995238fd1498Szrj       *dt = vect_constant_def;
995338fd1498Szrj       return true;
995438fd1498Szrj     }
995538fd1498Szrj 
995638fd1498Szrj   if (is_gimple_min_invariant (operand))
995738fd1498Szrj     {
995838fd1498Szrj       *dt = vect_external_def;
995938fd1498Szrj       return true;
996038fd1498Szrj     }
996138fd1498Szrj 
996238fd1498Szrj   if (TREE_CODE (operand) != SSA_NAME)
996338fd1498Szrj     {
996438fd1498Szrj       if (dump_enabled_p ())
996538fd1498Szrj 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
996638fd1498Szrj 			 "not ssa-name.\n");
996738fd1498Szrj       return false;
996838fd1498Szrj     }
996938fd1498Szrj 
997038fd1498Szrj   if (SSA_NAME_IS_DEFAULT_DEF (operand))
997138fd1498Szrj     {
997238fd1498Szrj       *dt = vect_external_def;
997338fd1498Szrj       return true;
997438fd1498Szrj     }
997538fd1498Szrj 
997638fd1498Szrj   *def_stmt = SSA_NAME_DEF_STMT (operand);
997738fd1498Szrj   if (dump_enabled_p ())
997838fd1498Szrj     {
997938fd1498Szrj       dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
998038fd1498Szrj       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
998138fd1498Szrj     }
998238fd1498Szrj 
998338fd1498Szrj   if (! vect_stmt_in_region_p (vinfo, *def_stmt))
998438fd1498Szrj     *dt = vect_external_def;
998538fd1498Szrj   else
998638fd1498Szrj     {
998738fd1498Szrj       stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
998838fd1498Szrj       *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
998938fd1498Szrj     }
999038fd1498Szrj 
999138fd1498Szrj   if (dump_enabled_p ())
999238fd1498Szrj     {
999338fd1498Szrj       dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
999438fd1498Szrj       switch (*dt)
999538fd1498Szrj 	{
999638fd1498Szrj 	case vect_uninitialized_def:
999738fd1498Szrj 	  dump_printf (MSG_NOTE, "uninitialized\n");
999838fd1498Szrj 	  break;
999938fd1498Szrj 	case vect_constant_def:
1000038fd1498Szrj 	  dump_printf (MSG_NOTE, "constant\n");
1000138fd1498Szrj 	  break;
1000238fd1498Szrj 	case vect_external_def:
1000338fd1498Szrj 	  dump_printf (MSG_NOTE, "external\n");
1000438fd1498Szrj 	  break;
1000538fd1498Szrj 	case vect_internal_def:
1000638fd1498Szrj 	  dump_printf (MSG_NOTE, "internal\n");
1000738fd1498Szrj 	  break;
1000838fd1498Szrj 	case vect_induction_def:
1000938fd1498Szrj 	  dump_printf (MSG_NOTE, "induction\n");
1001038fd1498Szrj 	  break;
1001138fd1498Szrj 	case vect_reduction_def:
1001238fd1498Szrj 	  dump_printf (MSG_NOTE, "reduction\n");
1001338fd1498Szrj 	  break;
1001438fd1498Szrj 	case vect_double_reduction_def:
1001538fd1498Szrj 	  dump_printf (MSG_NOTE, "double reduction\n");
1001638fd1498Szrj 	  break;
1001738fd1498Szrj 	case vect_nested_cycle:
1001838fd1498Szrj 	  dump_printf (MSG_NOTE, "nested cycle\n");
1001938fd1498Szrj 	  break;
1002038fd1498Szrj 	case vect_unknown_def_type:
1002138fd1498Szrj 	  dump_printf (MSG_NOTE, "unknown\n");
1002238fd1498Szrj 	  break;
1002338fd1498Szrj 	}
1002438fd1498Szrj     }
1002538fd1498Szrj 
1002638fd1498Szrj   if (*dt == vect_unknown_def_type)
1002738fd1498Szrj     {
1002838fd1498Szrj       if (dump_enabled_p ())
1002938fd1498Szrj         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1003038fd1498Szrj                          "Unsupported pattern.\n");
1003138fd1498Szrj       return false;
1003238fd1498Szrj     }
1003338fd1498Szrj 
1003438fd1498Szrj   switch (gimple_code (*def_stmt))
1003538fd1498Szrj     {
1003638fd1498Szrj     case GIMPLE_PHI:
1003738fd1498Szrj     case GIMPLE_ASSIGN:
1003838fd1498Szrj     case GIMPLE_CALL:
1003938fd1498Szrj       break;
1004038fd1498Szrj     default:
1004138fd1498Szrj       if (dump_enabled_p ())
1004238fd1498Szrj         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1004338fd1498Szrj                          "unsupported defining stmt:\n");
1004438fd1498Szrj       return false;
1004538fd1498Szrj     }
1004638fd1498Szrj 
1004738fd1498Szrj   return true;
1004838fd1498Szrj }
1004938fd1498Szrj 
1005038fd1498Szrj /* Function vect_is_simple_use.
1005138fd1498Szrj 
1005238fd1498Szrj    Same as vect_is_simple_use but also determines the vector operand
1005338fd1498Szrj    type of OPERAND and stores it to *VECTYPE.  If the definition of
1005438fd1498Szrj    OPERAND is vect_uninitialized_def, vect_constant_def or
1005538fd1498Szrj    vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
1005638fd1498Szrj    is responsible for computing the best suited vector type for the
1005738fd1498Szrj    scalar operand.  */
1005838fd1498Szrj 
1005938fd1498Szrj bool
1006038fd1498Szrj vect_is_simple_use (tree operand, vec_info *vinfo,
1006138fd1498Szrj 		    gimple **def_stmt, enum vect_def_type *dt, tree *vectype)
1006238fd1498Szrj {
1006338fd1498Szrj   if (!vect_is_simple_use (operand, vinfo, def_stmt, dt))
1006438fd1498Szrj     return false;
1006538fd1498Szrj 
1006638fd1498Szrj   /* Now get a vector type if the def is internal, otherwise supply
1006738fd1498Szrj      NULL_TREE and leave it up to the caller to figure out a proper
1006838fd1498Szrj      type for the use stmt.  */
1006938fd1498Szrj   if (*dt == vect_internal_def
1007038fd1498Szrj       || *dt == vect_induction_def
1007138fd1498Szrj       || *dt == vect_reduction_def
1007238fd1498Szrj       || *dt == vect_double_reduction_def
1007338fd1498Szrj       || *dt == vect_nested_cycle)
1007438fd1498Szrj     {
1007538fd1498Szrj       stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
1007638fd1498Szrj 
1007738fd1498Szrj       if (STMT_VINFO_IN_PATTERN_P (stmt_info)
1007838fd1498Szrj           && !STMT_VINFO_RELEVANT (stmt_info)
1007938fd1498Szrj           && !STMT_VINFO_LIVE_P (stmt_info))
1008038fd1498Szrj 	stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
1008138fd1498Szrj 
1008238fd1498Szrj       *vectype = STMT_VINFO_VECTYPE (stmt_info);
1008338fd1498Szrj       gcc_assert (*vectype != NULL_TREE);
1008438fd1498Szrj     }
1008538fd1498Szrj   else if (*dt == vect_uninitialized_def
1008638fd1498Szrj 	   || *dt == vect_constant_def
1008738fd1498Szrj 	   || *dt == vect_external_def)
1008838fd1498Szrj     *vectype = NULL_TREE;
1008938fd1498Szrj   else
1009038fd1498Szrj     gcc_unreachable ();
1009138fd1498Szrj 
1009238fd1498Szrj   return true;
1009338fd1498Szrj }
1009438fd1498Szrj 
1009538fd1498Szrj 
1009638fd1498Szrj /* Function supportable_widening_operation
1009738fd1498Szrj 
1009838fd1498Szrj    Check whether an operation represented by the code CODE is a
1009938fd1498Szrj    widening operation that is supported by the target platform in
1010038fd1498Szrj    vector form (i.e., when operating on arguments of type VECTYPE_IN
1010138fd1498Szrj    producing a result of type VECTYPE_OUT).
1010238fd1498Szrj 
1010338fd1498Szrj    Widening operations we currently support are NOP (CONVERT), FLOAT,
1010438fd1498Szrj    WIDEN_MULT, WIDEN_LSHIFT, DOT_PROD and SAD.  This function checks if
1010538fd1498Szrj    these operations are supported by the target platform either directly
1010638fd1498Szrj    (via vector tree-codes), or via target builtins.
1010738fd1498Szrj 
1010838fd1498Szrj    Output:
1010938fd1498Szrj    - CODE1 and CODE2 are codes of vector operations to be used when
1011038fd1498Szrj    vectorizing the operation, if available.
1011138fd1498Szrj    - MULTI_STEP_CVT determines the number of required intermediate steps in
1011238fd1498Szrj    case of multi-step conversion (like char->short->int - in that case
1011338fd1498Szrj    MULTI_STEP_CVT will be 1).
1011438fd1498Szrj    - INTERM_TYPES contains the intermediate type required to perform the
1011538fd1498Szrj    widening operation (short in the above example).  */
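/* For example, on a target with 128-bit vectors a char -> int widening
   of V16QI input reaches V4SI in two steps (char -> short -> int), so
   MULTI_STEP_CVT is 1 and INTERM_TYPES holds the V8HI intermediate type
   (an illustration; the exact modes depend on the target).  */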
1011638fd1498Szrj 
1011738fd1498Szrj bool
1011838fd1498Szrj supportable_widening_operation (enum tree_code code, gimple *stmt,
1011938fd1498Szrj 				tree vectype_out, tree vectype_in,
1012038fd1498Szrj                                 enum tree_code *code1, enum tree_code *code2,
1012138fd1498Szrj                                 int *multi_step_cvt,
1012238fd1498Szrj                                 vec<tree> *interm_types)
1012338fd1498Szrj {
1012438fd1498Szrj   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1012538fd1498Szrj   loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
1012638fd1498Szrj   struct loop *vect_loop = NULL;
1012738fd1498Szrj   machine_mode vec_mode;
1012838fd1498Szrj   enum insn_code icode1, icode2;
1012938fd1498Szrj   optab optab1, optab2;
1013038fd1498Szrj   tree vectype = vectype_in;
1013138fd1498Szrj   tree wide_vectype = vectype_out;
1013238fd1498Szrj   enum tree_code c1, c2;
1013338fd1498Szrj   int i;
1013438fd1498Szrj   tree prev_type, intermediate_type;
1013538fd1498Szrj   machine_mode intermediate_mode, prev_mode;
1013638fd1498Szrj   optab optab3, optab4;
1013738fd1498Szrj 
1013838fd1498Szrj   *multi_step_cvt = 0;
1013938fd1498Szrj   if (loop_info)
1014038fd1498Szrj     vect_loop = LOOP_VINFO_LOOP (loop_info);
1014138fd1498Szrj 
1014238fd1498Szrj   switch (code)
1014338fd1498Szrj     {
1014438fd1498Szrj     case WIDEN_MULT_EXPR:
1014538fd1498Szrj       /* The result of a vectorized widening operation usually requires
1014638fd1498Szrj 	 two vectors (because the widened results do not fit into one vector).
1014738fd1498Szrj 	 The generated vector results would normally be expected to be
1014838fd1498Szrj 	 generated in the same order as in the original scalar computation,
1014938fd1498Szrj 	 i.e. if 8 results are generated in each vector iteration, they are
1015038fd1498Szrj 	 to be organized as follows:
1015138fd1498Szrj 		vect1: [res1,res2,res3,res4],
1015238fd1498Szrj 		vect2: [res5,res6,res7,res8].
1015338fd1498Szrj 
1015438fd1498Szrj 	 However, in the special case that the result of the widening
1015538fd1498Szrj 	 operation is used in a reduction computation only, the order doesn't
1015638fd1498Szrj 	 matter (because when vectorizing a reduction we change the order of
1015738fd1498Szrj 	 the computation).  Some targets can take advantage of this and
1015838fd1498Szrj 	 generate more efficient code.  For example, targets like Altivec,
1015938fd1498Szrj 	 that support widen_mult using a sequence of {mult_even,mult_odd}
1016038fd1498Szrj 	 generate the following vectors:
1016138fd1498Szrj 		vect1: [res1,res3,res5,res7],
1016238fd1498Szrj 		vect2: [res2,res4,res6,res8].
1016338fd1498Szrj 
1016438fd1498Szrj 	 When vectorizing outer-loops, we execute the inner-loop sequentially
1016538fd1498Szrj 	 (each vectorized inner-loop iteration contributes to VF outer-loop
1016638fd1498Szrj 	 iterations in parallel).  We therefore don't allow changing the
1016738fd1498Szrj 	 order of the computation in the inner-loop during outer-loop
1016838fd1498Szrj 	 vectorization.  */
1016938fd1498Szrj       /* TODO: Another case in which order doesn't *really* matter is when we
1017038fd1498Szrj 	 widen and then contract again, e.g. (short)((int)x * y >> 8).
1017138fd1498Szrj 	 Normally, pack_trunc performs an even/odd permute, whereas the
1017238fd1498Szrj 	 repack from an even/odd expansion would be an interleave, which
1017338fd1498Szrj 	 would be significantly simpler for e.g. AVX2.  */
1017438fd1498Szrj       /* In any case, in order to avoid duplicating the code below, recurse
1017538fd1498Szrj 	 on VEC_WIDEN_MULT_EVEN_EXPR.  If it succeeds, all the return values
1017638fd1498Szrj 	 are properly set up for the caller.  If we fail, we'll continue with
1017738fd1498Szrj 	 a VEC_WIDEN_MULT_LO/HI_EXPR check.  */
1017838fd1498Szrj       if (vect_loop
1017938fd1498Szrj 	  && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
1018038fd1498Szrj 	  && !nested_in_vect_loop_p (vect_loop, stmt)
1018138fd1498Szrj 	  && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
1018238fd1498Szrj 					     stmt, vectype_out, vectype_in,
1018338fd1498Szrj 					     code1, code2, multi_step_cvt,
1018438fd1498Szrj 					     interm_types))
1018538fd1498Szrj         {
1018638fd1498Szrj           /* Elements in a vector with vect_used_by_reduction property cannot
1018738fd1498Szrj              be reordered if the use chain with this property does not have the
1018838fd1498Szrj              same operation.  One such example is s += a * b, where elements
1018938fd1498Szrj              in a and b cannot be reordered.  Here we check if the vector defined
1019038fd1498Szrj              by STMT is only directly used in the reduction statement.  */
1019138fd1498Szrj           tree lhs = gimple_assign_lhs (stmt);
1019238fd1498Szrj           use_operand_p dummy;
1019338fd1498Szrj           gimple *use_stmt;
1019438fd1498Szrj           stmt_vec_info use_stmt_info = NULL;
1019538fd1498Szrj           if (single_imm_use (lhs, &dummy, &use_stmt)
1019638fd1498Szrj               && (use_stmt_info = vinfo_for_stmt (use_stmt))
1019738fd1498Szrj               && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
1019838fd1498Szrj             return true;
1019938fd1498Szrj         }
1020038fd1498Szrj       c1 = VEC_WIDEN_MULT_LO_EXPR;
1020138fd1498Szrj       c2 = VEC_WIDEN_MULT_HI_EXPR;
1020238fd1498Szrj       break;
1020338fd1498Szrj 
1020438fd1498Szrj     case DOT_PROD_EXPR:
1020538fd1498Szrj       c1 = DOT_PROD_EXPR;
1020638fd1498Szrj       c2 = DOT_PROD_EXPR;
1020738fd1498Szrj       break;
1020838fd1498Szrj 
1020938fd1498Szrj     case SAD_EXPR:
1021038fd1498Szrj       c1 = SAD_EXPR;
1021138fd1498Szrj       c2 = SAD_EXPR;
1021238fd1498Szrj       break;
1021338fd1498Szrj 
1021438fd1498Szrj     case VEC_WIDEN_MULT_EVEN_EXPR:
1021538fd1498Szrj       /* Support the recursion induced just above.  */
1021638fd1498Szrj       c1 = VEC_WIDEN_MULT_EVEN_EXPR;
1021738fd1498Szrj       c2 = VEC_WIDEN_MULT_ODD_EXPR;
1021838fd1498Szrj       break;
1021938fd1498Szrj 
1022038fd1498Szrj     case WIDEN_LSHIFT_EXPR:
1022138fd1498Szrj       c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
1022238fd1498Szrj       c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
1022338fd1498Szrj       break;
1022438fd1498Szrj 
1022538fd1498Szrj     CASE_CONVERT:
1022638fd1498Szrj       c1 = VEC_UNPACK_LO_EXPR;
1022738fd1498Szrj       c2 = VEC_UNPACK_HI_EXPR;
1022838fd1498Szrj       break;
1022938fd1498Szrj 
1023038fd1498Szrj     case FLOAT_EXPR:
1023138fd1498Szrj       c1 = VEC_UNPACK_FLOAT_LO_EXPR;
1023238fd1498Szrj       c2 = VEC_UNPACK_FLOAT_HI_EXPR;
1023338fd1498Szrj       break;
1023438fd1498Szrj 
1023538fd1498Szrj     case FIX_TRUNC_EXPR:
1023638fd1498Szrj       /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
1023738fd1498Szrj 	 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
1023838fd1498Szrj 	 computing the operation.  */
1023938fd1498Szrj       return false;
1024038fd1498Szrj 
1024138fd1498Szrj     default:
1024238fd1498Szrj       gcc_unreachable ();
1024338fd1498Szrj     }
1024438fd1498Szrj 
1024538fd1498Szrj   if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
1024638fd1498Szrj     std::swap (c1, c2);
1024738fd1498Szrj 
1024838fd1498Szrj   if (code == FIX_TRUNC_EXPR)
1024938fd1498Szrj     {
1025038fd1498Szrj       /* The signedness is determined from the output operand.  */
1025138fd1498Szrj       optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
1025238fd1498Szrj       optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
1025338fd1498Szrj     }
1025438fd1498Szrj   else
1025538fd1498Szrj     {
1025638fd1498Szrj       optab1 = optab_for_tree_code (c1, vectype, optab_default);
1025738fd1498Szrj       optab2 = optab_for_tree_code (c2, vectype, optab_default);
1025838fd1498Szrj     }
1025938fd1498Szrj 
1026038fd1498Szrj   if (!optab1 || !optab2)
1026138fd1498Szrj     return false;
1026238fd1498Szrj 
1026338fd1498Szrj   vec_mode = TYPE_MODE (vectype);
1026438fd1498Szrj   if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
1026538fd1498Szrj        || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
1026638fd1498Szrj     return false;
1026738fd1498Szrj 
1026838fd1498Szrj   *code1 = c1;
1026938fd1498Szrj   *code2 = c2;
1027038fd1498Szrj 
1027138fd1498Szrj   if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
1027238fd1498Szrj       && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
1027338fd1498Szrj       /* For scalar masks we may have different boolean
1027438fd1498Szrj 	 vector types having the same QImode.  Thus we
1027538fd1498Szrj 	 add an additional check on the number of elements.  */
1027638fd1498Szrj     return (!VECTOR_BOOLEAN_TYPE_P (vectype)
1027738fd1498Szrj 	    || known_eq (TYPE_VECTOR_SUBPARTS (vectype),
1027838fd1498Szrj 			 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));
1027938fd1498Szrj 
1028038fd1498Szrj   /* Check if it's a multi-step conversion that can be done using intermediate
1028138fd1498Szrj      types.  */
1028238fd1498Szrj 
1028338fd1498Szrj   prev_type = vectype;
1028438fd1498Szrj   prev_mode = vec_mode;
1028538fd1498Szrj 
1028638fd1498Szrj   if (!CONVERT_EXPR_CODE_P (code))
1028738fd1498Szrj     return false;
1028838fd1498Szrj 
1028938fd1498Szrj   /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
1029038fd1498Szrj      intermediate steps in the promotion sequence.  We try
1029138fd1498Szrj      MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
1029238fd1498Szrj      not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      if (VECTOR_BOOLEAN_TYPE_P (prev_type))
	{
	  intermediate_type = vect_halve_mask_nunits (prev_type);
	  if (intermediate_mode != TYPE_MODE (intermediate_type))
	    return false;
	}
      else
	intermediate_type
	  = lang_hooks.types.type_for_mode (intermediate_mode,
					    TYPE_UNSIGNED (prev_type));

      optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
      optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);

      if (!optab3 || !optab4
	  || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
	  || insn_data[icode1].operand[0].mode != intermediate_mode
	  || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
	  || insn_data[icode2].operand[0].mode != intermediate_mode
	  || ((icode1 = optab_handler (optab3, intermediate_mode))
	      == CODE_FOR_nothing)
	  || ((icode2 = optab_handler (optab4, intermediate_mode))
	      == CODE_FOR_nothing))
	break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
	  && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
	return (!VECTOR_BOOLEAN_TYPE_P (vectype)
		|| known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
			     TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));

      prev_type = intermediate_type;
      prev_mode = intermediate_mode;
    }

  interm_types->release ();
  return false;
}


/* Function supportable_narrowing_operation

   Check whether an operation represented by the code CODE is a
   narrowing operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   and producing a result of type VECTYPE_OUT).

   Narrowing operations we currently support are NOP (CONVERT) and
   FIX_TRUNC.  This function checks if these operations are supported by
   the target platform directly via vector tree-codes.

   Output:
   - CODE1 is the code of a vector operation to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like int->short->char - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   narrowing operation (short in the above example).  */
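/* For example (illustrative only): narrowing int to char, with
   VECTYPE_IN V4SI and VECTYPE_OUT V16QI, proceeds V4SI -> V8HI ->
   V16QI; each VEC_PACK_TRUNC_EXPR step combines two input vectors
   into one vector with twice as many elements of half the width.
   On success CODE1 is VEC_PACK_TRUNC_EXPR, MULTI_STEP_CVT is 1 and
   INTERM_TYPES contains the V8HI type.  */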
bool
supportable_narrowing_operation (enum tree_code code,
				 tree vectype_out, tree vectype_in,
				 enum tree_code *code1, int *multi_step_cvt,
				 vec<tree> *interm_types)
{
  machine_mode vec_mode;
  enum insn_code icode1;
  optab optab1, interm_optab;
  tree vectype = vectype_in;
  tree narrow_vectype = vectype_out;
  enum tree_code c1;
  tree intermediate_type, prev_type;
  machine_mode intermediate_mode, prev_mode;
  int i;
  bool uns;

  *multi_step_cvt = 0;
  switch (code)
    {
    CASE_CONVERT:
      c1 = VEC_PACK_TRUNC_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_PACK_FIX_TRUNC_EXPR;
      break;

    case FLOAT_EXPR:
      /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
	 tree code and optabs used for computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }
  if (code == FIX_TRUNC_EXPR)
    /* The signedness is determined from the output operand.  */
    optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
  else
    optab1 = optab_for_tree_code (c1, vectype, optab_default);

  if (!optab1)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
    /* For scalar masks we may have different boolean
       vector types having the same QImode.  Thus we
       add an additional check on the number of elements.  */
    return (!VECTOR_BOOLEAN_TYPE_P (vectype)
	    || known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
			 TYPE_VECTOR_SUBPARTS (narrow_vectype)));

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */
  prev_mode = vec_mode;
  prev_type = vectype;
  if (code == FIX_TRUNC_EXPR)
    uns = TYPE_UNSIGNED (vectype_out);
  else
    uns = TYPE_UNSIGNED (vectype);

  /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
     conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
     costly than signed.  */
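  /* For example (illustrative only): some targets provide a signed
     packing float-to-int conversion but not an unsigned one; when the
     signed handler produces the same result mode, the remaining
     narrowing steps can be carried out on signed types instead.  */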
  if (code == FIX_TRUNC_EXPR && uns)
    {
      enum insn_code icode2;

      intermediate_type
	= lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
      interm_optab
	= optab_for_tree_code (c1, intermediate_type, optab_default);
      if (interm_optab != unknown_optab
	  && (icode2 = optab_handler (interm_optab, vec_mode))
	     != CODE_FOR_nothing
	  && insn_data[icode1].operand[0].mode
	     == insn_data[icode2].operand[0].mode)
	{
	  /* Use the signed variant for the remaining narrowing steps.  */
	  uns = false;
	  optab1 = interm_optab;
	  icode1 = icode2;
	}
    }

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the narrowing sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      if (VECTOR_BOOLEAN_TYPE_P (prev_type))
	{
	  intermediate_type = vect_double_mask_nunits (prev_type);
	  if (intermediate_mode != TYPE_MODE (intermediate_type))
	    return false;
	}
      else
	intermediate_type
	  = lang_hooks.types.type_for_mode (intermediate_mode, uns);
      interm_optab
	= optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
			       optab_default);
      if (!interm_optab
	  || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
	  || insn_data[icode1].operand[0].mode != intermediate_mode
	  || ((icode1 = optab_handler (interm_optab, intermediate_mode))
	      == CODE_FOR_nothing))
	break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
	return (!VECTOR_BOOLEAN_TYPE_P (vectype)
		|| known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
			     TYPE_VECTOR_SUBPARTS (narrow_vectype)));

      prev_mode = intermediate_mode;
      prev_type = intermediate_type;
      optab1 = interm_optab;
    }

  interm_types->release ();
  return false;
}
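
/* A minimal caller sketch (illustrative only; see the real uses in
   vectorizable_conversion):

     enum tree_code code1;
     int multi_step_cvt = 0;
     vec<tree> interm_types = vNULL;
     if (supportable_narrowing_operation (CONVERT_EXPR, vectype_out,
					  vectype_in, &code1,
					  &multi_step_cvt, &interm_types))
       ... emit MULTI_STEP_CVT + 1 packing steps, pairing up vectors
	   at each step and using CODE1 for the final one ...  */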

/* Generate and return a statement that sets vector mask MASK such that
   MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I.  */
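/* For example (illustrative only): with an 8-element mask,
   START_INDEX 5 and END_INDEX 8, the built call computes

     MASK = .WHILE_ULT (5, 8, { 0, ... });

   giving MASK = { 1, 1, 1, 0, 0, 0, 0, 0 }.  */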

gcall *
vect_gen_while (tree mask, tree start_index, tree end_index)
{
  tree cmp_type = TREE_TYPE (start_index);
  tree mask_type = TREE_TYPE (mask);
  gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
						       cmp_type, mask_type,
						       OPTIMIZE_FOR_SPEED));
  gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
					    start_index, end_index,
					    build_zero_cst (mask_type));
  gimple_call_set_lhs (call, mask);
  return call;
}

/* Generate a vector mask of type MASK_TYPE whose element I is false iff
   J + START_INDEX < END_INDEX for all J <= I.  Add the statements to SEQ.  */
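/* Illustrative only: the statements added to SEQ amount to the
   inverted WHILE_ULT mask, i.e.

     tmp = .WHILE_ULT (START_INDEX, END_INDEX, { 0, ... });
     result = ~tmp;

   and the returned tree holds RESULT.  */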

tree
vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
		    tree end_index)
{
  tree tmp = make_ssa_name (mask_type);
  gcall *call = vect_gen_while (tmp, start_index, end_index);
  gimple_seq_add_stmt (seq, call);
  return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
}