/* Data References Analysis and Manipulation Utilities for Vectorization.
   Copyright (C) 2003-2018 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "predict.h"
#include "memmodel.h"
#include "tm_p.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "cgraph.h"
#include "dumpfile.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop.h"
#include "cfgloop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "expr.h"
#include "builtins.h"
#include "params.h"
#include "tree-cfg.h"
#include "tree-hash-traits.h"
#include "vec-perm-indices.h"
#include "internal-fn.h"

/* Return true if load- or store-lanes optab OPTAB is implemented for
   COUNT vectors of type VECTYPE.  NAME is the name of OPTAB.  */

static bool
vect_lanes_optab_supported_p (const char *name, convert_optab optab,
			      tree vectype, unsigned HOST_WIDE_INT count)
{
  machine_mode mode, array_mode;
  bool limit_p;

  mode = TYPE_MODE (vectype);
  if (!targetm.array_mode (mode, count).exists (&array_mode))
    {
      poly_uint64 bits = count * GET_MODE_BITSIZE (mode);
      limit_p = !targetm.array_mode_supported_p (mode, count);
      if (!int_mode_for_size (bits, limit_p).exists (&array_mode))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "no array mode for %s["
			     HOST_WIDE_INT_PRINT_DEC "]\n",
			     GET_MODE_NAME (mode), count);
	  return false;
	}
    }

  if (convert_optab_handler (optab, array_mode, mode) == CODE_FOR_nothing)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "cannot use %s<%s><%s>\n", name,
                         GET_MODE_NAME (array_mode), GET_MODE_NAME (mode));
      return false;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "can use %s<%s><%s>\n", name, GET_MODE_NAME (array_mode),
                     GET_MODE_NAME (mode));

  return true;
}


/* Return the smallest scalar part of STMT.
   This is used to determine the vectype of the stmt.  We generally set the
   vectype according to the type of the result (lhs).  For stmts whose
   result-type is different than the type of the arguments (e.g., demotion,
   promotion), vectype will be reset appropriately (later).  Note that we have
   to visit the smallest datatype in this function, because that determines the
   VF.  If the smallest datatype in the loop is present only as the rhs of a
   promotion operation - we'd miss it.
   Such a case, where a variable of this datatype does not appear in the lhs
   anywhere in the loop, can only occur if it's an invariant: e.g.:
   'int_x = (int) short_inv', which we'd expect to have been optimized away by
   invariant motion.  However, we cannot rely on invariant motion to always
   take invariants out of the loop, and so in the case of promotion we also
   have to check the rhs.
   LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
   types.  */

tree
vect_get_smallest_scalar_type (gimple *stmt, HOST_WIDE_INT *lhs_size_unit,
                               HOST_WIDE_INT *rhs_size_unit)
{
  tree scalar_type = gimple_expr_type (stmt);
  HOST_WIDE_INT lhs, rhs;

  /* During the analysis phase, this function is called on arbitrary
     statements that might not have scalar results.  */
  if (!tree_fits_uhwi_p (TYPE_SIZE_UNIT (scalar_type)))
    return scalar_type;

  lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));

  if (is_gimple_assign (stmt)
      && (gimple_assign_cast_p (stmt)
          || gimple_assign_rhs_code (stmt) == DOT_PROD_EXPR
          || gimple_assign_rhs_code (stmt) == WIDEN_SUM_EXPR
          || gimple_assign_rhs_code (stmt) == WIDEN_MULT_EXPR
          || gimple_assign_rhs_code (stmt) == WIDEN_LSHIFT_EXPR
          || gimple_assign_rhs_code (stmt) == FLOAT_EXPR))
    {
      tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));

      rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
      if (rhs < lhs)
        scalar_type = rhs_type;
    }

  *lhs_size_unit = lhs;
  *rhs_size_unit = rhs;
  return scalar_type;
}


/* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
   tested at run-time.  Return TRUE if DDR was successfully inserted.
   Return false if versioning is not supported.  */

static bool
vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
    return false;

  if (!runtime_alias_check_p (ddr, loop,
			      optimize_loop_nest_for_speed_p (loop)))
    return false;

  LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo).safe_push (ddr);
  return true;
}

/* Record that loop LOOP_VINFO needs to check that VALUE is nonzero.  */

static void
vect_check_nonzero_value (loop_vec_info loop_vinfo, tree value)
{
  vec<tree> checks = LOOP_VINFO_CHECK_NONZERO (loop_vinfo);
  for (unsigned int i = 0; i < checks.length(); ++i)
    if (checks[i] == value)
      return;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "need run-time check that ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, value);
      dump_printf (MSG_NOTE, " is nonzero\n");
    }
  LOOP_VINFO_CHECK_NONZERO (loop_vinfo).safe_push (value);
}

/* Return true if we know that the order of vectorized STMT_A and
   vectorized STMT_B will be the same as the order of STMT_A and STMT_B.
   At least one of the statements is a write.  */

static bool
vect_preserves_scalar_order_p (gimple *stmt_a, gimple *stmt_b)
{
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (stmt_a);
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (stmt_b);

  /* Single statements are always kept in their original order.  */
  if (!STMT_VINFO_GROUPED_ACCESS (stmtinfo_a)
      && !STMT_VINFO_GROUPED_ACCESS (stmtinfo_b))
    return true;

  /* STMT_A and STMT_B belong to overlapping groups.  All loads in a
     SLP group are emitted at the position of the last scalar load and
     all loads in an interleaving group are emitted at the position
     of the first scalar load.
     Stores in a group are emitted at the position of the last scalar store.
     Compute that position and check whether the resulting order matches
     the current one.
     We have not yet decided between SLP and interleaving so we have
     to conservatively assume both.  */
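  /* For example, if STMT_A is a load from the group { _1 = a[i]; _2 = a[i+1]; }
     and STMT_B is a store between the two loads, the candidate positions for
     the loads are a[i+1] (SLP, last load) and a[i] (interleaving, first load);
     the checks below only succeed if STMT_B's order relative to both
     candidates matches its current order relative to STMT_A.  */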
  gimple *il_a;
  gimple *last_a = il_a = GROUP_FIRST_ELEMENT (stmtinfo_a);
  if (last_a)
    {
      for (gimple *s = GROUP_NEXT_ELEMENT (vinfo_for_stmt (last_a)); s;
	   s = GROUP_NEXT_ELEMENT (vinfo_for_stmt (s)))
	last_a = get_later_stmt (last_a, s);
      if (!DR_IS_WRITE (STMT_VINFO_DATA_REF (stmtinfo_a)))
	{
	  for (gimple *s = GROUP_NEXT_ELEMENT (vinfo_for_stmt (il_a)); s;
	       s = GROUP_NEXT_ELEMENT (vinfo_for_stmt (s)))
	    if (get_later_stmt (il_a, s) == il_a)
	      il_a = s;
	}
      else
	il_a = last_a;
    }
  else
    last_a = il_a = stmt_a;
  gimple *il_b;
  gimple *last_b = il_b = GROUP_FIRST_ELEMENT (stmtinfo_b);
  if (last_b)
    {
      for (gimple *s = GROUP_NEXT_ELEMENT (vinfo_for_stmt (last_b)); s;
	   s = GROUP_NEXT_ELEMENT (vinfo_for_stmt (s)))
	last_b = get_later_stmt (last_b, s);
      if (!DR_IS_WRITE (STMT_VINFO_DATA_REF (stmtinfo_b)))
	{
	  for (gimple *s = GROUP_NEXT_ELEMENT (vinfo_for_stmt (il_b)); s;
	       s = GROUP_NEXT_ELEMENT (vinfo_for_stmt (s)))
	    if (get_later_stmt (il_b, s) == il_b)
	      il_b = s;
	}
      else
	il_b = last_b;
    }
  else
    last_b = il_b = stmt_b;
  bool a_after_b = (get_later_stmt (stmt_a, stmt_b) == stmt_a);
  return (/* SLP */
	  (get_later_stmt (last_a, last_b) == last_a) == a_after_b
	  /* Interleaving */
	  && (get_later_stmt (il_a, il_b) == il_a) == a_after_b
	  /* Mixed */
	  && (get_later_stmt (il_a, last_b) == il_a) == a_after_b
	  && (get_later_stmt (last_a, il_b) == last_a) == a_after_b);
}

/* A subroutine of vect_analyze_data_ref_dependence.  Handle
   DDR_COULD_BE_INDEPENDENT_P ddr DDR that has a known set of dependence
   distances.  These distances are conservatively correct but they don't
   reflect a guaranteed dependence.

   Return true if this function does all the work necessary to avoid
   an alias or false if the caller should use the dependence distances
   to limit the vectorization factor in the usual way.  LOOP_DEPTH is
   the depth of the loop described by LOOP_VINFO and the other arguments
   are as for vect_analyze_data_ref_dependence.  */

static bool
vect_analyze_possibly_independent_ddr (data_dependence_relation *ddr,
				       loop_vec_info loop_vinfo,
				       int loop_depth, unsigned int *max_vf)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  lambda_vector dist_v;
  unsigned int i;
  FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
    {
      int dist = dist_v[loop_depth];
      if (dist != 0 && !(dist > 0 && DDR_REVERSED_P (ddr)))
	{
	  /* If the user asserted safelen >= DIST consecutive iterations
	     can be executed concurrently, assume independence.

	     ??? An alternative would be to add the alias check even
	     in this case, and vectorize the fallback loop with the
	     maximum VF set to safelen.  However, if the user has
	     explicitly given a length, it's less likely that that
	     would be a win.  */
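	  /* (loop->safelen is typically set from a construct such as
	     "#pragma omp simd safelen(N)"; "#pragma GCC ivdep" effectively
	     sets it to INT_MAX.)  */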
	  if (loop->safelen >= 2 && abs_hwi (dist) <= loop->safelen)
	    {
	      if ((unsigned int) loop->safelen < *max_vf)
		*max_vf = loop->safelen;
	      LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = false;
	      continue;
	    }

	  /* For dependence distances of 2 or more, we have the option
	     of limiting VF or checking for an alias at runtime.
	     Prefer to check at runtime if we can, to avoid limiting
	     the VF unnecessarily when the bases are in fact independent.

	     Note that the alias checks will be removed if the VF ends up
	     being small enough.  */
	  return vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
	}
    }
  return true;
}


/* Function vect_analyze_data_ref_dependence.

   Return TRUE if there (might) exist a dependence between a memory-reference
   DRA and a memory-reference DRB.  When versioning for alias may check a
   dependence at run-time, return FALSE.  Adjust *MAX_VF according to
   the data dependence.  */

static bool
vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
				  loop_vec_info loop_vinfo,
				  unsigned int *max_vf)
{
  unsigned int i;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
  lambda_vector dist_v;
  unsigned int loop_depth;

  /* In loop analysis all data references should be vectorizable.  */
  if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a)
      || !STMT_VINFO_VECTORIZABLE (stmtinfo_b))
    gcc_unreachable ();

  /* Independent data accesses.  */
  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    return false;

  if (dra == drb
      || (DR_IS_READ (dra) && DR_IS_READ (drb)))
    return false;

  /* We do not have to consider dependences between accesses that belong
     to the same group, unless the stride could be smaller than the
     group size.  */
  if (GROUP_FIRST_ELEMENT (stmtinfo_a)
      && GROUP_FIRST_ELEMENT (stmtinfo_a) == GROUP_FIRST_ELEMENT (stmtinfo_b)
      && !STMT_VINFO_STRIDED_P (stmtinfo_a))
    return false;

  /* Even if we have an anti-dependence then, as the vectorized loop covers at
     least two scalar iterations, there is always also a true dependence.
     As the vectorizer does not re-order loads and stores we can ignore
     the anti-dependence if TBAA can disambiguate both DRs similar to the
     case with known negative distance anti-dependences (positive
     distance anti-dependences would violate TBAA constraints).  */
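  /* For example, with strict aliasing enabled, an anti-dependence between a
     read through an "int *" and a later write through a "float *" has
     non-conflicting alias sets and is ignored here.  */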
  if (((DR_IS_READ (dra) && DR_IS_WRITE (drb))
       || (DR_IS_WRITE (dra) && DR_IS_READ (drb)))
      && !alias_sets_conflict_p (get_alias_set (DR_REF (dra)),
				 get_alias_set (DR_REF (drb))))
    return false;

  /* Unknown data dependence.  */
  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    {
      /* If user asserted safelen consecutive iterations can be
	 executed concurrently, assume independence.  */
      if (loop->safelen >= 2)
	{
	  if ((unsigned int) loop->safelen < *max_vf)
	    *max_vf = loop->safelen;
	  LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = false;
	  return false;
	}

      if (STMT_VINFO_GATHER_SCATTER_P (stmtinfo_a)
	  || STMT_VINFO_GATHER_SCATTER_P (stmtinfo_b))
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "versioning for alias not supported for: "
			       "can't determine dependence between ");
	      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				 DR_REF (dra));
	      dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
	      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				 DR_REF (drb));
	      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	    }
	  return true;
	}

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "versioning for alias required: "
			   "can't determine dependence between ");
	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
			     DR_REF (dra));
	  dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
			     DR_REF (drb));
	  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	}

      /* Add to list of ddrs that need to be tested at run-time.  */
      return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
    }

  /* Known data dependence.  */
  if (DDR_NUM_DIST_VECTS (ddr) == 0)
    {
      /* If user asserted safelen consecutive iterations can be
	 executed concurrently, assume independence.  */
      if (loop->safelen >= 2)
	{
	  if ((unsigned int) loop->safelen < *max_vf)
	    *max_vf = loop->safelen;
	  LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = false;
	  return false;
	}

      if (STMT_VINFO_GATHER_SCATTER_P (stmtinfo_a)
	  || STMT_VINFO_GATHER_SCATTER_P (stmtinfo_b))
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "versioning for alias not supported for: "
			       "bad dist vector for ");
	      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				 DR_REF (dra));
	      dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
	      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				 DR_REF (drb));
	      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	    }
	  return true;
	}

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "versioning for alias required: "
                           "bad dist vector for ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (dra));
          dump_printf (MSG_MISSED_OPTIMIZATION,  " and ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (drb));
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }
      /* Add to list of ddrs that need to be tested at run-time.  */
      return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
    }

  loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));

  if (DDR_COULD_BE_INDEPENDENT_P (ddr)
      && vect_analyze_possibly_independent_ddr (ddr, loop_vinfo,
						loop_depth, max_vf))
    return false;

  FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
    {
      int dist = dist_v[loop_depth];

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
                         "dependence distance  = %d.\n", dist);

      if (dist == 0)
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location,
	                       "dependence distance == 0 between ");
	      dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
	      dump_printf (MSG_NOTE, " and ");
	      dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
	      dump_printf (MSG_NOTE, "\n");
	    }

	  /* When we perform grouped accesses and perform implicit CSE
	     by detecting equal accesses and doing disambiguation with
	     runtime alias tests like for
	        .. = a[i];
		.. = a[i+1];
		a[i] = ..;
		a[i+1] = ..;
		*p = ..;
		.. = a[i];
		.. = a[i+1];
	     where we will end up loading { a[i], a[i+1] } once, make
	     sure that inserting group loads before the first load and
	     stores after the last store will do the right thing.
	     Similar for groups like
	        a[i] = ...;
		... = a[i];
		a[i+1] = ...;
	     where loads from the group interleave with the store.  */
	  if (!vect_preserves_scalar_order_p (DR_STMT (dra), DR_STMT (drb)))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "READ_WRITE dependence in interleaving.\n");
	      return true;
	    }

	  if (loop->safelen < 2)
	    {
	      tree indicator = dr_zero_step_indicator (dra);
	      if (TREE_CODE (indicator) != INTEGER_CST)
		vect_check_nonzero_value (loop_vinfo, indicator);
	      else if (integer_zerop (indicator))
		{
		  if (dump_enabled_p ())
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "access also has a zero step\n");
		  return true;
		}
	    }
	  continue;
	}

      if (dist > 0 && DDR_REVERSED_P (ddr))
	{
	  /* If DDR_REVERSED_P the order of the data-refs in DDR was
	     reversed (to make distance vector positive), and the actual
	     distance is negative.  */
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
	                     "dependence distance negative.\n");
	  /* Record a negative dependence distance to later limit the
	     amount of stmt copying / unrolling we can perform.
	     Only need to handle read-after-write dependence.  */
	  if (DR_IS_READ (drb)
	      && (STMT_VINFO_MIN_NEG_DIST (stmtinfo_b) == 0
		  || STMT_VINFO_MIN_NEG_DIST (stmtinfo_b) > (unsigned)dist))
	    STMT_VINFO_MIN_NEG_DIST (stmtinfo_b) = dist;
	  continue;
	}

      unsigned int abs_dist = abs (dist);
      if (abs_dist >= 2 && abs_dist < *max_vf)
	{
	  /* The dependence distance requires reduction of the maximal
	     vectorization factor.  */
	  *max_vf = abs (dist);
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
	                     "adjusting maximal vectorization factor to %i\n",
	                     *max_vf);
	}

      if (abs_dist >= *max_vf)
	{
	  /* Dependence distance does not create dependence, as far as
	     vectorization is concerned, in this case.  */
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
	                     "dependence distance >= VF.\n");
	  continue;
	}

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
	               "not vectorized, possible dependence "
	               "between data-refs ");
	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (dra));
	  dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (drb));
	  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	}

      return true;
    }

  return false;
}

/* Function vect_analyze_data_ref_dependences.

   Examine all the data references in the loop, and make sure there do not
   exist any data dependences between them.  Set *MAX_VF according to
   the maximum vectorization factor the data dependences allow.  */

bool
vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo,
				   unsigned int *max_vf)
{
  unsigned int i;
  struct data_dependence_relation *ddr;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_analyze_data_ref_dependences ===\n");

  LOOP_VINFO_DDRS (loop_vinfo)
    .create (LOOP_VINFO_DATAREFS (loop_vinfo).length ()
	     * LOOP_VINFO_DATAREFS (loop_vinfo).length ());
  LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = true;
  /* We need read-read dependences to compute STMT_VINFO_SAME_ALIGN_REFS.  */
  if (!compute_all_dependences (LOOP_VINFO_DATAREFS (loop_vinfo),
				&LOOP_VINFO_DDRS (loop_vinfo),
				LOOP_VINFO_LOOP_NEST (loop_vinfo), true))
    return false;

  /* For epilogues we either have no aliases or alias versioning
     was applied to original loop.  Therefore we may just get max_vf
     using VF of original loop.  */
  if (LOOP_VINFO_EPILOGUE_P (loop_vinfo))
    *max_vf = LOOP_VINFO_ORIG_MAX_VECT_FACTOR (loop_vinfo);
  else
    FOR_EACH_VEC_ELT (LOOP_VINFO_DDRS (loop_vinfo), i, ddr)
      if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf))
	return false;

  return true;
}


/* Function vect_slp_analyze_data_ref_dependence.

   Return TRUE if there (might) exist a dependence between a memory-reference
   DRA and a memory-reference DRB.  */

static bool
vect_slp_analyze_data_ref_dependence (struct data_dependence_relation *ddr)
{
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);

  /* We need to check dependences of statements marked as unvectorizable
     as well, they still can prohibit vectorization.  */

  /* Independent data accesses.  */
  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    return false;

  if (dra == drb)
    return false;

  /* Read-read is OK.  */
  if (DR_IS_READ (dra) && DR_IS_READ (drb))
    return false;

  /* If dra and drb are part of the same interleaving chain consider
     them independent.  */
  if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (DR_STMT (dra)))
      && (GROUP_FIRST_ELEMENT (vinfo_for_stmt (DR_STMT (dra)))
	  == GROUP_FIRST_ELEMENT (vinfo_for_stmt (DR_STMT (drb)))))
    return false;

  /* Unknown data dependence.  */
  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    {
      if  (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "can't determine dependence between ");
	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (dra));
	  dump_printf (MSG_MISSED_OPTIMIZATION,  " and ");
	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (drb));
	  dump_printf (MSG_MISSED_OPTIMIZATION,  "\n");
	}
    }
  else if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "determined dependence between ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
      dump_printf (MSG_NOTE, " and ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
      dump_printf (MSG_NOTE,  "\n");
    }

  return true;
}


/* Analyze dependences involved in the transform of SLP NODE.  STORES
   contain the vector of scalar stores of this instance if we are
   disambiguating the loads.  */

static bool
vect_slp_analyze_node_dependences (slp_instance instance, slp_tree node,
				   vec<gimple *> stores, gimple *last_store)
{
  /* This walks over all stmts involved in the SLP load/store done
     in NODE verifying we can sink them up to the last stmt in the
     group.  */
  gimple *last_access = vect_find_last_scalar_stmt_in_slp (node);
  for (unsigned k = 0; k < SLP_INSTANCE_GROUP_SIZE (instance); ++k)
    {
      gimple *access = SLP_TREE_SCALAR_STMTS (node)[k];
      if (access == last_access)
	continue;
      data_reference *dr_a = STMT_VINFO_DATA_REF (vinfo_for_stmt (access));
      for (gimple_stmt_iterator gsi = gsi_for_stmt (access);
	   gsi_stmt (gsi) != last_access; gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  if (! gimple_vuse (stmt)
	      || (DR_IS_READ (dr_a) && ! gimple_vdef (stmt)))
	    continue;

	  /* If we couldn't record a (single) data reference for this
	     stmt we have to give up.  */
	  /* ???  Here and below if dependence analysis fails we can resort
	     to the alias oracle which can handle more kinds of stmts.  */
	  data_reference *dr_b = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
	  if (!dr_b)
	    return false;

	  bool dependent = false;
	  /* If we run into a store of this same instance (we've just
	     marked those) then delay dependence checking until we run
	     into the last store because this is where it will have
	     been sunk to (and we verify if we can do that as well).  */
	  if (gimple_visited_p (stmt))
	    {
	      if (stmt != last_store)
		continue;
	      unsigned i;
	      gimple *store;
	      FOR_EACH_VEC_ELT (stores, i, store)
		{
		  data_reference *store_dr
		    = STMT_VINFO_DATA_REF (vinfo_for_stmt (store));
		  ddr_p ddr = initialize_data_dependence_relation
				(dr_a, store_dr, vNULL);
		  dependent = vect_slp_analyze_data_ref_dependence (ddr);
		  free_dependence_relation (ddr);
		  if (dependent)
		    break;
		}
	    }
	  else
	    {
	      ddr_p ddr = initialize_data_dependence_relation (dr_a,
							       dr_b, vNULL);
	      dependent = vect_slp_analyze_data_ref_dependence (ddr);
	      free_dependence_relation (ddr);
	    }
	  if (dependent)
	    return false;
	}
    }
  return true;
}


/* Function vect_slp_analyze_instance_dependence.

   Examine all the data references in the SLP instance, and make sure there
   do not exist any data dependences between them that would prevent the
   transform.  Return true if there are none.  */

bool
vect_slp_analyze_instance_dependence (slp_instance instance)
{
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_slp_analyze_instance_dependence ===\n");

  /* The stores of this instance are at the root of the SLP tree.  */
  slp_tree store = SLP_INSTANCE_TREE (instance);
  if (! STMT_VINFO_DATA_REF (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (store)[0])))
    store = NULL;

  /* Verify we can sink stores to the vectorized stmt insert location.  */
  gimple *last_store = NULL;
  if (store)
    {
      if (! vect_slp_analyze_node_dependences (instance, store, vNULL, NULL))
	return false;

      /* Mark stores in this instance and remember the last one.  */
      last_store = vect_find_last_scalar_stmt_in_slp (store);
      for (unsigned k = 0; k < SLP_INSTANCE_GROUP_SIZE (instance); ++k)
	gimple_set_visited (SLP_TREE_SCALAR_STMTS (store)[k], true);
    }

  bool res = true;

  /* Verify we can sink loads to the vectorized stmt insert location,
     special-casing stores of this instance.  */
  slp_tree load;
  unsigned int i;
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, load)
    if (! vect_slp_analyze_node_dependences (instance, load,
					     store
					     ? SLP_TREE_SCALAR_STMTS (store)
					     : vNULL, last_store))
      {
	res = false;
	break;
      }

  /* Unset the visited flag.  */
  if (store)
    for (unsigned k = 0; k < SLP_INSTANCE_GROUP_SIZE (instance); ++k)
      gimple_set_visited (SLP_TREE_SCALAR_STMTS (store)[k], false);

  return res;
}

/* Record in VINFO the base alignment guarantee given by DRB.  STMT is
   the statement that contains DRB, which is useful for recording in the
   dump file.  */

static void
vect_record_base_alignment (vec_info *vinfo, gimple *stmt,
			    innermost_loop_behavior *drb)
{
  bool existed;
  innermost_loop_behavior *&entry
    = vinfo->base_alignments.get_or_insert (drb->base_address, &existed);
  if (!existed || entry->base_alignment < drb->base_alignment)
    {
      entry = drb;
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "recording new base alignment for ");
	  dump_generic_expr (MSG_NOTE, TDF_SLIM, drb->base_address);
	  dump_printf (MSG_NOTE, "\n");
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "  alignment:    %d\n", drb->base_alignment);
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "  misalignment: %d\n", drb->base_misalignment);
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "  based on:     ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	}
    }
}

/* If the region we're going to vectorize is reached, all unconditional
   data references occur at least once.  We can therefore pool the base
   alignment guarantees from each unconditional reference.  Do this by
   going through all the data references in VINFO and checking whether
   the containing statement makes the reference unconditionally.  If so,
   record the alignment of the base address in VINFO so that it can be
   used for all other references with the same base.  */

void
vect_record_base_alignments (vec_info *vinfo)
{
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
  data_reference *dr;
  unsigned int i;
  FOR_EACH_VEC_ELT (vinfo->datarefs, i, dr)
    if (!DR_IS_CONDITIONAL_IN_STMT (dr))
      {
	gimple *stmt = DR_STMT (dr);
	vect_record_base_alignment (vinfo, stmt, &DR_INNERMOST (dr));

	/* If DR is nested in the loop that is being vectorized, we can also
	   record the alignment of the base wrt the outer loop.  */
	if (loop && nested_in_vect_loop_p (loop, stmt))
	  {
	    stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
	    vect_record_base_alignment
	      (vinfo, stmt, &STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info));
	  }
      }
}

/* Return the target alignment for the vectorized form of DR.  */

static unsigned int
vect_calculate_target_alignment (struct data_reference *dr)
{
  gimple *stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  return targetm.vectorize.preferred_vector_alignment (vectype);
}

/* Function vect_compute_data_ref_alignment

   Compute the misalignment of the data reference DR.

   Output:
   1. If during the misalignment computation it is found that the data reference
      cannot be vectorized then false is returned.
   2. DR_MISALIGNMENT (DR) is defined.

   FOR NOW: No analysis is actually performed. Misalignment is calculated
   only for trivial cases. TODO.  */

bool
vect_compute_data_ref_alignment (struct data_reference *dr)
{
  gimple *stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  vec_base_alignments *base_alignments = &stmt_info->vinfo->base_alignments;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = NULL;
  tree ref = DR_REF (dr);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_compute_data_ref_alignment:\n");

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  /* Initialize misalignment to unknown.  */
  SET_DR_MISALIGNMENT (dr, DR_MISALIGNMENT_UNKNOWN);

  innermost_loop_behavior *drb = vect_dr_behavior (dr);
  bool step_preserves_misalignment_p;

  unsigned HOST_WIDE_INT vector_alignment
    = vect_calculate_target_alignment (dr) / BITS_PER_UNIT;
  DR_TARGET_ALIGNMENT (dr) = vector_alignment;

  /* No step for BB vectorization.  */
  if (!loop)
    {
      gcc_assert (integer_zerop (drb->step));
      step_preserves_misalignment_p = true;
    }

  /* In case the dataref is in an inner-loop of the loop that is being
     vectorized (LOOP), we use the base and misalignment information
     relative to the outer-loop (LOOP).  This is ok only if the misalignment
     stays the same throughout the execution of the inner-loop, which is why
     we have to check that the stride of the dataref in the inner-loop evenly
     divides by the vector alignment.  */
  else if (nested_in_vect_loop_p (loop, stmt))
    {
      step_preserves_misalignment_p
	= (DR_STEP_ALIGNMENT (dr) % vector_alignment) == 0;

      if (dump_enabled_p ())
	{
	  if (step_preserves_misalignment_p)
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "inner step divides the vector alignment.\n");
	  else
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "inner step doesn't divide the vector"
			     " alignment.\n");
	}
    }

  /* Similarly we can only use base and misalignment information relative to
     an innermost loop if the misalignment stays the same throughout the
     execution of the loop.  As above, this is the case if the stride of
     the dataref evenly divides by the alignment.  */
  else
    {
      poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
      step_preserves_misalignment_p
	= multiple_p (DR_STEP_ALIGNMENT (dr) * vf, vector_alignment);

      if (!step_preserves_misalignment_p && dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "step doesn't divide the vector alignment.\n");
    }

  unsigned int base_alignment = drb->base_alignment;
  unsigned int base_misalignment = drb->base_misalignment;

  /* Calculate the maximum of the pooled base address alignment and the
     alignment that we can compute for DR itself.  */
  innermost_loop_behavior **entry = base_alignments->get (drb->base_address);
  if (entry && base_alignment < (*entry)->base_alignment)
    {
      base_alignment = (*entry)->base_alignment;
      base_misalignment = (*entry)->base_misalignment;
    }

  if (drb->offset_alignment < vector_alignment
      || !step_preserves_misalignment_p
      /* We need to know whether the step wrt the vectorized loop is
	 negative when computing the starting misalignment below.  */
      || TREE_CODE (drb->step) != INTEGER_CST)
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
	                   "Unknown alignment for access: ");
	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, ref);
	  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	}
      return true;
    }

  if (base_alignment < vector_alignment)
    {
      unsigned int max_alignment;
      tree base = get_base_for_alignment (drb->base_address, &max_alignment);
      if (max_alignment < vector_alignment
	  || !vect_can_force_dr_alignment_p (base,
					     vector_alignment * BITS_PER_UNIT))
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location,
	                       "can't force alignment of ref: ");
	      dump_generic_expr (MSG_NOTE, TDF_SLIM, ref);
	      dump_printf (MSG_NOTE, "\n");
	    }
	  return true;
	}

      /* Force the alignment of the decl.
	 NOTE: This is the only change to the code we make during
	 the analysis phase, before deciding to vectorize the loop.  */
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "force alignment of ");
          dump_generic_expr (MSG_NOTE, TDF_SLIM, ref);
          dump_printf (MSG_NOTE, "\n");
        }

      DR_VECT_AUX (dr)->base_decl = base;
      DR_VECT_AUX (dr)->base_misaligned = true;
      base_misalignment = 0;
    }
  poly_int64 misalignment
    = base_misalignment + wi::to_poly_offset (drb->init).force_shwi ();

  /* If this is a backward running DR then first access in the larger
     vectype actually is N-1 elements before the address in the DR.
     Adjust misalign accordingly.  */
  if (tree_int_cst_sgn (drb->step) < 0)
    /* PLUS because STEP is negative.  */
    misalignment += ((TYPE_VECTOR_SUBPARTS (vectype) - 1)
		     * TREE_INT_CST_LOW (drb->step));

  unsigned int const_misalignment;
  if (!known_misalignment (misalignment, vector_alignment,
			   &const_misalignment))
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "Non-constant misalignment for access: ");
	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, ref);
	  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	}
      return true;
    }

  SET_DR_MISALIGNMENT (dr, const_misalignment);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                       "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, ref);
      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
    }

  return true;
}

/* Function vect_update_misalignment_for_peel.
   Sets DR's misalignment
   - to 0 if it has the same alignment as DR_PEEL,
   - to the misalignment computed using NPEEL if DR's alignment is known,
108238fd1498Szrj    - to -1 (unknown) otherwise.
108338fd1498Szrj 
108438fd1498Szrj    DR - the data reference whose misalignment is to be adjusted.
108538fd1498Szrj    DR_PEEL - the data reference whose misalignment is being made
108638fd1498Szrj              zero in the vector loop by the peel.
108738fd1498Szrj    NPEEL - the number of iterations in the peel loop if the misalignment
108838fd1498Szrj            of DR_PEEL is known at compile time.  */
108938fd1498Szrj 
109038fd1498Szrj static void
vect_update_misalignment_for_peel(struct data_reference * dr,struct data_reference * dr_peel,int npeel)109138fd1498Szrj vect_update_misalignment_for_peel (struct data_reference *dr,
109238fd1498Szrj                                    struct data_reference *dr_peel, int npeel)
109338fd1498Szrj {
109438fd1498Szrj   unsigned int i;
109538fd1498Szrj   vec<dr_p> same_aligned_drs;
109638fd1498Szrj   struct data_reference *current_dr;
109738fd1498Szrj   int dr_size = vect_get_scalar_dr_size (dr);
109838fd1498Szrj   int dr_peel_size = vect_get_scalar_dr_size (dr_peel);
109938fd1498Szrj   stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
110038fd1498Szrj   stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));
110138fd1498Szrj 
110238fd1498Szrj  /* For interleaved data accesses the step in the loop must be multiplied by
110338fd1498Szrj      the size of the interleaving group.  */
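  /* E.g. (hypothetical numbers): a group of 4 interleaved 4-byte accesses
     advances by 16 bytes per scalar iteration, so DR_SIZE becomes 16.  */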
110438fd1498Szrj   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
110538fd1498Szrj     dr_size *= GROUP_SIZE (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info)));
110638fd1498Szrj   if (STMT_VINFO_GROUPED_ACCESS (peel_stmt_info))
110738fd1498Szrj     dr_peel_size *= GROUP_SIZE (peel_stmt_info);
110838fd1498Szrj 
110938fd1498Szrj   /* It can be assumed that the data refs with the same alignment as dr_peel
111038fd1498Szrj      are aligned in the vector loop.  */
111138fd1498Szrj   same_aligned_drs
111238fd1498Szrj     = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
111338fd1498Szrj   FOR_EACH_VEC_ELT (same_aligned_drs, i, current_dr)
111438fd1498Szrj     {
111538fd1498Szrj       if (current_dr != dr)
111638fd1498Szrj         continue;
111738fd1498Szrj       gcc_assert (!known_alignment_for_access_p (dr)
111838fd1498Szrj 		  || !known_alignment_for_access_p (dr_peel)
111938fd1498Szrj 		  || (DR_MISALIGNMENT (dr) / dr_size
112038fd1498Szrj 		      == DR_MISALIGNMENT (dr_peel) / dr_peel_size));
112138fd1498Szrj       SET_DR_MISALIGNMENT (dr, 0);
112238fd1498Szrj       return;
112338fd1498Szrj     }
112438fd1498Szrj 
112538fd1498Szrj   if (known_alignment_for_access_p (dr)
112638fd1498Szrj       && known_alignment_for_access_p (dr_peel))
112738fd1498Szrj     {
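      /* Hypothetical example: with dr_size = 4, npeel = 3, a forward DR whose
         old misalignment is 4 and whose DR_TARGET_ALIGNMENT is 16, the update
         below gives (4 + 3 * 4) & 15 = 0.  */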
112838fd1498Szrj       bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
112938fd1498Szrj       int misal = DR_MISALIGNMENT (dr);
113038fd1498Szrj       misal += negative ? -npeel * dr_size : npeel * dr_size;
113138fd1498Szrj       misal &= DR_TARGET_ALIGNMENT (dr) - 1;
113238fd1498Szrj       SET_DR_MISALIGNMENT (dr, misal);
113338fd1498Szrj       return;
113438fd1498Szrj     }
113538fd1498Szrj 
113638fd1498Szrj   if (dump_enabled_p ())
113738fd1498Szrj     dump_printf_loc (MSG_NOTE, vect_location, "Setting misalignment " \
113838fd1498Szrj 		     "to unknown (-1).\n");
113938fd1498Szrj   SET_DR_MISALIGNMENT (dr, DR_MISALIGNMENT_UNKNOWN);
114038fd1498Szrj }
114138fd1498Szrj 
114238fd1498Szrj 
114338fd1498Szrj /* Function verify_data_ref_alignment
114438fd1498Szrj 
114538fd1498Szrj    Return TRUE if DR can be handled with respect to alignment.  */
114638fd1498Szrj 
114738fd1498Szrj static bool
114838fd1498Szrj verify_data_ref_alignment (data_reference_p dr)
114938fd1498Szrj {
115038fd1498Szrj   enum dr_alignment_support supportable_dr_alignment
115138fd1498Szrj     = vect_supportable_dr_alignment (dr, false);
115238fd1498Szrj   if (!supportable_dr_alignment)
115338fd1498Szrj     {
115438fd1498Szrj       if (dump_enabled_p ())
115538fd1498Szrj 	{
115638fd1498Szrj 	  if (DR_IS_READ (dr))
115738fd1498Szrj 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
115838fd1498Szrj 			     "not vectorized: unsupported unaligned load.");
115938fd1498Szrj 	  else
116038fd1498Szrj 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
116138fd1498Szrj 			     "not vectorized: unsupported unaligned "
116238fd1498Szrj 			     "store.");
116338fd1498Szrj 
116438fd1498Szrj 	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
116538fd1498Szrj 			     DR_REF (dr));
116638fd1498Szrj 	  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
116738fd1498Szrj 	}
116838fd1498Szrj       return false;
116938fd1498Szrj     }
117038fd1498Szrj 
117138fd1498Szrj   if (supportable_dr_alignment != dr_aligned && dump_enabled_p ())
117238fd1498Szrj     dump_printf_loc (MSG_NOTE, vect_location,
117338fd1498Szrj 		     "Vectorizing an unaligned access.\n");
117438fd1498Szrj 
117538fd1498Szrj   return true;
117638fd1498Szrj }
117738fd1498Szrj 
117838fd1498Szrj /* Function vect_verify_datarefs_alignment
117938fd1498Szrj 
118038fd1498Szrj    Return TRUE if all data references in the loop can be
118138fd1498Szrj    handled with respect to alignment.  */
118238fd1498Szrj 
118338fd1498Szrj bool
118438fd1498Szrj vect_verify_datarefs_alignment (loop_vec_info vinfo)
118538fd1498Szrj {
118638fd1498Szrj   vec<data_reference_p> datarefs = vinfo->datarefs;
118738fd1498Szrj   struct data_reference *dr;
118838fd1498Szrj   unsigned int i;
118938fd1498Szrj 
119038fd1498Szrj   FOR_EACH_VEC_ELT (datarefs, i, dr)
119138fd1498Szrj     {
119238fd1498Szrj       gimple *stmt = DR_STMT (dr);
119338fd1498Szrj       stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
119438fd1498Szrj 
119538fd1498Szrj       if (!STMT_VINFO_RELEVANT_P (stmt_info))
119638fd1498Szrj 	continue;
119738fd1498Szrj 
119838fd1498Szrj       /* For interleaving, only the alignment of the first access matters.   */
119938fd1498Szrj       if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
120038fd1498Szrj 	  && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
120138fd1498Szrj 	continue;
120238fd1498Szrj 
120338fd1498Szrj       /* Strided accesses perform only component accesses, alignment is
120438fd1498Szrj 	 irrelevant for them.  */
120538fd1498Szrj       if (STMT_VINFO_STRIDED_P (stmt_info)
120638fd1498Szrj 	  && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
120738fd1498Szrj 	continue;
120838fd1498Szrj 
120938fd1498Szrj       if (! verify_data_ref_alignment (dr))
121038fd1498Szrj 	return false;
121138fd1498Szrj     }
121238fd1498Szrj 
121338fd1498Szrj   return true;
121438fd1498Szrj }
121538fd1498Szrj 
121638fd1498Szrj /* Given a memory reference EXP, return whether its alignment is less
121738fd1498Szrj    than its size.  */
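/* For example (hypothetical): a 4-byte integer member of a packed structure
   may only be guaranteed 1-byte alignment, in which case this returns
   true.  */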
121838fd1498Szrj 
121938fd1498Szrj static bool
122038fd1498Szrj not_size_aligned (tree exp)
122138fd1498Szrj {
122238fd1498Szrj   if (!tree_fits_uhwi_p (TYPE_SIZE (TREE_TYPE (exp))))
122338fd1498Szrj     return true;
122438fd1498Szrj 
122538fd1498Szrj   return (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (exp)))
122638fd1498Szrj 	  > get_object_alignment (exp));
122738fd1498Szrj }
122838fd1498Szrj 
122938fd1498Szrj /* Function vector_alignment_reachable_p
123038fd1498Szrj 
123138fd1498Szrj    Return true if vector alignment for DR is reachable by peeling
123238fd1498Szrj    a few loop iterations.  Return false otherwise.  */
123338fd1498Szrj 
123438fd1498Szrj static bool
123538fd1498Szrj vector_alignment_reachable_p (struct data_reference *dr)
123638fd1498Szrj {
123738fd1498Szrj   gimple *stmt = DR_STMT (dr);
123838fd1498Szrj   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
123938fd1498Szrj   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
124038fd1498Szrj 
124138fd1498Szrj   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
124238fd1498Szrj     {
124338fd1498Szrj       /* For interleaved accesses we peel only if the number of iterations
124438fd1498Szrj 	 in the prologue loop ({VF - misalignment}) is a multiple of the
124538fd1498Szrj 	 number of interleaved accesses.  */
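      /* E.g. (hypothetical numbers): with 8 elements per vector, a
         misalignment of 2 elements and a group size of 3, 8 - 2 = 6 is a
         multiple of 3, so peeling can align the whole group.  */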
124638fd1498Szrj       int elem_size, mis_in_elements;
124738fd1498Szrj 
124838fd1498Szrj       /* FORNOW: handle only known alignment.  */
124938fd1498Szrj       if (!known_alignment_for_access_p (dr))
125038fd1498Szrj 	return false;
125138fd1498Szrj 
125238fd1498Szrj       poly_uint64 nelements = TYPE_VECTOR_SUBPARTS (vectype);
125338fd1498Szrj       poly_uint64 vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
125438fd1498Szrj       elem_size = vector_element_size (vector_size, nelements);
125538fd1498Szrj       mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
125638fd1498Szrj 
125738fd1498Szrj       if (!multiple_p (nelements - mis_in_elements, GROUP_SIZE (stmt_info)))
125838fd1498Szrj 	return false;
125938fd1498Szrj     }
126038fd1498Szrj 
126138fd1498Szrj   /* If the misalignment is known at compile time then allow peeling
126238fd1498Szrj      only if natural alignment is reachable through peeling.  */
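  /* For instance (hypothetical numbers): with 4-byte elements, a misalignment
     of 6 bytes cannot be removed by peeling whole elements, since 6 % 4 != 0,
     whereas a misalignment of 8 bytes can.  */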
126338fd1498Szrj   if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
126438fd1498Szrj     {
126538fd1498Szrj       HOST_WIDE_INT elmsize =
126638fd1498Szrj 		int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
126738fd1498Szrj       if (dump_enabled_p ())
126838fd1498Szrj 	{
126938fd1498Szrj 	  dump_printf_loc (MSG_NOTE, vect_location,
127038fd1498Szrj 	                   "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
127138fd1498Szrj 	  dump_printf (MSG_NOTE,
127238fd1498Szrj 	               ". misalignment = %d.\n", DR_MISALIGNMENT (dr));
127338fd1498Szrj 	}
127438fd1498Szrj       if (DR_MISALIGNMENT (dr) % elmsize)
127538fd1498Szrj 	{
127638fd1498Szrj 	  if (dump_enabled_p ())
127738fd1498Szrj 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
127838fd1498Szrj 	                     "data size does not divide the misalignment.\n");
127938fd1498Szrj 	  return false;
128038fd1498Szrj 	}
128138fd1498Szrj     }
128238fd1498Szrj 
128338fd1498Szrj   if (!known_alignment_for_access_p (dr))
128438fd1498Szrj     {
128538fd1498Szrj       tree type = TREE_TYPE (DR_REF (dr));
128638fd1498Szrj       bool is_packed = not_size_aligned (DR_REF (dr));
128738fd1498Szrj       if (dump_enabled_p ())
128838fd1498Szrj 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
128938fd1498Szrj 	                 "Unknown misalignment, %snaturally aligned\n",
129038fd1498Szrj 			 is_packed ? "not " : "");
129138fd1498Szrj       return targetm.vectorize.vector_alignment_reachable (type, is_packed);
129238fd1498Szrj     }
129338fd1498Szrj 
129438fd1498Szrj   return true;
129538fd1498Szrj }
129638fd1498Szrj 
129738fd1498Szrj 
129838fd1498Szrj /* Calculate the cost of the memory access represented by DR.  */
129938fd1498Szrj 
130038fd1498Szrj static void
130138fd1498Szrj vect_get_data_access_cost (struct data_reference *dr,
130238fd1498Szrj                            unsigned int *inside_cost,
130338fd1498Szrj                            unsigned int *outside_cost,
130438fd1498Szrj 			   stmt_vector_for_cost *body_cost_vec)
130538fd1498Szrj {
130638fd1498Szrj   gimple *stmt = DR_STMT (dr);
130738fd1498Szrj   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
130838fd1498Szrj   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
130938fd1498Szrj   int ncopies;
131038fd1498Szrj 
131138fd1498Szrj   if (PURE_SLP_STMT (stmt_info))
131238fd1498Szrj     ncopies = 1;
131338fd1498Szrj   else
131438fd1498Szrj     ncopies = vect_get_num_copies (loop_vinfo, STMT_VINFO_VECTYPE (stmt_info));
131538fd1498Szrj 
131638fd1498Szrj   if (DR_IS_READ (dr))
131738fd1498Szrj     vect_get_load_cost (dr, ncopies, true, inside_cost, outside_cost,
131838fd1498Szrj 			NULL, body_cost_vec, false);
131938fd1498Szrj   else
132038fd1498Szrj     vect_get_store_cost (dr, ncopies, inside_cost, body_cost_vec);
132138fd1498Szrj 
132238fd1498Szrj   if (dump_enabled_p ())
132338fd1498Szrj     dump_printf_loc (MSG_NOTE, vect_location,
132438fd1498Szrj                      "vect_get_data_access_cost: inside_cost = %d, "
132538fd1498Szrj                      "outside_cost = %d.\n", *inside_cost, *outside_cost);
132638fd1498Szrj }
132738fd1498Szrj 
132838fd1498Szrj 
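/* One candidate peeling amount: NPEEL scalar iterations to peel, DR a data
   reference that would become aligned by it, and COUNT the number of data
   references recorded for this peeling amount.  */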
132938fd1498Szrj typedef struct _vect_peel_info
133038fd1498Szrj {
133138fd1498Szrj   struct data_reference *dr;
133238fd1498Szrj   int npeel;
133338fd1498Szrj   unsigned int count;
133438fd1498Szrj } *vect_peel_info;
133538fd1498Szrj 
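/* A peeling option together with the inside-loop and outside-loop costs
   computed for it by the cost model.  */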
133638fd1498Szrj typedef struct _vect_peel_extended_info
133738fd1498Szrj {
133838fd1498Szrj   struct _vect_peel_info peel_info;
133938fd1498Szrj   unsigned int inside_cost;
134038fd1498Szrj   unsigned int outside_cost;
134138fd1498Szrj } *vect_peel_extended_info;
134238fd1498Szrj 
134338fd1498Szrj 
134438fd1498Szrj /* Peeling hashtable helpers.  */
134538fd1498Szrj 
134638fd1498Szrj struct peel_info_hasher : free_ptr_hash <_vect_peel_info>
134738fd1498Szrj {
134838fd1498Szrj   static inline hashval_t hash (const _vect_peel_info *);
134938fd1498Szrj   static inline bool equal (const _vect_peel_info *, const _vect_peel_info *);
135038fd1498Szrj };
135138fd1498Szrj 
135238fd1498Szrj inline hashval_t
135338fd1498Szrj peel_info_hasher::hash (const _vect_peel_info *peel_info)
135438fd1498Szrj {
135538fd1498Szrj   return (hashval_t) peel_info->npeel;
135638fd1498Szrj }
135738fd1498Szrj 
135838fd1498Szrj inline bool
135938fd1498Szrj peel_info_hasher::equal (const _vect_peel_info *a, const _vect_peel_info *b)
136038fd1498Szrj {
136138fd1498Szrj   return (a->npeel == b->npeel);
136238fd1498Szrj }
136338fd1498Szrj 
136438fd1498Szrj 
136538fd1498Szrj /* Insert DR into peeling hash table with NPEEL as key.  */
136638fd1498Szrj 
136738fd1498Szrj static void
136838fd1498Szrj vect_peeling_hash_insert (hash_table<peel_info_hasher> *peeling_htab,
136938fd1498Szrj 			  loop_vec_info loop_vinfo, struct data_reference *dr,
137038fd1498Szrj                           int npeel)
137138fd1498Szrj {
137238fd1498Szrj   struct _vect_peel_info elem, *slot;
137338fd1498Szrj   _vect_peel_info **new_slot;
137438fd1498Szrj   bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
137538fd1498Szrj 
137638fd1498Szrj   elem.npeel = npeel;
137738fd1498Szrj   slot = peeling_htab->find (&elem);
137838fd1498Szrj   if (slot)
137938fd1498Szrj     slot->count++;
138038fd1498Szrj   else
138138fd1498Szrj     {
138238fd1498Szrj       slot = XNEW (struct _vect_peel_info);
138338fd1498Szrj       slot->npeel = npeel;
138438fd1498Szrj       slot->dr = dr;
138538fd1498Szrj       slot->count = 1;
138638fd1498Szrj       new_slot = peeling_htab->find_slot (slot, INSERT);
138738fd1498Szrj       *new_slot = slot;
138838fd1498Szrj     }
138938fd1498Szrj 
139038fd1498Szrj   if (!supportable_dr_alignment
139138fd1498Szrj       && unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
139238fd1498Szrj     slot->count += VECT_MAX_COST;
139338fd1498Szrj }
139438fd1498Szrj 
139538fd1498Szrj 
139638fd1498Szrj /* Traverse the peeling hash table to find the peeling option that aligns
139738fd1498Szrj    the maximum number of data accesses.  */
139838fd1498Szrj 
139938fd1498Szrj int
140038fd1498Szrj vect_peeling_hash_get_most_frequent (_vect_peel_info **slot,
140138fd1498Szrj 				     _vect_peel_extended_info *max)
140238fd1498Szrj {
140338fd1498Szrj   vect_peel_info elem = *slot;
140438fd1498Szrj 
140538fd1498Szrj   if (elem->count > max->peel_info.count
140638fd1498Szrj       || (elem->count == max->peel_info.count
140738fd1498Szrj           && max->peel_info.npeel > elem->npeel))
140838fd1498Szrj     {
140938fd1498Szrj       max->peel_info.npeel = elem->npeel;
141038fd1498Szrj       max->peel_info.count = elem->count;
141138fd1498Szrj       max->peel_info.dr = elem->dr;
141238fd1498Szrj     }
141338fd1498Szrj 
141438fd1498Szrj   return 1;
141538fd1498Szrj }
141638fd1498Szrj 
141738fd1498Szrj /* Get the costs of peeling NPEEL iterations, checking data access costs
141838fd1498Szrj    for all data refs.  If UNKNOWN_MISALIGNMENT is true, we assume DR0's
141938fd1498Szrj    misalignment will be zero after peeling.  */
142038fd1498Szrj 
142138fd1498Szrj static void
142238fd1498Szrj vect_get_peeling_costs_all_drs (vec<data_reference_p> datarefs,
142338fd1498Szrj 				struct data_reference *dr0,
142438fd1498Szrj 				unsigned int *inside_cost,
142538fd1498Szrj 				unsigned int *outside_cost,
142638fd1498Szrj 				stmt_vector_for_cost *body_cost_vec,
142738fd1498Szrj 				unsigned int npeel,
142838fd1498Szrj 				bool unknown_misalignment)
142938fd1498Szrj {
143038fd1498Szrj   unsigned i;
143138fd1498Szrj   data_reference *dr;
143238fd1498Szrj 
143338fd1498Szrj   FOR_EACH_VEC_ELT (datarefs, i, dr)
143438fd1498Szrj     {
143538fd1498Szrj       gimple *stmt = DR_STMT (dr);
143638fd1498Szrj       stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
143738fd1498Szrj       if (!STMT_VINFO_RELEVANT_P (stmt_info))
143838fd1498Szrj 	continue;
143938fd1498Szrj 
144038fd1498Szrj       /* For interleaving, only the alignment of the first access
144138fd1498Szrj          matters.  */
144238fd1498Szrj       if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
144338fd1498Szrj           && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
144438fd1498Szrj         continue;
144538fd1498Szrj 
144638fd1498Szrj       /* Strided accesses perform only component accesses, alignment is
144738fd1498Szrj          irrelevant for them.  */
144838fd1498Szrj       if (STMT_VINFO_STRIDED_P (stmt_info)
144938fd1498Szrj 	  && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
145038fd1498Szrj 	continue;
145138fd1498Szrj 
145238fd1498Szrj       int save_misalignment;
145338fd1498Szrj       save_misalignment = DR_MISALIGNMENT (dr);
145438fd1498Szrj       if (npeel == 0)
145538fd1498Szrj 	;
145638fd1498Szrj       else if (unknown_misalignment && dr == dr0)
145738fd1498Szrj 	SET_DR_MISALIGNMENT (dr, 0);
145838fd1498Szrj       else
145938fd1498Szrj 	vect_update_misalignment_for_peel (dr, dr0, npeel);
146038fd1498Szrj       vect_get_data_access_cost (dr, inside_cost, outside_cost,
146138fd1498Szrj 				 body_cost_vec);
146238fd1498Szrj       SET_DR_MISALIGNMENT (dr, save_misalignment);
146338fd1498Szrj     }
146438fd1498Szrj }
146538fd1498Szrj 
146638fd1498Szrj /* Traverse the peeling hash table and calculate the cost of each peeling
146738fd1498Szrj    option.  Find the one with the lowest cost.  */
146838fd1498Szrj 
146938fd1498Szrj int
147038fd1498Szrj vect_peeling_hash_get_lowest_cost (_vect_peel_info **slot,
147138fd1498Szrj 				   _vect_peel_extended_info *min)
147238fd1498Szrj {
147338fd1498Szrj   vect_peel_info elem = *slot;
147438fd1498Szrj   int dummy;
147538fd1498Szrj   unsigned int inside_cost = 0, outside_cost = 0;
147638fd1498Szrj   gimple *stmt = DR_STMT (elem->dr);
147738fd1498Szrj   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
147838fd1498Szrj   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
147938fd1498Szrj   stmt_vector_for_cost prologue_cost_vec, body_cost_vec,
148038fd1498Szrj 		       epilogue_cost_vec;
148138fd1498Szrj 
148238fd1498Szrj   prologue_cost_vec.create (2);
148338fd1498Szrj   body_cost_vec.create (2);
148438fd1498Szrj   epilogue_cost_vec.create (2);
148538fd1498Szrj 
148638fd1498Szrj   vect_get_peeling_costs_all_drs (LOOP_VINFO_DATAREFS (loop_vinfo),
148738fd1498Szrj 				  elem->dr, &inside_cost, &outside_cost,
148838fd1498Szrj 				  &body_cost_vec, elem->npeel, false);
148938fd1498Szrj 
149038fd1498Szrj   body_cost_vec.release ();
149138fd1498Szrj 
149238fd1498Szrj   outside_cost += vect_get_known_peeling_cost
149338fd1498Szrj     (loop_vinfo, elem->npeel, &dummy,
149438fd1498Szrj      &LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
149538fd1498Szrj      &prologue_cost_vec, &epilogue_cost_vec);
149638fd1498Szrj 
149738fd1498Szrj   /* Prologue and epilogue costs are added to the target model later.
149838fd1498Szrj      These costs depend only on the scalar iteration cost, the
149938fd1498Szrj      number of peeling iterations finally chosen, and the number of
150038fd1498Szrj      misaligned statements.  So discard the information found here.  */
150138fd1498Szrj   prologue_cost_vec.release ();
150238fd1498Szrj   epilogue_cost_vec.release ();
150338fd1498Szrj 
150438fd1498Szrj   if (inside_cost < min->inside_cost
150538fd1498Szrj       || (inside_cost == min->inside_cost
150638fd1498Szrj 	  && outside_cost < min->outside_cost))
150738fd1498Szrj     {
150838fd1498Szrj       min->inside_cost = inside_cost;
150938fd1498Szrj       min->outside_cost = outside_cost;
151038fd1498Szrj       min->peel_info.dr = elem->dr;
151138fd1498Szrj       min->peel_info.npeel = elem->npeel;
151238fd1498Szrj       min->peel_info.count = elem->count;
151338fd1498Szrj     }
151438fd1498Szrj 
151538fd1498Szrj   return 1;
151638fd1498Szrj }
151738fd1498Szrj 
151838fd1498Szrj 
151938fd1498Szrj /* Choose best peeling option by traversing peeling hash table and either
152038fd1498Szrj    choosing an option with the lowest cost (if cost model is enabled) or the
152138fd1498Szrj    option that aligns as many accesses as possible.  */
152238fd1498Szrj 
152338fd1498Szrj static struct _vect_peel_extended_info
152438fd1498Szrj vect_peeling_hash_choose_best_peeling (hash_table<peel_info_hasher> *peeling_htab,
152538fd1498Szrj 				       loop_vec_info loop_vinfo)
152638fd1498Szrj {
152738fd1498Szrj    struct _vect_peel_extended_info res;
152838fd1498Szrj 
152938fd1498Szrj    res.peel_info.dr = NULL;
153038fd1498Szrj 
153138fd1498Szrj    if (!unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
153238fd1498Szrj      {
153338fd1498Szrj        res.inside_cost = INT_MAX;
153438fd1498Szrj        res.outside_cost = INT_MAX;
153538fd1498Szrj        peeling_htab->traverse <_vect_peel_extended_info *,
153638fd1498Szrj 	   		       vect_peeling_hash_get_lowest_cost> (&res);
153738fd1498Szrj      }
153838fd1498Szrj    else
153938fd1498Szrj      {
154038fd1498Szrj        res.peel_info.count = 0;
154138fd1498Szrj        peeling_htab->traverse <_vect_peel_extended_info *,
154238fd1498Szrj 	   		       vect_peeling_hash_get_most_frequent> (&res);
154338fd1498Szrj        res.inside_cost = 0;
154438fd1498Szrj        res.outside_cost = 0;
154538fd1498Szrj      }
154638fd1498Szrj 
154738fd1498Szrj    return res;
154838fd1498Szrj }
154938fd1498Szrj 
155038fd1498Szrj /* Return true if peeling by NPEEL iterations is supported.  */
155138fd1498Szrj 
155238fd1498Szrj static bool
155338fd1498Szrj vect_peeling_supportable (loop_vec_info loop_vinfo, struct data_reference *dr0,
155438fd1498Szrj 			  unsigned npeel)
155538fd1498Szrj {
155638fd1498Szrj   unsigned i;
155738fd1498Szrj   struct data_reference *dr = NULL;
155838fd1498Szrj   vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
155938fd1498Szrj   gimple *stmt;
156038fd1498Szrj   stmt_vec_info stmt_info;
156138fd1498Szrj   enum dr_alignment_support supportable_dr_alignment;
156238fd1498Szrj 
156338fd1498Szrj   /* Ensure that all data refs can be vectorized after the peel.  */
156438fd1498Szrj   FOR_EACH_VEC_ELT (datarefs, i, dr)
156538fd1498Szrj     {
156638fd1498Szrj       int save_misalignment;
156738fd1498Szrj 
156838fd1498Szrj       if (dr == dr0)
156938fd1498Szrj 	continue;
157038fd1498Szrj 
157138fd1498Szrj       stmt = DR_STMT (dr);
157238fd1498Szrj       stmt_info = vinfo_for_stmt (stmt);
157338fd1498Szrj       /* For interleaving, only the alignment of the first access
157438fd1498Szrj 	 matters.  */
157538fd1498Szrj       if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
157638fd1498Szrj 	  && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
157738fd1498Szrj 	continue;
157838fd1498Szrj 
157938fd1498Szrj       /* Strided accesses perform only component accesses, alignment is
158038fd1498Szrj 	 irrelevant for them.  */
158138fd1498Szrj       if (STMT_VINFO_STRIDED_P (stmt_info)
158238fd1498Szrj 	  && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
158338fd1498Szrj 	continue;
158438fd1498Szrj 
158538fd1498Szrj       save_misalignment = DR_MISALIGNMENT (dr);
158638fd1498Szrj       vect_update_misalignment_for_peel (dr, dr0, npeel);
158738fd1498Szrj       supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
158838fd1498Szrj       SET_DR_MISALIGNMENT (dr, save_misalignment);
158938fd1498Szrj 
159038fd1498Szrj       if (!supportable_dr_alignment)
159138fd1498Szrj 	return false;
159238fd1498Szrj     }
159338fd1498Szrj 
159438fd1498Szrj   return true;
159538fd1498Szrj }
159638fd1498Szrj 
159738fd1498Szrj /* Function vect_enhance_data_refs_alignment
159838fd1498Szrj 
159938fd1498Szrj    This pass will use loop versioning and loop peeling in order to enhance
160038fd1498Szrj    the alignment of data references in the loop.
160138fd1498Szrj 
160238fd1498Szrj    FORNOW: we assume that whatever versioning/peeling takes place, only the
160338fd1498Szrj    original loop is to be vectorized.  Any other loops that are created by
160438fd1498Szrj    the transformations performed in this pass are not supposed to be
160538fd1498Szrj    vectorized.  This restriction will be relaxed.
160638fd1498Szrj 
160738fd1498Szrj    This pass will require a cost model to guide it whether to apply peeling
160838fd1498Szrj    or versioning or a combination of the two.  For example, the scheme that
160938fd1498Szrj    Intel uses when given a loop with several memory accesses is as follows:
161038fd1498Szrj    choose one memory access ('p') whose alignment you want to force by doing
161138fd1498Szrj    peeling.  Then, either (1) generate a loop in which 'p' is aligned and all
161238fd1498Szrj    other accesses are not necessarily aligned, or (2) use loop versioning to
161338fd1498Szrj    generate one loop in which all accesses are aligned, and another loop in
161438fd1498Szrj    which only 'p' is necessarily aligned.
161538fd1498Szrj 
161638fd1498Szrj    ("Automatic Intra-Register Vectorization for the Intel Architecture",
161738fd1498Szrj    Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
161838fd1498Szrj    Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
161938fd1498Szrj 
162038fd1498Szrj    Devising a cost model is the most critical aspect of this work.  It will
162138fd1498Szrj    guide us on which access to peel for, whether to use loop versioning, how
162238fd1498Szrj    many versions to create, etc.  The cost model will probably consist of
162338fd1498Szrj    generic considerations as well as target specific considerations (on
162438fd1498Szrj    powerpc for example, misaligned stores are more painful than misaligned
162538fd1498Szrj    loads).
162638fd1498Szrj 
162738fd1498Szrj    Here are the general steps involved in alignment enhancements:
162838fd1498Szrj 
162938fd1498Szrj      -- original loop, before alignment analysis:
163038fd1498Szrj 	for (i=0; i<N; i++){
163138fd1498Szrj 	  x = q[i];			# DR_MISALIGNMENT(q) = unknown
163238fd1498Szrj 	  p[i] = y;			# DR_MISALIGNMENT(p) = unknown
163338fd1498Szrj 	}
163438fd1498Szrj 
163538fd1498Szrj      -- After vect_compute_data_refs_alignment:
163638fd1498Szrj 	for (i=0; i<N; i++){
163738fd1498Szrj 	  x = q[i];			# DR_MISALIGNMENT(q) = 3
163838fd1498Szrj 	  p[i] = y;			# DR_MISALIGNMENT(p) = unknown
163938fd1498Szrj 	}
164038fd1498Szrj 
164138fd1498Szrj      -- Possibility 1: we do loop versioning:
164238fd1498Szrj      if (p is aligned) {
164338fd1498Szrj 	for (i=0; i<N; i++){	# loop 1A
164438fd1498Szrj 	  x = q[i];			# DR_MISALIGNMENT(q) = 3
164538fd1498Szrj 	  p[i] = y;			# DR_MISALIGNMENT(p) = 0
164638fd1498Szrj 	}
164738fd1498Szrj      }
164838fd1498Szrj      else {
164938fd1498Szrj 	for (i=0; i<N; i++){	# loop 1B
165038fd1498Szrj 	  x = q[i];			# DR_MISALIGNMENT(q) = 3
165138fd1498Szrj 	  p[i] = y;			# DR_MISALIGNMENT(p) = unaligned
165238fd1498Szrj 	}
165338fd1498Szrj      }
165438fd1498Szrj 
165538fd1498Szrj      -- Possibility 2: we do loop peeling:
165638fd1498Szrj      for (i = 0; i < 3; i++){	# (scalar loop, not to be vectorized).
165738fd1498Szrj 	x = q[i];
165838fd1498Szrj 	p[i] = y;
165938fd1498Szrj      }
166038fd1498Szrj      for (i = 3; i < N; i++){	# loop 2A
166138fd1498Szrj 	x = q[i];			# DR_MISALIGNMENT(q) = 0
166238fd1498Szrj 	p[i] = y;			# DR_MISALIGNMENT(p) = unknown
166338fd1498Szrj      }
166438fd1498Szrj 
166538fd1498Szrj      -- Possibility 3: combination of loop peeling and versioning:
166638fd1498Szrj      for (i = 0; i < 3; i++){	# (scalar loop, not to be vectorized).
166738fd1498Szrj 	x = q[i];
166838fd1498Szrj 	p[i] = y;
166938fd1498Szrj      }
167038fd1498Szrj      if (p is aligned) {
167138fd1498Szrj 	for (i = 3; i<N; i++){	# loop 3A
167238fd1498Szrj 	  x = q[i];			# DR_MISALIGNMENT(q) = 0
167338fd1498Szrj 	  p[i] = y;			# DR_MISALIGNMENT(p) = 0
167438fd1498Szrj 	}
167538fd1498Szrj      }
167638fd1498Szrj      else {
167738fd1498Szrj 	for (i = 3; i<N; i++){	# loop 3B
167838fd1498Szrj 	  x = q[i];			# DR_MISALIGNMENT(q) = 0
167938fd1498Szrj 	  p[i] = y;			# DR_MISALIGNMENT(p) = unaligned
168038fd1498Szrj 	}
168138fd1498Szrj      }
168238fd1498Szrj 
168338fd1498Szrj      These loops are later passed to loop_transform to be vectorized.  The
168438fd1498Szrj      vectorizer will use the alignment information to guide the transformation
168538fd1498Szrj      (whether to generate regular loads/stores, or with special handling for
168638fd1498Szrj      misalignment).  */
168738fd1498Szrj 
168838fd1498Szrj bool
168938fd1498Szrj vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
169038fd1498Szrj {
169138fd1498Szrj   vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
169238fd1498Szrj   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
169338fd1498Szrj   enum dr_alignment_support supportable_dr_alignment;
169438fd1498Szrj   struct data_reference *dr0 = NULL, *first_store = NULL;
169538fd1498Szrj   struct data_reference *dr;
169638fd1498Szrj   unsigned int i, j;
169738fd1498Szrj   bool do_peeling = false;
169838fd1498Szrj   bool do_versioning = false;
169938fd1498Szrj   bool stat;
170038fd1498Szrj   gimple *stmt;
170138fd1498Szrj   stmt_vec_info stmt_info;
170238fd1498Szrj   unsigned int npeel = 0;
170338fd1498Szrj   bool one_misalignment_known = false;
170438fd1498Szrj   bool one_misalignment_unknown = false;
170538fd1498Szrj   bool one_dr_unsupportable = false;
170638fd1498Szrj   struct data_reference *unsupportable_dr = NULL;
170738fd1498Szrj   poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
170838fd1498Szrj   unsigned possible_npeel_number = 1;
170938fd1498Szrj   tree vectype;
171038fd1498Szrj   unsigned int mis, same_align_drs_max = 0;
171138fd1498Szrj   hash_table<peel_info_hasher> peeling_htab (1);
171238fd1498Szrj 
171338fd1498Szrj   if (dump_enabled_p ())
171438fd1498Szrj     dump_printf_loc (MSG_NOTE, vect_location,
171538fd1498Szrj                      "=== vect_enhance_data_refs_alignment ===\n");
171638fd1498Szrj 
171738fd1498Szrj   /* Reset data so we can safely be called multiple times.  */
171838fd1498Szrj   LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).truncate (0);
171938fd1498Szrj   LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) = 0;
172038fd1498Szrj 
172138fd1498Szrj   /* While cost model enhancements are expected in the future, the high level
172238fd1498Szrj      view of the code at this time is as follows:
172338fd1498Szrj 
172438fd1498Szrj      A) If there is a misaligned access then see if peeling to align
172538fd1498Szrj         this access can make all data references satisfy
172638fd1498Szrj         vect_supportable_dr_alignment.  If so, update data structures
172738fd1498Szrj         as needed and return true.
172838fd1498Szrj 
172938fd1498Szrj      B) If peeling wasn't possible and there is a data reference with an
173038fd1498Szrj         unknown misalignment that does not satisfy vect_supportable_dr_alignment
173138fd1498Szrj         then see if loop versioning checks can be used to make all data
173238fd1498Szrj         references satisfy vect_supportable_dr_alignment.  If so, update
173338fd1498Szrj         data structures as needed and return true.
173438fd1498Szrj 
173538fd1498Szrj      C) If neither peeling nor versioning were successful then return false if
173638fd1498Szrj         any data reference does not satisfy vect_supportable_dr_alignment.
173738fd1498Szrj 
173838fd1498Szrj      D) Return true (all data references satisfy vect_supportable_dr_alignment).
173938fd1498Szrj 
174038fd1498Szrj      Note, Possibility 3 above (which is peeling and versioning together) is not
174138fd1498Szrj      being done at this time.  */
174238fd1498Szrj 
174338fd1498Szrj   /* (1) Peeling to force alignment.  */
174438fd1498Szrj 
174538fd1498Szrj   /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
174638fd1498Szrj      Considerations:
174738fd1498Szrj      + How many accesses will become aligned due to the peeling
174838fd1498Szrj      - How many accesses will become unaligned due to the peeling,
174938fd1498Szrj        and the cost of misaligned accesses.
175038fd1498Szrj      - The cost of peeling (the extra runtime checks, the increase
175138fd1498Szrj        in code size).  */
175238fd1498Szrj 
175338fd1498Szrj   FOR_EACH_VEC_ELT (datarefs, i, dr)
175438fd1498Szrj     {
175538fd1498Szrj       stmt = DR_STMT (dr);
175638fd1498Szrj       stmt_info = vinfo_for_stmt (stmt);
175738fd1498Szrj 
175838fd1498Szrj       if (!STMT_VINFO_RELEVANT_P (stmt_info))
175938fd1498Szrj 	continue;
176038fd1498Szrj 
176138fd1498Szrj       /* For interleaving, only the alignment of the first access
176238fd1498Szrj          matters.  */
176338fd1498Szrj       if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
176438fd1498Szrj           && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
176538fd1498Szrj         continue;
176638fd1498Szrj 
176738fd1498Szrj       /* For invariant accesses there is nothing to enhance.  */
176838fd1498Szrj       if (integer_zerop (DR_STEP (dr)))
176938fd1498Szrj 	continue;
177038fd1498Szrj 
177138fd1498Szrj       /* Strided accesses perform only component accesses, alignment is
177238fd1498Szrj 	 irrelevant for them.  */
177338fd1498Szrj       if (STMT_VINFO_STRIDED_P (stmt_info)
177438fd1498Szrj 	  && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
177538fd1498Szrj 	continue;
177638fd1498Szrj 
177738fd1498Szrj       supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
177838fd1498Szrj       do_peeling = vector_alignment_reachable_p (dr);
177938fd1498Szrj       if (do_peeling)
178038fd1498Szrj         {
178138fd1498Szrj           if (known_alignment_for_access_p (dr))
178238fd1498Szrj             {
178338fd1498Szrj 	      unsigned int npeel_tmp = 0;
178438fd1498Szrj 	      bool negative = tree_int_cst_compare (DR_STEP (dr),
178538fd1498Szrj 						    size_zero_node) < 0;
178638fd1498Szrj 
178738fd1498Szrj 	      vectype = STMT_VINFO_VECTYPE (stmt_info);
178838fd1498Szrj 	      unsigned int target_align = DR_TARGET_ALIGNMENT (dr);
178938fd1498Szrj 	      unsigned int dr_size = vect_get_scalar_dr_size (dr);
179038fd1498Szrj 	      mis = (negative ? DR_MISALIGNMENT (dr) : -DR_MISALIGNMENT (dr));
179138fd1498Szrj 	      if (DR_MISALIGNMENT (dr) != 0)
179238fd1498Szrj 		npeel_tmp = (mis & (target_align - 1)) / dr_size;
179338fd1498Szrj 
179438fd1498Szrj               /* For multiple types, it is possible that the bigger type access
179538fd1498Szrj                  will have more than one peeling option.  E.g., a loop with two
179638fd1498Szrj                  types: one of size (vector size / 4), and the other one of
179738fd1498Szrj                  size (vector size / 8).  The vectorization factor will be 8.  If both
179838fd1498Szrj                  accesses are misaligned by 3, the first one needs one scalar
179938fd1498Szrj                  iteration to be aligned, and the second one needs 5.  But the
180038fd1498Szrj 		 first one will be aligned also by peeling 5 scalar
180138fd1498Szrj                  iterations, and in that case both accesses will be aligned.
180238fd1498Szrj                  Hence, besides the immediate peeling amount, we also want
180338fd1498Szrj                  to try adding full vector-size amounts, as long as we don't
180438fd1498Szrj                  exceed the vectorization factor.
180538fd1498Szrj                  We do this automatically for the cost model, since we
180638fd1498Szrj 		 calculate the cost for every peeling option.  */
180738fd1498Szrj               if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
180838fd1498Szrj 		{
180938fd1498Szrj 		  poly_uint64 nscalars = (STMT_SLP_TYPE (stmt_info)
181038fd1498Szrj 					  ? vf * GROUP_SIZE (stmt_info) : vf);
181138fd1498Szrj 		  possible_npeel_number
181238fd1498Szrj 		    = vect_get_num_vectors (nscalars, vectype);
181338fd1498Szrj 
181438fd1498Szrj 		  /* NPEEL_TMP is 0 when there is no misalignment, but in that
181538fd1498Szrj 		     case also allow peeling NELEMENTS elements.  */
181638fd1498Szrj 		  if (DR_MISALIGNMENT (dr) == 0)
181738fd1498Szrj 		    possible_npeel_number++;
181838fd1498Szrj 		}
181938fd1498Szrj 
182038fd1498Szrj 	      /* Save info about DR in the hash table.  Also include peeling
182138fd1498Szrj 	         amounts according to the explanation above.  */
182238fd1498Szrj               for (j = 0; j < possible_npeel_number; j++)
182338fd1498Szrj                 {
182438fd1498Szrj                   vect_peeling_hash_insert (&peeling_htab, loop_vinfo,
182538fd1498Szrj 					    dr, npeel_tmp);
182638fd1498Szrj 		  npeel_tmp += target_align / dr_size;
182738fd1498Szrj                 }
182838fd1498Szrj 
182938fd1498Szrj 	      one_misalignment_known = true;
183038fd1498Szrj             }
183138fd1498Szrj           else
183238fd1498Szrj             {
183338fd1498Szrj               /* If we don't know any misalignment values, we prefer
183438fd1498Szrj                  peeling for the data-ref that has the maximum number of
183538fd1498Szrj                  data-refs with the same alignment, unless the target prefers
183638fd1498Szrj                  to align stores over loads.  */
183738fd1498Szrj 	      unsigned same_align_drs
183838fd1498Szrj 		= STMT_VINFO_SAME_ALIGN_REFS (stmt_info).length ();
183938fd1498Szrj 	      if (!dr0
184038fd1498Szrj 		  || same_align_drs_max < same_align_drs)
184138fd1498Szrj 		{
184238fd1498Szrj 		  same_align_drs_max = same_align_drs;
184338fd1498Szrj 		  dr0 = dr;
184438fd1498Szrj 		}
184538fd1498Szrj 	      /* For data-refs with the same number of related
184638fd1498Szrj 		 accesses prefer the one where the misalign
184738fd1498Szrj 		 computation will be invariant in the outermost loop.  */
184838fd1498Szrj 	      else if (same_align_drs_max == same_align_drs)
184938fd1498Szrj 		{
185038fd1498Szrj 		  struct loop *ivloop0, *ivloop;
185138fd1498Szrj 		  ivloop0 = outermost_invariant_loop_for_expr
185238fd1498Szrj 		    (loop, DR_BASE_ADDRESS (dr0));
185338fd1498Szrj 		  ivloop = outermost_invariant_loop_for_expr
185438fd1498Szrj 		    (loop, DR_BASE_ADDRESS (dr));
185538fd1498Szrj 		  if ((ivloop && !ivloop0)
185638fd1498Szrj 		      || (ivloop && ivloop0
185738fd1498Szrj 			  && flow_loop_nested_p (ivloop, ivloop0)))
185838fd1498Szrj 		    dr0 = dr;
185938fd1498Szrj 		}
186038fd1498Szrj 
186138fd1498Szrj 	      one_misalignment_unknown = true;
186238fd1498Szrj 
186338fd1498Szrj 	      /* Check for data refs with unsupportable alignment that
186438fd1498Szrj 	         can be peeled.  */
186538fd1498Szrj 	      if (!supportable_dr_alignment)
186638fd1498Szrj 	      {
186738fd1498Szrj 		one_dr_unsupportable = true;
186838fd1498Szrj 		unsupportable_dr = dr;
186938fd1498Szrj 	      }
187038fd1498Szrj 
187138fd1498Szrj 	      if (!first_store && DR_IS_WRITE (dr))
187238fd1498Szrj 		first_store = dr;
187338fd1498Szrj             }
187438fd1498Szrj         }
187538fd1498Szrj       else
187638fd1498Szrj         {
187738fd1498Szrj           if (!aligned_access_p (dr))
187838fd1498Szrj             {
187938fd1498Szrj               if (dump_enabled_p ())
188038fd1498Szrj                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
188138fd1498Szrj                                  "vector alignment may not be reachable\n");
188238fd1498Szrj               break;
188338fd1498Szrj             }
188438fd1498Szrj         }
188538fd1498Szrj     }
188638fd1498Szrj 
188738fd1498Szrj   /* Check if we can possibly peel the loop.  */
188838fd1498Szrj   if (!vect_can_advance_ivs_p (loop_vinfo)
188938fd1498Szrj       || !slpeel_can_duplicate_loop_p (loop, single_exit (loop))
189038fd1498Szrj       || loop->inner)
189138fd1498Szrj     do_peeling = false;
189238fd1498Szrj 
189338fd1498Szrj   struct _vect_peel_extended_info peel_for_known_alignment;
189438fd1498Szrj   struct _vect_peel_extended_info peel_for_unknown_alignment;
189538fd1498Szrj   struct _vect_peel_extended_info best_peel;
189638fd1498Szrj 
189738fd1498Szrj   peel_for_unknown_alignment.inside_cost = INT_MAX;
189838fd1498Szrj   peel_for_unknown_alignment.outside_cost = INT_MAX;
189938fd1498Szrj   peel_for_unknown_alignment.peel_info.count = 0;
190038fd1498Szrj 
190138fd1498Szrj   if (do_peeling
190238fd1498Szrj       && one_misalignment_unknown)
190338fd1498Szrj     {
190438fd1498Szrj       /* Check whether the target prefers to align stores over loads, i.e.,
190538fd1498Szrj          whether misaligned stores are more expensive than misaligned loads
190638fd1498Szrj          (taking DRs with the same alignment into account).  */
190738fd1498Szrj       unsigned int load_inside_cost = 0;
190838fd1498Szrj       unsigned int load_outside_cost = 0;
190938fd1498Szrj       unsigned int store_inside_cost = 0;
191038fd1498Szrj       unsigned int store_outside_cost = 0;
191138fd1498Szrj       unsigned int estimated_npeels = vect_vf_for_cost (loop_vinfo) / 2;
191238fd1498Szrj 
191338fd1498Szrj       stmt_vector_for_cost dummy;
191438fd1498Szrj       dummy.create (2);
191538fd1498Szrj       vect_get_peeling_costs_all_drs (datarefs, dr0,
191638fd1498Szrj 				      &load_inside_cost,
191738fd1498Szrj 				      &load_outside_cost,
191838fd1498Szrj 				      &dummy, estimated_npeels, true);
191938fd1498Szrj       dummy.release ();
192038fd1498Szrj 
192138fd1498Szrj       if (first_store)
192238fd1498Szrj 	{
192338fd1498Szrj 	  dummy.create (2);
192438fd1498Szrj 	  vect_get_peeling_costs_all_drs (datarefs, first_store,
192538fd1498Szrj 					  &store_inside_cost,
192638fd1498Szrj 					  &store_outside_cost,
192738fd1498Szrj 					  &dummy, estimated_npeels, true);
192838fd1498Szrj 	  dummy.release ();
192938fd1498Szrj 	}
193038fd1498Szrj       else
193138fd1498Szrj 	{
193238fd1498Szrj 	  store_inside_cost = INT_MAX;
193338fd1498Szrj 	  store_outside_cost = INT_MAX;
193438fd1498Szrj 	}
193538fd1498Szrj 
193638fd1498Szrj       if (load_inside_cost > store_inside_cost
193738fd1498Szrj 	  || (load_inside_cost == store_inside_cost
193838fd1498Szrj 	      && load_outside_cost > store_outside_cost))
193938fd1498Szrj 	{
194038fd1498Szrj 	  dr0 = first_store;
194138fd1498Szrj 	  peel_for_unknown_alignment.inside_cost = store_inside_cost;
194238fd1498Szrj 	  peel_for_unknown_alignment.outside_cost = store_outside_cost;
194338fd1498Szrj 	}
194438fd1498Szrj       else
194538fd1498Szrj 	{
194638fd1498Szrj 	  peel_for_unknown_alignment.inside_cost = load_inside_cost;
194738fd1498Szrj 	  peel_for_unknown_alignment.outside_cost = load_outside_cost;
194838fd1498Szrj 	}
194938fd1498Szrj 
195038fd1498Szrj       stmt_vector_for_cost prologue_cost_vec, epilogue_cost_vec;
195138fd1498Szrj       prologue_cost_vec.create (2);
195238fd1498Szrj       epilogue_cost_vec.create (2);
195338fd1498Szrj 
195438fd1498Szrj       int dummy2;
195538fd1498Szrj       peel_for_unknown_alignment.outside_cost += vect_get_known_peeling_cost
195638fd1498Szrj 	(loop_vinfo, estimated_npeels, &dummy2,
195738fd1498Szrj 	 &LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
195838fd1498Szrj 	 &prologue_cost_vec, &epilogue_cost_vec);
195938fd1498Szrj 
196038fd1498Szrj       prologue_cost_vec.release ();
196138fd1498Szrj       epilogue_cost_vec.release ();
196238fd1498Szrj 
196338fd1498Szrj       peel_for_unknown_alignment.peel_info.count = 1
196438fd1498Szrj 	+ STMT_VINFO_SAME_ALIGN_REFS
196538fd1498Szrj 	(vinfo_for_stmt (DR_STMT (dr0))).length ();
196638fd1498Szrj     }
196738fd1498Szrj 
196838fd1498Szrj   peel_for_unknown_alignment.peel_info.npeel = 0;
196938fd1498Szrj   peel_for_unknown_alignment.peel_info.dr = dr0;
197038fd1498Szrj 
197138fd1498Szrj   best_peel = peel_for_unknown_alignment;
197238fd1498Szrj 
197338fd1498Szrj   peel_for_known_alignment.inside_cost = INT_MAX;
197438fd1498Szrj   peel_for_known_alignment.outside_cost = INT_MAX;
197538fd1498Szrj   peel_for_known_alignment.peel_info.count = 0;
197638fd1498Szrj   peel_for_known_alignment.peel_info.dr = NULL;
197738fd1498Szrj 
197838fd1498Szrj   if (do_peeling && one_misalignment_known)
197938fd1498Szrj     {
198038fd1498Szrj       /* Peeling is possible, and no data access strictly requires alignment
198138fd1498Szrj          in order to be supported.  So we try to choose the best possible
198238fd1498Szrj 	 peeling from the hash table.  */
198338fd1498Szrj       peel_for_known_alignment = vect_peeling_hash_choose_best_peeling
198438fd1498Szrj 	(&peeling_htab, loop_vinfo);
198538fd1498Szrj     }
198638fd1498Szrj 
198738fd1498Szrj   /* Compare costs of peeling for known and unknown alignment. */
198838fd1498Szrj   if (peel_for_known_alignment.peel_info.dr != NULL
198938fd1498Szrj       && peel_for_unknown_alignment.inside_cost
199038fd1498Szrj       >= peel_for_known_alignment.inside_cost)
199138fd1498Szrj     {
199238fd1498Szrj       best_peel = peel_for_known_alignment;
199338fd1498Szrj 
199438fd1498Szrj       /* If the best peeling for known alignment has NPEEL == 0, perform no
199538fd1498Szrj          peeling at all except if there is an unsupportable dr that we can
199638fd1498Szrj          align.  */
199738fd1498Szrj       if (best_peel.peel_info.npeel == 0 && !one_dr_unsupportable)
199838fd1498Szrj 	do_peeling = false;
199938fd1498Szrj     }
200038fd1498Szrj 
200138fd1498Szrj   /* If there is an unsupportable data ref, prefer this over all choices so far
200238fd1498Szrj      since we'd have to discard a chosen peeling except when it accidentally
200338fd1498Szrj      aligned the unsupportable data ref.  */
200438fd1498Szrj   if (one_dr_unsupportable)
200538fd1498Szrj     dr0 = unsupportable_dr;
200638fd1498Szrj   else if (do_peeling)
200738fd1498Szrj     {
200838fd1498Szrj       /* Calculate the penalty for no peeling, i.e. leaving everything as-is.
200938fd1498Szrj 	 TODO: Use nopeel_outside_cost or get rid of it?  */
201038fd1498Szrj       unsigned nopeel_inside_cost = 0;
201138fd1498Szrj       unsigned nopeel_outside_cost = 0;
201238fd1498Szrj 
201338fd1498Szrj       stmt_vector_for_cost dummy;
201438fd1498Szrj       dummy.create (2);
201538fd1498Szrj       vect_get_peeling_costs_all_drs (datarefs, NULL, &nopeel_inside_cost,
201638fd1498Szrj 				      &nopeel_outside_cost, &dummy, 0, false);
201738fd1498Szrj       dummy.release ();
201838fd1498Szrj 
201938fd1498Szrj       /* Add epilogue costs.  As we do not peel for alignment here, no prologue
202038fd1498Szrj 	 costs will be recorded.  */
202138fd1498Szrj       stmt_vector_for_cost prologue_cost_vec, epilogue_cost_vec;
202238fd1498Szrj       prologue_cost_vec.create (2);
202338fd1498Szrj       epilogue_cost_vec.create (2);
202438fd1498Szrj 
202538fd1498Szrj       int dummy2;
202638fd1498Szrj       nopeel_outside_cost += vect_get_known_peeling_cost
202738fd1498Szrj 	(loop_vinfo, 0, &dummy2,
202838fd1498Szrj 	 &LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
202938fd1498Szrj 	 &prologue_cost_vec, &epilogue_cost_vec);
203038fd1498Szrj 
203138fd1498Szrj       prologue_cost_vec.release ();
203238fd1498Szrj       epilogue_cost_vec.release ();
203338fd1498Szrj 
203438fd1498Szrj       npeel = best_peel.peel_info.npeel;
203538fd1498Szrj       dr0 = best_peel.peel_info.dr;
203638fd1498Szrj 
203738fd1498Szrj       /* If not peeling is no more expensive than the best peeling found
203838fd1498Szrj 	 so far, don't perform any peeling.  */
203938fd1498Szrj       if (nopeel_inside_cost <= best_peel.inside_cost)
204038fd1498Szrj 	do_peeling = false;
204138fd1498Szrj     }
204238fd1498Szrj 
204338fd1498Szrj   if (do_peeling)
204438fd1498Szrj     {
204538fd1498Szrj       stmt = DR_STMT (dr0);
204638fd1498Szrj       stmt_info = vinfo_for_stmt (stmt);
204738fd1498Szrj       vectype = STMT_VINFO_VECTYPE (stmt_info);
204838fd1498Szrj 
204938fd1498Szrj       if (known_alignment_for_access_p (dr0))
205038fd1498Szrj         {
205138fd1498Szrj 	  bool negative = tree_int_cst_compare (DR_STEP (dr0),
205238fd1498Szrj 						size_zero_node) < 0;
205338fd1498Szrj           if (!npeel)
205438fd1498Szrj             {
205538fd1498Szrj               /* Since it's known at compile time, compute the number of
205638fd1498Szrj                  iterations in the peeled loop (the peeling factor) for use in
205738fd1498Szrj                  updating DR_MISALIGNMENT values.  The peeling factor is the
205838fd1498Szrj                  target alignment minus the misalignment, expressed as an
205938fd1498Szrj                  element count.  */
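              /* Worked example (hypothetical numbers): with a target
                 alignment of 16 bytes, 4-byte elements and a forward DR
                 misaligned by 4 bytes, mis = -4 and
                 npeel = ((-4) & 15) / 4 = 3 peeled scalar iterations,
                 after which the access is 16-byte aligned.  */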
206038fd1498Szrj 	      mis = negative ? DR_MISALIGNMENT (dr0) : -DR_MISALIGNMENT (dr0);
206138fd1498Szrj 	      unsigned int target_align = DR_TARGET_ALIGNMENT (dr0);
206238fd1498Szrj 	      npeel = ((mis & (target_align - 1))
206338fd1498Szrj 		       / vect_get_scalar_dr_size (dr0));
206438fd1498Szrj             }
206538fd1498Szrj 
206638fd1498Szrj 	  /* For interleaved data access every iteration accesses all the
206738fd1498Szrj 	     members of the group, therefore we divide the number of iterations
206838fd1498Szrj 	     by the group size.  */
206938fd1498Szrj 	  stmt_info = vinfo_for_stmt (DR_STMT (dr0));
207038fd1498Szrj 	  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
207138fd1498Szrj 	    npeel /= GROUP_SIZE (stmt_info);
207238fd1498Szrj 
207338fd1498Szrj           if (dump_enabled_p ())
207438fd1498Szrj             dump_printf_loc (MSG_NOTE, vect_location,
207538fd1498Szrj                              "Try peeling by %d\n", npeel);
207638fd1498Szrj         }
207738fd1498Szrj 
207838fd1498Szrj       /* Ensure that all datarefs can be vectorized after the peel.  */
207938fd1498Szrj       if (!vect_peeling_supportable (loop_vinfo, dr0, npeel))
208038fd1498Szrj 	do_peeling = false;
208138fd1498Szrj 
208238fd1498Szrj       /* Check if all datarefs are supportable and log.  */
208338fd1498Szrj       if (do_peeling && known_alignment_for_access_p (dr0) && npeel == 0)
208438fd1498Szrj         {
208538fd1498Szrj           stat = vect_verify_datarefs_alignment (loop_vinfo);
208638fd1498Szrj           if (!stat)
208738fd1498Szrj             do_peeling = false;
208838fd1498Szrj           else
208938fd1498Szrj 	    return stat;
209038fd1498Szrj         }
209138fd1498Szrj 
209238fd1498Szrj       /* Cost model #1 - honor --param vect-max-peeling-for-alignment.  */
209338fd1498Szrj       if (do_peeling)
209438fd1498Szrj         {
209538fd1498Szrj           unsigned max_allowed_peel
209638fd1498Szrj             = PARAM_VALUE (PARAM_VECT_MAX_PEELING_FOR_ALIGNMENT);
209738fd1498Szrj           if (max_allowed_peel != (unsigned)-1)
209838fd1498Szrj             {
209938fd1498Szrj               unsigned max_peel = npeel;
210038fd1498Szrj               if (max_peel == 0)
210138fd1498Szrj                 {
210238fd1498Szrj 		  unsigned int target_align = DR_TARGET_ALIGNMENT (dr0);
210338fd1498Szrj 		  max_peel = target_align / vect_get_scalar_dr_size (dr0) - 1;
210438fd1498Szrj                 }
210538fd1498Szrj               if (max_peel > max_allowed_peel)
210638fd1498Szrj                 {
210738fd1498Szrj                   do_peeling = false;
210838fd1498Szrj                   if (dump_enabled_p ())
210938fd1498Szrj                     dump_printf_loc (MSG_NOTE, vect_location,
211038fd1498Szrj                         "Disable peeling, max peels reached: %d\n", max_peel);
211138fd1498Szrj                 }
211238fd1498Szrj             }
211338fd1498Szrj         }
211438fd1498Szrj 
211538fd1498Szrj       /* Cost model #2 - if peeling may result in a remaining loop not
211638fd1498Szrj 	 iterating enough to be vectorized then do not peel.  Since this
211738fd1498Szrj 	 is a cost heuristic rather than a correctness decision, use the
211838fd1498Szrj 	 most likely runtime value for variable vectorization factors.  */
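      /* Hypothetical example: with an assumed VF of 4, npeel = 3 and only
         6 known scalar iterations, 6 < 4 + 3, so peeling would not leave
         enough iterations for a single vector iteration and is disabled.  */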
211938fd1498Szrj       if (do_peeling
212038fd1498Szrj 	  && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
212138fd1498Szrj 	{
212238fd1498Szrj 	  unsigned int assumed_vf = vect_vf_for_cost (loop_vinfo);
212338fd1498Szrj 	  unsigned int max_peel = npeel == 0 ? assumed_vf - 1 : npeel;
212438fd1498Szrj 	  if ((unsigned HOST_WIDE_INT) LOOP_VINFO_INT_NITERS (loop_vinfo)
212538fd1498Szrj 	      < assumed_vf + max_peel)
212638fd1498Szrj 	    do_peeling = false;
212738fd1498Szrj 	}
212838fd1498Szrj 
212938fd1498Szrj       if (do_peeling)
213038fd1498Szrj         {
213138fd1498Szrj           /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
213238fd1498Szrj              If the misalignment of DR_i is identical to that of dr0 then set
213338fd1498Szrj              DR_MISALIGNMENT (DR_i) to zero.  If the misalignment of DR_i and
213438fd1498Szrj              dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
213538fd1498Szrj              by the peeling factor times the element size of DR_i (modulo
213638fd1498Szrj              the target alignment).  Otherwise, the
213738fd1498Szrj              misalignment of DR_i must be set to unknown.  */
213838fd1498Szrj 	  FOR_EACH_VEC_ELT (datarefs, i, dr)
213938fd1498Szrj 	    if (dr != dr0)
214038fd1498Szrj 	      {
214138fd1498Szrj 		/* Strided accesses perform only component accesses, alignment
214238fd1498Szrj 		   is irrelevant for them.  */
214338fd1498Szrj 		stmt_info = vinfo_for_stmt (DR_STMT (dr));
214438fd1498Szrj 		if (STMT_VINFO_STRIDED_P (stmt_info)
214538fd1498Szrj 		    && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
214638fd1498Szrj 		  continue;
214738fd1498Szrj 
214838fd1498Szrj 		vect_update_misalignment_for_peel (dr, dr0, npeel);
214938fd1498Szrj 	      }
215038fd1498Szrj 
215138fd1498Szrj           LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
215238fd1498Szrj           if (npeel)
215338fd1498Szrj             LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel;
215438fd1498Szrj           else
215538fd1498Szrj             LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
215638fd1498Szrj 	      = DR_MISALIGNMENT (dr0);
215738fd1498Szrj 	  SET_DR_MISALIGNMENT (dr0, 0);
215838fd1498Szrj 	  if (dump_enabled_p ())
215938fd1498Szrj             {
216038fd1498Szrj               dump_printf_loc (MSG_NOTE, vect_location,
216138fd1498Szrj                                "Alignment of access forced using peeling.\n");
216238fd1498Szrj               dump_printf_loc (MSG_NOTE, vect_location,
216338fd1498Szrj                                "Peeling for alignment will be applied.\n");
216438fd1498Szrj             }
216538fd1498Szrj 
216638fd1498Szrj 	  /* The inside-loop cost will be accounted for in vectorizable_load
216738fd1498Szrj 	     and vectorizable_store correctly with adjusted alignments.
216838fd1498Szrj 	     Drop the body_cost_vec on the floor here.  */
216938fd1498Szrj 	  stat = vect_verify_datarefs_alignment (loop_vinfo);
217038fd1498Szrj 	  gcc_assert (stat);
217138fd1498Szrj           return stat;
217238fd1498Szrj         }
217338fd1498Szrj     }
217438fd1498Szrj 
217538fd1498Szrj   /* (2) Versioning to force alignment.  */
217638fd1498Szrj 
217738fd1498Szrj   /* Try versioning if:
217838fd1498Szrj      1) the loop is being optimized for speed, and
217938fd1498Szrj      2) there is at least one unsupported misaligned data ref with an unknown
218038fd1498Szrj         misalignment, and
218138fd1498Szrj      3) all misaligned data refs with a known misalignment are supported, and
218238fd1498Szrj      4) the number of runtime alignment checks is within reason.  */
218338fd1498Szrj 
218438fd1498Szrj   do_versioning =
218538fd1498Szrj 	optimize_loop_nest_for_speed_p (loop)
218638fd1498Szrj 	&& (!loop->inner); /* FORNOW */
218738fd1498Szrj 
218838fd1498Szrj   if (do_versioning)
218938fd1498Szrj     {
219038fd1498Szrj       FOR_EACH_VEC_ELT (datarefs, i, dr)
219138fd1498Szrj         {
219238fd1498Szrj 	  stmt = DR_STMT (dr);
219338fd1498Szrj 	  stmt_info = vinfo_for_stmt (stmt);
219438fd1498Szrj 
219538fd1498Szrj 	  /* For interleaving, only the alignment of the first access
219638fd1498Szrj 	     matters.  */
219738fd1498Szrj 	  if (aligned_access_p (dr)
219838fd1498Szrj 	      || (STMT_VINFO_GROUPED_ACCESS (stmt_info)
219938fd1498Szrj 		  && GROUP_FIRST_ELEMENT (stmt_info) != stmt))
220038fd1498Szrj 	    continue;
220138fd1498Szrj 
220238fd1498Szrj 	  if (STMT_VINFO_STRIDED_P (stmt_info))
220338fd1498Szrj 	    {
220438fd1498Szrj 	      /* Strided loads perform only component accesses, alignment is
220538fd1498Szrj 		 irrelevant for them.  */
220638fd1498Szrj 	      if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
220738fd1498Szrj 		continue;
220838fd1498Szrj 	      do_versioning = false;
220938fd1498Szrj 	      break;
221038fd1498Szrj 	    }
221138fd1498Szrj 
221238fd1498Szrj 	  supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
221338fd1498Szrj 
221438fd1498Szrj           if (!supportable_dr_alignment)
221538fd1498Szrj             {
221638fd1498Szrj 	      gimple *stmt;
221738fd1498Szrj               int mask;
221838fd1498Szrj               tree vectype;
221938fd1498Szrj 
222038fd1498Szrj               if (known_alignment_for_access_p (dr)
222138fd1498Szrj                   || LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).length ()
222238fd1498Szrj                      >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
222338fd1498Szrj                 {
222438fd1498Szrj                   do_versioning = false;
222538fd1498Szrj                   break;
222638fd1498Szrj                 }
222738fd1498Szrj 
222838fd1498Szrj               stmt = DR_STMT (dr);
222938fd1498Szrj               vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
223038fd1498Szrj               gcc_assert (vectype);
223138fd1498Szrj 
223238fd1498Szrj 	      /* At present we don't support versioning for alignment
223338fd1498Szrj 		 with variable VF, since there's no guarantee that the
223438fd1498Szrj 		 VF is a power of two.  We could relax this if we added
223538fd1498Szrj 		 a way of enforcing a power-of-two size.  */
223638fd1498Szrj 	      unsigned HOST_WIDE_INT size;
223738fd1498Szrj 	      if (!GET_MODE_SIZE (TYPE_MODE (vectype)).is_constant (&size))
223838fd1498Szrj 		{
223938fd1498Szrj 		  do_versioning = false;
224038fd1498Szrj 		  break;
224138fd1498Szrj 		}
224238fd1498Szrj 
224338fd1498Szrj               /* The rightmost bits of an aligned address must be zeros.
224438fd1498Szrj                  Construct the mask needed for this test.  For example,
224538fd1498Szrj                  GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
224638fd1498Szrj                  mask must be 15 = 0xf. */
224738fd1498Szrj 	      mask = size - 1;
224838fd1498Szrj 
224938fd1498Szrj               /* FORNOW: use the same mask to test all potentially unaligned
225038fd1498Szrj                  references in the loop.  The vectorizer currently supports
225138fd1498Szrj                  a single vector size, see the reference to
225238fd1498Szrj                  GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
225338fd1498Szrj                  vectorization factor is computed.  */
225438fd1498Szrj               gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
225538fd1498Szrj                           || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
225638fd1498Szrj               LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
225738fd1498Szrj               LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).safe_push (
225838fd1498Szrj 		      DR_STMT (dr));
225938fd1498Szrj             }
226038fd1498Szrj         }
226138fd1498Szrj 
226238fd1498Szrj       /* Versioning requires at least one misaligned data reference.  */
226338fd1498Szrj       if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
226438fd1498Szrj         do_versioning = false;
226538fd1498Szrj       else if (!do_versioning)
226638fd1498Szrj         LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).truncate (0);
226738fd1498Szrj     }
226838fd1498Szrj 
226938fd1498Szrj   if (do_versioning)
227038fd1498Szrj     {
227138fd1498Szrj       vec<gimple *> may_misalign_stmts
227238fd1498Szrj         = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
227338fd1498Szrj       gimple *stmt;
227438fd1498Szrj 
227538fd1498Szrj       /* It can now be assumed that the data references in the statements
227638fd1498Szrj          in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
227738fd1498Szrj          of the loop being vectorized.  */
227838fd1498Szrj       FOR_EACH_VEC_ELT (may_misalign_stmts, i, stmt)
227938fd1498Szrj         {
228038fd1498Szrj           stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
228138fd1498Szrj           dr = STMT_VINFO_DATA_REF (stmt_info);
228238fd1498Szrj 	  SET_DR_MISALIGNMENT (dr, 0);
228338fd1498Szrj 	  if (dump_enabled_p ())
228438fd1498Szrj             dump_printf_loc (MSG_NOTE, vect_location,
228538fd1498Szrj                              "Alignment of access forced using versioning.\n");
228638fd1498Szrj         }
228738fd1498Szrj 
228838fd1498Szrj       if (dump_enabled_p ())
228938fd1498Szrj         dump_printf_loc (MSG_NOTE, vect_location,
229038fd1498Szrj                          "Versioning for alignment will be applied.\n");
229138fd1498Szrj 
229238fd1498Szrj       /* Peeling and versioning can't be done together at this time.  */
229338fd1498Szrj       gcc_assert (! (do_peeling && do_versioning));
229438fd1498Szrj 
229538fd1498Szrj       stat = vect_verify_datarefs_alignment (loop_vinfo);
229638fd1498Szrj       gcc_assert (stat);
229738fd1498Szrj       return stat;
229838fd1498Szrj     }
229938fd1498Szrj 
230038fd1498Szrj   /* This point is reached if neither peeling nor versioning is being done.  */
230138fd1498Szrj   gcc_assert (! (do_peeling || do_versioning));
230238fd1498Szrj 
230338fd1498Szrj   stat = vect_verify_datarefs_alignment (loop_vinfo);
230438fd1498Szrj   return stat;
230538fd1498Szrj }
230638fd1498Szrj 
230738fd1498Szrj 
230838fd1498Szrj /* Function vect_find_same_alignment_drs.
230938fd1498Szrj 
231038fd1498Szrj    Update the same-alignment relation between the two data references
231138fd1498Szrj    in DDR according to the chosen vectorization factor.  */
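/* For example, for 4-byte ints on a target whose vector alignment is
   16 bytes, a[i] and a[i + 4] are 16 bytes apart, a multiple of the
   vector alignment, so they are recorded as having the same alignment;
   a[i] and a[i + 1], only 4 bytes apart, are not.  */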
231238fd1498Szrj 
231338fd1498Szrj static void
231438fd1498Szrj vect_find_same_alignment_drs (struct data_dependence_relation *ddr)
231538fd1498Szrj {
231638fd1498Szrj   struct data_reference *dra = DDR_A (ddr);
231738fd1498Szrj   struct data_reference *drb = DDR_B (ddr);
231838fd1498Szrj   stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
231938fd1498Szrj   stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
232038fd1498Szrj 
232138fd1498Szrj   if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
232238fd1498Szrj     return;
232338fd1498Szrj 
232438fd1498Szrj   if (dra == drb)
232538fd1498Szrj     return;
232638fd1498Szrj 
232738fd1498Szrj   if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0)
232838fd1498Szrj       || !operand_equal_p (DR_OFFSET (dra), DR_OFFSET (drb), 0)
232938fd1498Szrj       || !operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0))
233038fd1498Szrj     return;
233138fd1498Szrj 
233238fd1498Szrj   /* Two references with distance zero have the same alignment.  */
233338fd1498Szrj   poly_offset_int diff = (wi::to_poly_offset (DR_INIT (dra))
233438fd1498Szrj 			  - wi::to_poly_offset (DR_INIT (drb)));
233538fd1498Szrj   if (maybe_ne (diff, 0))
233638fd1498Szrj     {
233738fd1498Szrj       /* Get the wider of the two alignments.  */
233838fd1498Szrj       unsigned int align_a = (vect_calculate_target_alignment (dra)
233938fd1498Szrj 			      / BITS_PER_UNIT);
234038fd1498Szrj       unsigned int align_b = (vect_calculate_target_alignment (drb)
234138fd1498Szrj 			      / BITS_PER_UNIT);
234238fd1498Szrj       unsigned int max_align = MAX (align_a, align_b);
234338fd1498Szrj 
234438fd1498Szrj       /* Require the gap to be a multiple of the larger vector alignment.  */
234538fd1498Szrj       if (!multiple_p (diff, max_align))
234638fd1498Szrj 	return;
234738fd1498Szrj     }
234838fd1498Szrj 
234938fd1498Szrj   STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a).safe_push (drb);
235038fd1498Szrj   STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b).safe_push (dra);
235138fd1498Szrj   if (dump_enabled_p ())
235238fd1498Szrj     {
235338fd1498Szrj       dump_printf_loc (MSG_NOTE, vect_location,
235438fd1498Szrj 		       "accesses have the same alignment: ");
235538fd1498Szrj       dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
235638fd1498Szrj       dump_printf (MSG_NOTE,  " and ");
235738fd1498Szrj       dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
235838fd1498Szrj       dump_printf (MSG_NOTE, "\n");
235938fd1498Szrj     }
236038fd1498Szrj }
236138fd1498Szrj 
236238fd1498Szrj 
236338fd1498Szrj /* Function vect_analyze_data_refs_alignment
236438fd1498Szrj 
236538fd1498Szrj    Analyze the alignment of the data-references in the loop.
236638fd1498Szrj    Return FALSE if a data reference is found that cannot be vectorized.  */
236738fd1498Szrj 
236838fd1498Szrj bool
236938fd1498Szrj vect_analyze_data_refs_alignment (loop_vec_info vinfo)
237038fd1498Szrj {
237138fd1498Szrj   if (dump_enabled_p ())
237238fd1498Szrj     dump_printf_loc (MSG_NOTE, vect_location,
237338fd1498Szrj                      "=== vect_analyze_data_refs_alignment ===\n");
237438fd1498Szrj 
237538fd1498Szrj   /* Mark groups of data references with same alignment using
237638fd1498Szrj      data dependence information.  */
237738fd1498Szrj   vec<ddr_p> ddrs = vinfo->ddrs;
237838fd1498Szrj   struct data_dependence_relation *ddr;
237938fd1498Szrj   unsigned int i;
238038fd1498Szrj 
238138fd1498Szrj   FOR_EACH_VEC_ELT (ddrs, i, ddr)
238238fd1498Szrj     vect_find_same_alignment_drs (ddr);
238338fd1498Szrj 
238438fd1498Szrj   vec<data_reference_p> datarefs = vinfo->datarefs;
238538fd1498Szrj   struct data_reference *dr;
238638fd1498Szrj 
238738fd1498Szrj   vect_record_base_alignments (vinfo);
238838fd1498Szrj   FOR_EACH_VEC_ELT (datarefs, i, dr)
238938fd1498Szrj     {
239038fd1498Szrj       stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
239138fd1498Szrj       if (STMT_VINFO_VECTORIZABLE (stmt_info)
239238fd1498Szrj 	  && !vect_compute_data_ref_alignment (dr))
239338fd1498Szrj 	{
239438fd1498Szrj 	  /* Strided accesses perform only component accesses, misalignment
239538fd1498Szrj 	     information is irrelevant for them.  */
239638fd1498Szrj 	  if (STMT_VINFO_STRIDED_P (stmt_info)
239738fd1498Szrj 	      && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
239838fd1498Szrj 	    continue;
239938fd1498Szrj 
240038fd1498Szrj 	  if (dump_enabled_p ())
240138fd1498Szrj 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
240238fd1498Szrj 			     "not vectorized: can't calculate alignment "
240338fd1498Szrj 			     "for data ref.\n");
240438fd1498Szrj 
240538fd1498Szrj 	  return false;
240638fd1498Szrj 	}
240738fd1498Szrj     }
240838fd1498Szrj 
240938fd1498Szrj   return true;
241038fd1498Szrj }
241138fd1498Szrj 
241238fd1498Szrj 
241338fd1498Szrj /* Analyze alignment of DRs of stmts in NODE.  */
241438fd1498Szrj 
241538fd1498Szrj static bool
241638fd1498Szrj vect_slp_analyze_and_verify_node_alignment (slp_tree node)
241738fd1498Szrj {
241838fd1498Szrj   /* We vectorize from the first scalar stmt in the node unless
241938fd1498Szrj      the node is permuted, in which case we start from the first
242038fd1498Szrj      element in the group.  */
242138fd1498Szrj   gimple *first_stmt = SLP_TREE_SCALAR_STMTS (node)[0];
242238fd1498Szrj   data_reference_p first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
242338fd1498Szrj   if (SLP_TREE_LOAD_PERMUTATION (node).exists ())
242438fd1498Szrj     first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt));
242538fd1498Szrj 
242638fd1498Szrj   data_reference_p dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
242738fd1498Szrj   if (! vect_compute_data_ref_alignment (dr)
242838fd1498Szrj       /* For creating the data-ref pointer we need alignment of the
242938fd1498Szrj 	 first element anyway.  */
243038fd1498Szrj       || (dr != first_dr
243138fd1498Szrj 	  && ! vect_compute_data_ref_alignment (first_dr))
243238fd1498Szrj       || ! verify_data_ref_alignment (dr))
243338fd1498Szrj     {
243438fd1498Szrj       if (dump_enabled_p ())
243538fd1498Szrj 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
243638fd1498Szrj 			 "not vectorized: bad data alignment in basic "
243738fd1498Szrj 			 "block.\n");
243838fd1498Szrj       return false;
243938fd1498Szrj     }
244038fd1498Szrj 
244138fd1498Szrj   return true;
244238fd1498Szrj }
244338fd1498Szrj 
244438fd1498Szrj /* Function vect_slp_analyze_and_verify_instance_alignment
244538fd1498Szrj 
244638fd1498Szrj    Analyze the alignment of the data-references in the SLP instance.
244738fd1498Szrj    Return FALSE if a data reference is found that cannot be vectorized.  */
244838fd1498Szrj 
244938fd1498Szrj bool
245038fd1498Szrj vect_slp_analyze_and_verify_instance_alignment (slp_instance instance)
245138fd1498Szrj {
245238fd1498Szrj   if (dump_enabled_p ())
245338fd1498Szrj     dump_printf_loc (MSG_NOTE, vect_location,
245438fd1498Szrj                      "=== vect_slp_analyze_and_verify_instance_alignment ===\n");
245538fd1498Szrj 
245638fd1498Szrj   slp_tree node;
245738fd1498Szrj   unsigned i;
245838fd1498Szrj   FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, node)
245938fd1498Szrj     if (! vect_slp_analyze_and_verify_node_alignment (node))
246038fd1498Szrj       return false;
246138fd1498Szrj 
246238fd1498Szrj   node = SLP_INSTANCE_TREE (instance);
246338fd1498Szrj   if (STMT_VINFO_DATA_REF (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]))
246438fd1498Szrj       && ! vect_slp_analyze_and_verify_node_alignment
246538fd1498Szrj 	     (SLP_INSTANCE_TREE (instance)))
246638fd1498Szrj     return false;
246738fd1498Szrj 
246838fd1498Szrj   return true;
246938fd1498Szrj }
247038fd1498Szrj 
247138fd1498Szrj 
247238fd1498Szrj /* Analyze groups of accesses: check that DR belongs to a group of
247338fd1498Szrj    accesses of legal size, step, etc.  Detect gaps, single element
247438fd1498Szrj    interleaving, and other special cases. Set grouped access info.
247538fd1498Szrj    Collect groups of strided stores for further use in SLP analysis.
247638fd1498Szrj    Worker for vect_analyze_group_access.  */
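/* For example, for 4-byte ints and a loop such as

       for (i = 0; i < n; i++)
	 sum += a[3*i] + a[3*i + 1] + a[3*i + 2];

   each data-ref has step 12, giving a load group of size 3 with no gaps,
   whereas a loop that reads only a[3*i] is detected as single element
   interleaving with GROUP_SIZE 3 and GROUP_GAP 2.  */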
247738fd1498Szrj 
247838fd1498Szrj static bool
247938fd1498Szrj vect_analyze_group_access_1 (struct data_reference *dr)
248038fd1498Szrj {
248138fd1498Szrj   tree step = DR_STEP (dr);
248238fd1498Szrj   tree scalar_type = TREE_TYPE (DR_REF (dr));
248338fd1498Szrj   HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
248438fd1498Szrj   gimple *stmt = DR_STMT (dr);
248538fd1498Szrj   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
248638fd1498Szrj   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
248738fd1498Szrj   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
248838fd1498Szrj   HOST_WIDE_INT dr_step = -1;
248938fd1498Szrj   HOST_WIDE_INT groupsize, last_accessed_element = 1;
249038fd1498Szrj   bool slp_impossible = false;
249138fd1498Szrj 
249238fd1498Szrj   /* For interleaving, GROUPSIZE is STEP counted in elements, i.e., the
249338fd1498Szrj      size of the interleaving group (including gaps).  */
249438fd1498Szrj   if (tree_fits_shwi_p (step))
249538fd1498Szrj     {
249638fd1498Szrj       dr_step = tree_to_shwi (step);
249738fd1498Szrj       /* Check that STEP is a multiple of type size.  Otherwise there is
249838fd1498Szrj          a non-element-sized gap at the end of the group which we
249938fd1498Szrj 	 cannot represent in GROUP_GAP or GROUP_SIZE.
250038fd1498Szrj 	 ???  As we can handle non-constant step fine here we should
250138fd1498Szrj 	 simply remove uses of GROUP_GAP between the last and first
250238fd1498Szrj 	 element and instead rely on DR_STEP.  GROUP_SIZE then would
250338fd1498Szrj 	 simply not include that gap.  */
250438fd1498Szrj       if ((dr_step % type_size) != 0)
250538fd1498Szrj 	{
250638fd1498Szrj 	  if (dump_enabled_p ())
250738fd1498Szrj 	    {
250838fd1498Szrj 	      dump_printf_loc (MSG_NOTE, vect_location,
250938fd1498Szrj 	                       "Step ");
251038fd1498Szrj 	      dump_generic_expr (MSG_NOTE, TDF_SLIM, step);
251138fd1498Szrj 	      dump_printf (MSG_NOTE,
251238fd1498Szrj 			   " is not a multiple of the element size for ");
251338fd1498Szrj 	      dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr));
251438fd1498Szrj 	      dump_printf (MSG_NOTE, "\n");
251538fd1498Szrj 	    }
251638fd1498Szrj 	  return false;
251738fd1498Szrj 	}
251838fd1498Szrj       groupsize = absu_hwi (dr_step) / type_size;
251938fd1498Szrj     }
252038fd1498Szrj   else
252138fd1498Szrj     groupsize = 0;
252238fd1498Szrj 
252338fd1498Szrj   /* A non-consecutive access is possible only as part of an interleaving group.  */
252438fd1498Szrj   if (!GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
252538fd1498Szrj     {
252638fd1498Szrj       /* Check whether this DR is part of an interleaving group of which
252738fd1498Szrj 	 it is the only element accessed in the loop.  */
252838fd1498Szrj 
252938fd1498Szrj       /* Gaps are supported only for loads. STEP must be a multiple of the type
253038fd1498Szrj 	 size.  */
253138fd1498Szrj       if (DR_IS_READ (dr)
253238fd1498Szrj 	  && (dr_step % type_size) == 0
253338fd1498Szrj 	  && groupsize > 0)
253438fd1498Szrj 	{
253538fd1498Szrj 	  GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = stmt;
253638fd1498Szrj 	  GROUP_SIZE (vinfo_for_stmt (stmt)) = groupsize;
253738fd1498Szrj 	  GROUP_GAP (stmt_info) = groupsize - 1;
253838fd1498Szrj 	  if (dump_enabled_p ())
253938fd1498Szrj 	    {
254038fd1498Szrj 	      dump_printf_loc (MSG_NOTE, vect_location,
254138fd1498Szrj 	                       "Detected single element interleaving ");
254238fd1498Szrj 	      dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr));
254338fd1498Szrj 	      dump_printf (MSG_NOTE, " step ");
254438fd1498Szrj 	      dump_generic_expr (MSG_NOTE, TDF_SLIM, step);
254538fd1498Szrj 	      dump_printf (MSG_NOTE, "\n");
254638fd1498Szrj 	    }
254738fd1498Szrj 
254838fd1498Szrj 	  return true;
254938fd1498Szrj 	}
255038fd1498Szrj 
255138fd1498Szrj       if (dump_enabled_p ())
255238fd1498Szrj         {
255338fd1498Szrj  	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
255438fd1498Szrj 	                   "not consecutive access ");
255538fd1498Szrj 	  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
255638fd1498Szrj         }
255738fd1498Szrj 
255838fd1498Szrj       if (bb_vinfo)
255938fd1498Szrj         {
256038fd1498Szrj           /* Mark the statement as unvectorizable.  */
256138fd1498Szrj           STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
256238fd1498Szrj           return true;
256338fd1498Szrj         }
256438fd1498Szrj 
256538fd1498Szrj       dump_printf_loc (MSG_NOTE, vect_location, "using strided accesses\n");
256638fd1498Szrj       STMT_VINFO_STRIDED_P (stmt_info) = true;
256738fd1498Szrj       return true;
256838fd1498Szrj     }
256938fd1498Szrj 
257038fd1498Szrj   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt)
257138fd1498Szrj     {
257238fd1498Szrj       /* First stmt in the interleaving chain. Check the chain.  */
257338fd1498Szrj       gimple *next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
257438fd1498Szrj       struct data_reference *data_ref = dr;
257538fd1498Szrj       unsigned int count = 1;
257638fd1498Szrj       tree prev_init = DR_INIT (data_ref);
257738fd1498Szrj       gimple *prev = stmt;
257838fd1498Szrj       HOST_WIDE_INT diff, gaps = 0;
257938fd1498Szrj 
258038fd1498Szrj       /* By construction, all group members have INTEGER_CST DR_INITs.  */
258138fd1498Szrj       while (next)
258238fd1498Szrj         {
258338fd1498Szrj           /* Skip identical data-refs.  If two or more stmts share the same
258438fd1498Szrj              data-ref (supported only for loads), we vectorize only the first
258538fd1498Szrj              stmt, and the rest get their vectorized loads from that first
258638fd1498Szrj              one.  */
258738fd1498Szrj           if (!tree_int_cst_compare (DR_INIT (data_ref),
258838fd1498Szrj                                      DR_INIT (STMT_VINFO_DATA_REF (
258938fd1498Szrj 						   vinfo_for_stmt (next)))))
259038fd1498Szrj             {
259138fd1498Szrj               if (DR_IS_WRITE (data_ref))
259238fd1498Szrj                 {
259338fd1498Szrj                   if (dump_enabled_p ())
259438fd1498Szrj                     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
259538fd1498Szrj                                      "Two store stmts share the same dr.\n");
259638fd1498Szrj                   return false;
259738fd1498Szrj                 }
259838fd1498Szrj 
259938fd1498Szrj 	      if (dump_enabled_p ())
260038fd1498Szrj 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
260138fd1498Szrj 				 "Two or more load stmts share the same dr.\n");
260238fd1498Szrj 
260338fd1498Szrj               /* For load use the same data-ref load.  */
260438fd1498Szrj               GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
260538fd1498Szrj 
260638fd1498Szrj               prev = next;
260738fd1498Szrj               next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
260838fd1498Szrj               continue;
260938fd1498Szrj             }
261038fd1498Szrj 
261138fd1498Szrj           prev = next;
261238fd1498Szrj           data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
261338fd1498Szrj 
261438fd1498Szrj 	  /* All group members have the same STEP by construction.  */
261538fd1498Szrj 	  gcc_checking_assert (operand_equal_p (DR_STEP (data_ref), step, 0));
261638fd1498Szrj 
261738fd1498Szrj           /* Check that the distance between two accesses is equal to the type
261838fd1498Szrj              size. Otherwise, we have gaps.  */
261938fd1498Szrj           diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
262038fd1498Szrj                   - TREE_INT_CST_LOW (prev_init)) / type_size;
262138fd1498Szrj 	  if (diff != 1)
262238fd1498Szrj 	    {
262338fd1498Szrj 	      /* FORNOW: SLP of accesses with gaps is not supported.  */
262438fd1498Szrj 	      slp_impossible = true;
262538fd1498Szrj 	      if (DR_IS_WRITE (data_ref))
262638fd1498Szrj 		{
262738fd1498Szrj                   if (dump_enabled_p ())
262838fd1498Szrj                     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
262938fd1498Szrj                                      "interleaved store with gaps\n");
263038fd1498Szrj 		  return false;
263138fd1498Szrj 		}
263238fd1498Szrj 
263338fd1498Szrj               gaps += diff - 1;
263438fd1498Szrj 	    }
263538fd1498Szrj 
263638fd1498Szrj 	  last_accessed_element += diff;
263738fd1498Szrj 
263838fd1498Szrj           /* Store the gap from the previous member of the group. If there is no
263938fd1498Szrj              gap in the access, GROUP_GAP is always 1.  */
264038fd1498Szrj           GROUP_GAP (vinfo_for_stmt (next)) = diff;
264138fd1498Szrj 
264238fd1498Szrj           prev_init = DR_INIT (data_ref);
264338fd1498Szrj           next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
264438fd1498Szrj           /* Count the number of data-refs in the chain.  */
264538fd1498Szrj           count++;
264638fd1498Szrj         }
264738fd1498Szrj 
264838fd1498Szrj       if (groupsize == 0)
264938fd1498Szrj         groupsize = count + gaps;
265038fd1498Szrj 
265138fd1498Szrj       /* This could be UINT_MAX but as we are generating code in a very
265238fd1498Szrj          inefficient way we have to cap earlier.  See PR78699 for example.  */
265338fd1498Szrj       if (groupsize > 4096)
265438fd1498Szrj 	{
265538fd1498Szrj 	  if (dump_enabled_p ())
265638fd1498Szrj 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
265738fd1498Szrj 			     "group is too large\n");
265838fd1498Szrj 	  return false;
265938fd1498Szrj 	}
266038fd1498Szrj 
266138fd1498Szrj       /* Check that the size of the interleaving is equal to count for stores,
266238fd1498Szrj          i.e., that there are no gaps.  */
266338fd1498Szrj       if (groupsize != count
266438fd1498Szrj 	  && !DR_IS_READ (dr))
266538fd1498Szrj         {
266638fd1498Szrj 	  if (dump_enabled_p ())
266738fd1498Szrj 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
266838fd1498Szrj 			     "interleaved store with gaps\n");
266938fd1498Szrj 	  return false;
267038fd1498Szrj 	}
267138fd1498Szrj 
267238fd1498Szrj       /* If there is a gap after the last load in the group it is the
267338fd1498Szrj 	 difference between the groupsize and the last accessed
267438fd1498Szrj 	 element.
267538fd1498Szrj 	 When there is no gap, this difference should be 0.  */
267638fd1498Szrj       GROUP_GAP (vinfo_for_stmt (stmt)) = groupsize - last_accessed_element;
267738fd1498Szrj 
267838fd1498Szrj       GROUP_SIZE (vinfo_for_stmt (stmt)) = groupsize;
267938fd1498Szrj       if (dump_enabled_p ())
268038fd1498Szrj 	{
268138fd1498Szrj 	  dump_printf_loc (MSG_NOTE, vect_location,
268238fd1498Szrj 			   "Detected interleaving ");
268338fd1498Szrj 	  if (DR_IS_READ (dr))
268438fd1498Szrj 	    dump_printf (MSG_NOTE, "load ");
268538fd1498Szrj 	  else
268638fd1498Szrj 	    dump_printf (MSG_NOTE, "store ");
268738fd1498Szrj 	  dump_printf (MSG_NOTE, "of size %u starting with ",
268838fd1498Szrj 		       (unsigned)groupsize);
268938fd1498Szrj 	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
269038fd1498Szrj 	  if (GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
269138fd1498Szrj 	    dump_printf_loc (MSG_NOTE, vect_location,
269238fd1498Szrj 			     "There is a gap of %u elements after the group\n",
269338fd1498Szrj 			     GROUP_GAP (vinfo_for_stmt (stmt)));
269438fd1498Szrj 	}
269538fd1498Szrj 
269638fd1498Szrj       /* SLP: create an SLP data structure for every interleaving group of
269738fd1498Szrj 	 stores for further analysis in vect_analyze_slp.  */
269838fd1498Szrj       if (DR_IS_WRITE (dr) && !slp_impossible)
269938fd1498Szrj         {
270038fd1498Szrj           if (loop_vinfo)
270138fd1498Szrj             LOOP_VINFO_GROUPED_STORES (loop_vinfo).safe_push (stmt);
270238fd1498Szrj           if (bb_vinfo)
270338fd1498Szrj             BB_VINFO_GROUPED_STORES (bb_vinfo).safe_push (stmt);
270438fd1498Szrj         }
270538fd1498Szrj     }
270638fd1498Szrj 
270738fd1498Szrj   return true;
270838fd1498Szrj }
270938fd1498Szrj 
271038fd1498Szrj /* Analyze groups of accesses: check that DR belongs to a group of
271138fd1498Szrj    accesses of legal size, step, etc.  Detect gaps, single element
271238fd1498Szrj    interleaving, and other special cases. Set grouped access info.
271338fd1498Szrj    Collect groups of strided stores for further use in SLP analysis.  */
271438fd1498Szrj 
271538fd1498Szrj static bool
271638fd1498Szrj vect_analyze_group_access (struct data_reference *dr)
271738fd1498Szrj {
271838fd1498Szrj   if (!vect_analyze_group_access_1 (dr))
271938fd1498Szrj     {
272038fd1498Szrj       /* Dissolve the group if present.  */
272138fd1498Szrj       gimple *next;
272238fd1498Szrj       gimple *stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (DR_STMT (dr)));
272338fd1498Szrj       while (stmt)
272438fd1498Szrj 	{
272538fd1498Szrj 	  stmt_vec_info vinfo = vinfo_for_stmt (stmt);
272638fd1498Szrj 	  next = GROUP_NEXT_ELEMENT (vinfo);
272738fd1498Szrj 	  GROUP_FIRST_ELEMENT (vinfo) = NULL;
272838fd1498Szrj 	  GROUP_NEXT_ELEMENT (vinfo) = NULL;
272938fd1498Szrj 	  stmt = next;
273038fd1498Szrj 	}
273138fd1498Szrj       return false;
273238fd1498Szrj     }
273338fd1498Szrj   return true;
273438fd1498Szrj }
273538fd1498Szrj 
273638fd1498Szrj /* Analyze the access pattern of the data-reference DR.
273738fd1498Szrj    In case of non-consecutive accesses call vect_analyze_group_access() to
273838fd1498Szrj    analyze groups of accesses.  */
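/* For example, for 4-byte ints, a[i] has step 4 (the element size) and is
   a simple consecutive access; a[2*i] has step 8, is not consecutive, and
   is analyzed by vect_analyze_group_access; a reference whose step is not
   an INTEGER_CST is only acceptable as a (possibly grouped) strided
   access.  */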
273938fd1498Szrj 
274038fd1498Szrj static bool
274138fd1498Szrj vect_analyze_data_ref_access (struct data_reference *dr)
274238fd1498Szrj {
274338fd1498Szrj   tree step = DR_STEP (dr);
274438fd1498Szrj   tree scalar_type = TREE_TYPE (DR_REF (dr));
274538fd1498Szrj   gimple *stmt = DR_STMT (dr);
274638fd1498Szrj   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
274738fd1498Szrj   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
274838fd1498Szrj   struct loop *loop = NULL;
274938fd1498Szrj 
275038fd1498Szrj   if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
275138fd1498Szrj     return true;
275238fd1498Szrj 
275338fd1498Szrj   if (loop_vinfo)
275438fd1498Szrj     loop = LOOP_VINFO_LOOP (loop_vinfo);
275538fd1498Szrj 
275638fd1498Szrj   if (loop_vinfo && !step)
275738fd1498Szrj     {
275838fd1498Szrj       if (dump_enabled_p ())
275938fd1498Szrj 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
276038fd1498Szrj 	                 "bad data-ref access in loop\n");
276138fd1498Szrj       return false;
276238fd1498Szrj     }
276338fd1498Szrj 
276438fd1498Szrj   /* Allow loads with zero step in inner-loop vectorization.  */
276538fd1498Szrj   if (loop_vinfo && integer_zerop (step))
276638fd1498Szrj     {
276738fd1498Szrj       GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
276838fd1498Szrj       if (!nested_in_vect_loop_p (loop, stmt))
276938fd1498Szrj 	return DR_IS_READ (dr);
277038fd1498Szrj       /* Allow references with zero step only for outer loops marked
277138fd1498Szrj 	 with pragma omp simd - the pragma guarantees absence of
277238fd1498Szrj 	 loop-carried dependences between inner loop iterations.  */
277338fd1498Szrj       if (loop->safelen < 2)
277438fd1498Szrj 	{
277538fd1498Szrj 	  if (dump_enabled_p ())
277638fd1498Szrj 	    dump_printf_loc (MSG_NOTE, vect_location,
277738fd1498Szrj 			     "zero step in inner loop of nest\n");
277838fd1498Szrj 	  return false;
277938fd1498Szrj 	}
278038fd1498Szrj     }
278138fd1498Szrj 
278238fd1498Szrj   if (loop && nested_in_vect_loop_p (loop, stmt))
278338fd1498Szrj     {
278438fd1498Szrj       /* Interleaved accesses are not yet supported within outer-loop
278538fd1498Szrj         vectorization for references in the inner-loop.  */
278638fd1498Szrj       GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
278738fd1498Szrj 
278838fd1498Szrj       /* For the rest of the analysis we use the outer-loop step.  */
278938fd1498Szrj       step = STMT_VINFO_DR_STEP (stmt_info);
279038fd1498Szrj       if (integer_zerop (step))
279138fd1498Szrj 	{
279238fd1498Szrj 	  if (dump_enabled_p ())
279338fd1498Szrj 	    dump_printf_loc (MSG_NOTE, vect_location,
279438fd1498Szrj 	                     "zero step in outer loop.\n");
279538fd1498Szrj 	  return DR_IS_READ (dr);
279638fd1498Szrj 	}
279738fd1498Szrj     }
279838fd1498Szrj 
279938fd1498Szrj   /* Consecutive?  */
280038fd1498Szrj   if (TREE_CODE (step) == INTEGER_CST)
280138fd1498Szrj     {
280238fd1498Szrj       HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
280338fd1498Szrj       if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))
280438fd1498Szrj 	  || (dr_step < 0
280538fd1498Szrj 	      && !compare_tree_int (TYPE_SIZE_UNIT (scalar_type), -dr_step)))
280638fd1498Szrj 	{
280738fd1498Szrj 	  /* Mark that it is not interleaving.  */
280838fd1498Szrj 	  GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
280938fd1498Szrj 	  return true;
281038fd1498Szrj 	}
281138fd1498Szrj     }
281238fd1498Szrj 
281338fd1498Szrj   if (loop && nested_in_vect_loop_p (loop, stmt))
281438fd1498Szrj     {
281538fd1498Szrj       if (dump_enabled_p ())
281638fd1498Szrj 	dump_printf_loc (MSG_NOTE, vect_location,
281738fd1498Szrj 	                 "grouped access in outer loop.\n");
281838fd1498Szrj       return false;
281938fd1498Szrj     }
282038fd1498Szrj 
282138fd1498Szrj 
282238fd1498Szrj   /* Assume this is a DR handled by the non-constant strided load case.  */
282338fd1498Szrj   if (TREE_CODE (step) != INTEGER_CST)
282438fd1498Szrj     return (STMT_VINFO_STRIDED_P (stmt_info)
282538fd1498Szrj 	    && (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
282638fd1498Szrj 		|| vect_analyze_group_access (dr)));
282738fd1498Szrj 
282838fd1498Szrj   /* Not consecutive access - check if it's a part of interleaving group.  */
282938fd1498Szrj   return vect_analyze_group_access (dr);
283038fd1498Szrj }
283138fd1498Szrj 
283238fd1498Szrj /* qsort comparator: order data-references DRA and DRB so that references
283338fd1498Szrj    that may belong to the same interleaving group become adjacent.  */
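/* The sort keys are chosen so that data-refs that can belong to the same
   interleaving group become adjacent: e.g., same-base accesses such as
   a[i] and a[i + 1] sort next to each other in order of increasing
   DR_INIT, with ties broken by statement UID.  */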
283438fd1498Szrj 
283538fd1498Szrj static int
283638fd1498Szrj dr_group_sort_cmp (const void *dra_, const void *drb_)
283738fd1498Szrj {
283838fd1498Szrj   data_reference_p dra = *(data_reference_p *)const_cast<void *>(dra_);
283938fd1498Szrj   data_reference_p drb = *(data_reference_p *)const_cast<void *>(drb_);
284038fd1498Szrj   int cmp;
284138fd1498Szrj 
284238fd1498Szrj   /* Stabilize sort.  */
284338fd1498Szrj   if (dra == drb)
284438fd1498Szrj     return 0;
284538fd1498Szrj 
284638fd1498Szrj   /* DRs in different loops never belong to the same group.  */
284738fd1498Szrj   loop_p loopa = gimple_bb (DR_STMT (dra))->loop_father;
284838fd1498Szrj   loop_p loopb = gimple_bb (DR_STMT (drb))->loop_father;
284938fd1498Szrj   if (loopa != loopb)
285038fd1498Szrj     return loopa->num < loopb->num ? -1 : 1;
285138fd1498Szrj 
285238fd1498Szrj   /* Ordering of DRs according to base.  */
285338fd1498Szrj   cmp = data_ref_compare_tree (DR_BASE_ADDRESS (dra),
285438fd1498Szrj 			       DR_BASE_ADDRESS (drb));
285538fd1498Szrj   if (cmp != 0)
285638fd1498Szrj     return cmp;
285738fd1498Szrj 
285838fd1498Szrj   /* And according to DR_OFFSET.  */
285938fd1498Szrj   cmp = data_ref_compare_tree (DR_OFFSET (dra), DR_OFFSET (drb));
286038fd1498Szrj   if (cmp != 0)
286138fd1498Szrj     return cmp;
286238fd1498Szrj 
286338fd1498Szrj   /* Put reads before writes.  */
286438fd1498Szrj   if (DR_IS_READ (dra) != DR_IS_READ (drb))
286538fd1498Szrj     return DR_IS_READ (dra) ? -1 : 1;
286638fd1498Szrj 
286738fd1498Szrj   /* Then sort after access size.  */
286838fd1498Szrj   cmp = data_ref_compare_tree (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))),
286938fd1498Szrj 			       TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
287038fd1498Szrj   if (cmp != 0)
287138fd1498Szrj     return cmp;
287238fd1498Szrj 
287338fd1498Szrj   /* And after step.  */
287438fd1498Szrj   cmp = data_ref_compare_tree (DR_STEP (dra), DR_STEP (drb));
287538fd1498Szrj   if (cmp != 0)
287638fd1498Szrj     return cmp;
287738fd1498Szrj 
287838fd1498Szrj   /* Then sort after DR_INIT.  In case of identical DRs sort after stmt UID.  */
287938fd1498Szrj   cmp = data_ref_compare_tree (DR_INIT (dra), DR_INIT (drb));
288038fd1498Szrj   if (cmp == 0)
288138fd1498Szrj     return gimple_uid (DR_STMT (dra)) < gimple_uid (DR_STMT (drb)) ? -1 : 1;
288238fd1498Szrj   return cmp;
288338fd1498Szrj }
288438fd1498Szrj 
288538fd1498Szrj /* If OP is the result of a conversion, return the unconverted value,
288638fd1498Szrj    otherwise return null.  */
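/* For example, if OP is defined by the statement _2 = (unsigned char) _1,
   the function returns _1; for an OP defined by any other kind of
   statement it returns NULL_TREE.  */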
288738fd1498Szrj 
288838fd1498Szrj static tree
288938fd1498Szrj strip_conversion (tree op)
289038fd1498Szrj {
289138fd1498Szrj   if (TREE_CODE (op) != SSA_NAME)
289238fd1498Szrj     return NULL_TREE;
289338fd1498Szrj   gimple *stmt = SSA_NAME_DEF_STMT (op);
289438fd1498Szrj   if (!is_gimple_assign (stmt)
289538fd1498Szrj       || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt)))
289638fd1498Szrj     return NULL_TREE;
289738fd1498Szrj   return gimple_assign_rhs1 (stmt);
289838fd1498Szrj }
289938fd1498Szrj 
290038fd1498Szrj /* Return true if vectorizable_* routines can handle statements STMT1
290138fd1498Szrj    and STMT2 being in a single group.  */
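/* For example, two ordinary loads such as x = a[i] and y = a[i + 1] can
   form a group, as can two IFN_MASK_LOAD calls whose mask arguments are
   equal up to conversions; an ordinary load and a masked load cannot.  */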
290238fd1498Szrj 
290338fd1498Szrj static bool
290438fd1498Szrj can_group_stmts_p (gimple *stmt1, gimple *stmt2)
290538fd1498Szrj {
290638fd1498Szrj   if (gimple_assign_single_p (stmt1))
290738fd1498Szrj     return gimple_assign_single_p (stmt2);
290838fd1498Szrj 
290938fd1498Szrj   if (is_gimple_call (stmt1) && gimple_call_internal_p (stmt1))
291038fd1498Szrj     {
291138fd1498Szrj       /* Check for two masked loads or two masked stores.  */
291238fd1498Szrj       if (!is_gimple_call (stmt2) || !gimple_call_internal_p (stmt2))
291338fd1498Szrj 	return false;
291438fd1498Szrj       internal_fn ifn = gimple_call_internal_fn (stmt1);
291538fd1498Szrj       if (ifn != IFN_MASK_LOAD && ifn != IFN_MASK_STORE)
291638fd1498Szrj 	return false;
291738fd1498Szrj       if (ifn != gimple_call_internal_fn (stmt2))
291838fd1498Szrj 	return false;
291938fd1498Szrj 
292038fd1498Szrj       /* Check that the masks are the same.  Cope with casts of masks,
292138fd1498Szrj 	 like those created by build_mask_conversion.  */
292238fd1498Szrj       tree mask1 = gimple_call_arg (stmt1, 2);
292338fd1498Szrj       tree mask2 = gimple_call_arg (stmt2, 2);
292438fd1498Szrj       if (!operand_equal_p (mask1, mask2, 0))
292538fd1498Szrj 	{
292638fd1498Szrj 	  mask1 = strip_conversion (mask1);
292738fd1498Szrj 	  if (!mask1)
292838fd1498Szrj 	    return false;
292938fd1498Szrj 	  mask2 = strip_conversion (mask2);
293038fd1498Szrj 	  if (!mask2)
293138fd1498Szrj 	    return false;
293238fd1498Szrj 	  if (!operand_equal_p (mask1, mask2, 0))
293338fd1498Szrj 	    return false;
293438fd1498Szrj 	}
293538fd1498Szrj       return true;
293638fd1498Szrj     }
293738fd1498Szrj 
293838fd1498Szrj   return false;
293938fd1498Szrj }
294038fd1498Szrj 
294138fd1498Szrj /* Function vect_analyze_data_ref_accesses.
294238fd1498Szrj 
294338fd1498Szrj    Analyze the access pattern of all the data references in the loop.
294438fd1498Szrj 
294538fd1498Szrj    FORNOW: the only access pattern that is considered vectorizable is a
294638fd1498Szrj 	   simple step 1 (consecutive) access.
294738fd1498Szrj 
294838fd1498Szrj    FORNOW: handle only arrays and pointer accesses.  */
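/* For example, for 4-byte ints and the stores

       a[4*i] = x;  a[4*i + 1] = y;  a[4*i + 2] = z;  a[4*i + 3] = w;

   in one loop, the sorted data-refs are linked into a single interleaving
   chain whose first element is the store to a[4*i]; the per-DR analysis
   below then records it as a grouped access of size 4.  */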
294938fd1498Szrj 
295038fd1498Szrj bool
295138fd1498Szrj vect_analyze_data_ref_accesses (vec_info *vinfo)
295238fd1498Szrj {
295338fd1498Szrj   unsigned int i;
295438fd1498Szrj   vec<data_reference_p> datarefs = vinfo->datarefs;
295538fd1498Szrj   struct data_reference *dr;
295638fd1498Szrj 
295738fd1498Szrj   if (dump_enabled_p ())
295838fd1498Szrj     dump_printf_loc (MSG_NOTE, vect_location,
295938fd1498Szrj                      "=== vect_analyze_data_ref_accesses ===\n");
296038fd1498Szrj 
296138fd1498Szrj   if (datarefs.is_empty ())
296238fd1498Szrj     return true;
296338fd1498Szrj 
296438fd1498Szrj   /* Sort the array of datarefs to make building the interleaving chains
296538fd1498Szrj      linear.  Don't modify the original vector's order, it is needed for
296638fd1498Szrj      determining what dependencies are reversed.  */
296738fd1498Szrj   vec<data_reference_p> datarefs_copy = datarefs.copy ();
296838fd1498Szrj   datarefs_copy.qsort (dr_group_sort_cmp);
296938fd1498Szrj 
297038fd1498Szrj   /* Build the interleaving chains.  */
297138fd1498Szrj   for (i = 0; i < datarefs_copy.length () - 1;)
297238fd1498Szrj     {
297338fd1498Szrj       data_reference_p dra = datarefs_copy[i];
297438fd1498Szrj       stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
297538fd1498Szrj       stmt_vec_info lastinfo = NULL;
297638fd1498Szrj       if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a)
297738fd1498Szrj 	  || STMT_VINFO_GATHER_SCATTER_P (stmtinfo_a))
297838fd1498Szrj 	{
297938fd1498Szrj 	  ++i;
298038fd1498Szrj 	  continue;
298138fd1498Szrj 	}
298238fd1498Szrj       for (i = i + 1; i < datarefs_copy.length (); ++i)
298338fd1498Szrj 	{
298438fd1498Szrj 	  data_reference_p drb = datarefs_copy[i];
298538fd1498Szrj 	  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
298638fd1498Szrj 	  if (!STMT_VINFO_VECTORIZABLE (stmtinfo_b)
298738fd1498Szrj 	      || STMT_VINFO_GATHER_SCATTER_P (stmtinfo_b))
298838fd1498Szrj 	    break;
298938fd1498Szrj 
299038fd1498Szrj 	  /* ???  Imperfect sorting (non-compatible types, non-modulo
299138fd1498Szrj 	     accesses, same accesses) can cause a group to be artificially
299238fd1498Szrj 	     split here as we don't just skip over those.  If it really
299338fd1498Szrj 	     matters we can push those to a worklist and re-iterate
299438fd1498Szrj 	     over them.  Then we could just skip ahead to the next DR here.  */
299538fd1498Szrj 
299638fd1498Szrj 	  /* DRs in a different loop should not be put into the same
299738fd1498Szrj 	     interleaving group.  */
299838fd1498Szrj 	  if (gimple_bb (DR_STMT (dra))->loop_father
299938fd1498Szrj 	      != gimple_bb (DR_STMT (drb))->loop_father)
300038fd1498Szrj 	    break;
300138fd1498Szrj 
300238fd1498Szrj 	  /* Check that the data-refs have the same first location (except init)
300338fd1498Szrj 	     and that both are either stores or loads (not one load and one
300438fd1498Szrj 	     store, and not a mix of masked and unmasked accesses).  */
300538fd1498Szrj 	  if (DR_IS_READ (dra) != DR_IS_READ (drb)
300638fd1498Szrj 	      || data_ref_compare_tree (DR_BASE_ADDRESS (dra),
300738fd1498Szrj 					DR_BASE_ADDRESS (drb)) != 0
300838fd1498Szrj 	      || data_ref_compare_tree (DR_OFFSET (dra), DR_OFFSET (drb)) != 0
300938fd1498Szrj 	      || !can_group_stmts_p (DR_STMT (dra), DR_STMT (drb)))
301038fd1498Szrj 	    break;
301138fd1498Szrj 
301238fd1498Szrj 	  /* Check that the data-refs have the same constant size.  */
301338fd1498Szrj 	  tree sza = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra)));
301438fd1498Szrj 	  tree szb = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb)));
301538fd1498Szrj 	  if (!tree_fits_uhwi_p (sza)
301638fd1498Szrj 	      || !tree_fits_uhwi_p (szb)
301738fd1498Szrj 	      || !tree_int_cst_equal (sza, szb))
301838fd1498Szrj 	    break;
301938fd1498Szrj 
302038fd1498Szrj 	  /* Check that the data-refs have the same step.  */
302138fd1498Szrj 	  if (data_ref_compare_tree (DR_STEP (dra), DR_STEP (drb)) != 0)
302238fd1498Szrj 	    break;
302338fd1498Szrj 
302438fd1498Szrj 	  /* Check the types are compatible.
302538fd1498Szrj 	     ???  We don't distinguish this during sorting.  */
302638fd1498Szrj 	  if (!types_compatible_p (TREE_TYPE (DR_REF (dra)),
302738fd1498Szrj 				   TREE_TYPE (DR_REF (drb))))
302838fd1498Szrj 	    break;
302938fd1498Szrj 
303038fd1498Szrj 	  /* Check that the DR_INITs are compile-time constants.  */
303138fd1498Szrj 	  if (TREE_CODE (DR_INIT (dra)) != INTEGER_CST
303238fd1498Szrj 	      || TREE_CODE (DR_INIT (drb)) != INTEGER_CST)
303338fd1498Szrj 	    break;
303438fd1498Szrj 
303538fd1498Szrj 	  /* Sorting has ensured that DR_INIT (dra) <= DR_INIT (drb).  */
303638fd1498Szrj 	  HOST_WIDE_INT init_a = TREE_INT_CST_LOW (DR_INIT (dra));
303738fd1498Szrj 	  HOST_WIDE_INT init_b = TREE_INT_CST_LOW (DR_INIT (drb));
303838fd1498Szrj 	  HOST_WIDE_INT init_prev
303938fd1498Szrj 	    = TREE_INT_CST_LOW (DR_INIT (datarefs_copy[i-1]));
304038fd1498Szrj 	  gcc_assert (init_a <= init_b
304138fd1498Szrj 		      && init_a <= init_prev
304238fd1498Szrj 		      && init_prev <= init_b);
304338fd1498Szrj 
304438fd1498Szrj 	  /* Do not place the same access in the interleaving chain twice.  */
304538fd1498Szrj 	  if (init_b == init_prev)
304638fd1498Szrj 	    {
304738fd1498Szrj 	      gcc_assert (gimple_uid (DR_STMT (datarefs_copy[i-1]))
304838fd1498Szrj 			  < gimple_uid (DR_STMT (drb)));
304938fd1498Szrj 	      /* ???  For now we simply "drop" the later reference which is
305038fd1498Szrj 	         otherwise the same rather than finishing off this group.
305138fd1498Szrj 		 In the end we'd want to re-process duplicates forming
305238fd1498Szrj 		 multiple groups from the refs, likely by just collecting
305338fd1498Szrj 		 all candidates (including duplicates and split points
305438fd1498Szrj 		 below) in a vector and then process them together.  */
305538fd1498Szrj 	      continue;
305638fd1498Szrj 	    }
305738fd1498Szrj 
305838fd1498Szrj 	  /* If init_b == init_a + the size of the type * k, we have an
305938fd1498Szrj 	     interleaving, and DRA is accessed before DRB.  */
306038fd1498Szrj 	  HOST_WIDE_INT type_size_a = tree_to_uhwi (sza);
306138fd1498Szrj 	  if (type_size_a == 0
306238fd1498Szrj 	      || (init_b - init_a) % type_size_a != 0)
306338fd1498Szrj 	    break;
306438fd1498Szrj 
306538fd1498Szrj 	  /* For stores, require that the accesses be adjacent.  This splits
306638fd1498Szrj 	     groups into chunks we support (we don't support vectorization
306738fd1498Szrj 	     of stores with gaps).  */
306838fd1498Szrj 	  if (!DR_IS_READ (dra) && init_b - init_prev != type_size_a)
306938fd1498Szrj 	    break;
307038fd1498Szrj 
307138fd1498Szrj 	  /* If the step is constant and nonzero, require it to be greater
307238fd1498Szrj 	     than the difference between the data-refs' inits; otherwise DRB
307338fd1498Szrj 	     starts a new group.  This splits groups into suitable sizes.  */
307438fd1498Szrj 	  if (tree_fits_shwi_p (DR_STEP (dra)))
307538fd1498Szrj 	    {
307638fd1498Szrj 	      HOST_WIDE_INT step = tree_to_shwi (DR_STEP (dra));
307738fd1498Szrj 	      if (step != 0 && step <= (init_b - init_a))
307838fd1498Szrj 		break;
307938fd1498Szrj 	    }
308038fd1498Szrj 
308138fd1498Szrj 	  if (dump_enabled_p ())
308238fd1498Szrj 	    {
308338fd1498Szrj 	      dump_printf_loc (MSG_NOTE, vect_location,
308438fd1498Szrj 			       "Detected interleaving ");
308538fd1498Szrj 	      if (DR_IS_READ (dra))
308638fd1498Szrj 		dump_printf (MSG_NOTE, "load ");
308738fd1498Szrj 	      else
308838fd1498Szrj 		dump_printf (MSG_NOTE, "store ");
308938fd1498Szrj 	      dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
309038fd1498Szrj 	      dump_printf (MSG_NOTE,  " and ");
309138fd1498Szrj 	      dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
309238fd1498Szrj 	      dump_printf (MSG_NOTE, "\n");
309338fd1498Szrj 	    }
309438fd1498Szrj 
309538fd1498Szrj 	  /* Link the found element into the group list.  */
309638fd1498Szrj 	  if (!GROUP_FIRST_ELEMENT (stmtinfo_a))
309738fd1498Szrj 	    {
309838fd1498Szrj 	      GROUP_FIRST_ELEMENT (stmtinfo_a) = DR_STMT (dra);
309938fd1498Szrj 	      lastinfo = stmtinfo_a;
310038fd1498Szrj 	    }
310138fd1498Szrj 	  GROUP_FIRST_ELEMENT (stmtinfo_b) = DR_STMT (dra);
310238fd1498Szrj 	  GROUP_NEXT_ELEMENT (lastinfo) = DR_STMT (drb);
310338fd1498Szrj 	  lastinfo = stmtinfo_b;
310438fd1498Szrj 	}
310538fd1498Szrj     }
310638fd1498Szrj 
310738fd1498Szrj   FOR_EACH_VEC_ELT (datarefs_copy, i, dr)
310838fd1498Szrj     if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
310938fd1498Szrj         && !vect_analyze_data_ref_access (dr))
311038fd1498Szrj       {
311138fd1498Szrj 	if (dump_enabled_p ())
311238fd1498Szrj 	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
311338fd1498Szrj 	                   "not vectorized: complicated access pattern.\n");
311438fd1498Szrj 
311538fd1498Szrj         if (is_a <bb_vec_info> (vinfo))
311638fd1498Szrj           {
311738fd1498Szrj             /* Mark the statement as not vectorizable.  */
311838fd1498Szrj             STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
311938fd1498Szrj             continue;
312038fd1498Szrj           }
312138fd1498Szrj         else
312238fd1498Szrj 	  {
312338fd1498Szrj 	    datarefs_copy.release ();
312438fd1498Szrj 	    return false;
312538fd1498Szrj 	  }
312638fd1498Szrj       }
312738fd1498Szrj 
312838fd1498Szrj   datarefs_copy.release ();
312938fd1498Szrj   return true;
313038fd1498Szrj }
313138fd1498Szrj 
313238fd1498Szrj /* Function vect_vfa_segment_size.
313338fd1498Szrj 
313438fd1498Szrj    Input:
313538fd1498Szrj      DR: The data reference.
313638fd1498Szrj      LENGTH_FACTOR: segment length to consider.
313738fd1498Szrj 
313838fd1498Szrj    Return a value suitable for the dr_with_seg_len::seg_len field.
313938fd1498Szrj    This is the "distance travelled" by the pointer from the first
314038fd1498Szrj    iteration in the segment to the last.  Note that it does not include
314138fd1498Szrj    the size of the access; in effect it only describes the first byte.  */
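/* For example, for a consecutive 4-byte access (DR_STEP 4) and
   LENGTH_FACTOR 8, the segment length is 4 * (8 - 1) = 28 bytes.  */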
314238fd1498Szrj 
314338fd1498Szrj static tree
314438fd1498Szrj vect_vfa_segment_size (struct data_reference *dr, tree length_factor)
314538fd1498Szrj {
314638fd1498Szrj   length_factor = size_binop (MINUS_EXPR,
314738fd1498Szrj 			      fold_convert (sizetype, length_factor),
314838fd1498Szrj 			      size_one_node);
314938fd1498Szrj   return size_binop (MULT_EXPR, fold_convert (sizetype, DR_STEP (dr)),
315038fd1498Szrj 		     length_factor);
315138fd1498Szrj }
315238fd1498Szrj 
315338fd1498Szrj /* Return a value that, when added to abs (vect_vfa_segment_size (dr)),
315438fd1498Szrj    gives the worst-case number of bytes covered by the segment.  */
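/* For example, for a group of 4-byte loads with GROUP_SIZE 4 and a
   trailing GROUP_GAP of 1, the access size is 4 * (4 - 1) = 12 bytes;
   for dr_explicit_realign_optimized accesses the vector size minus the
   scalar size is added on top, since such a load may read a whole
   vector.  */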
315538fd1498Szrj 
315638fd1498Szrj static unsigned HOST_WIDE_INT
315738fd1498Szrj vect_vfa_access_size (data_reference *dr)
315838fd1498Szrj {
315938fd1498Szrj   stmt_vec_info stmt_vinfo = vinfo_for_stmt (DR_STMT (dr));
316038fd1498Szrj   tree ref_type = TREE_TYPE (DR_REF (dr));
316138fd1498Szrj   unsigned HOST_WIDE_INT ref_size = tree_to_uhwi (TYPE_SIZE_UNIT (ref_type));
316238fd1498Szrj   unsigned HOST_WIDE_INT access_size = ref_size;
316338fd1498Szrj   if (GROUP_FIRST_ELEMENT (stmt_vinfo))
316438fd1498Szrj     {
316538fd1498Szrj       gcc_assert (GROUP_FIRST_ELEMENT (stmt_vinfo) == DR_STMT (dr));
316638fd1498Szrj       access_size *= GROUP_SIZE (stmt_vinfo) - GROUP_GAP (stmt_vinfo);
316738fd1498Szrj     }
316838fd1498Szrj   if (STMT_VINFO_VEC_STMT (stmt_vinfo)
316938fd1498Szrj       && (vect_supportable_dr_alignment (dr, false)
317038fd1498Szrj 	  == dr_explicit_realign_optimized))
317138fd1498Szrj     {
317238fd1498Szrj       /* We might access a full vector's worth.  */
317338fd1498Szrj       tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
317438fd1498Szrj       access_size += tree_to_uhwi (TYPE_SIZE_UNIT (vectype)) - ref_size;
317538fd1498Szrj     }
317638fd1498Szrj   return access_size;
317738fd1498Szrj }
317838fd1498Szrj 
317938fd1498Szrj /* Get the minimum alignment for all the scalar accesses that DR describes.  */
318038fd1498Szrj 
318138fd1498Szrj static unsigned int
318238fd1498Szrj vect_vfa_align (const data_reference *dr)
318338fd1498Szrj {
318438fd1498Szrj   return TYPE_ALIGN_UNIT (TREE_TYPE (DR_REF (dr)));
318538fd1498Szrj }
318638fd1498Szrj 
318738fd1498Szrj /* Function vect_compile_time_alias.
318838fd1498Szrj 
318938fd1498Szrj    Given data references A and B with equal base and offset, see whether
319038fd1498Szrj    the alias relation can be decided at compilation time.  Return 1 if
319138fd1498Szrj    it can and the references alias, 0 if it can and the references do
319238fd1498Szrj    not alias, and -1 if we cannot decide at compile time.  SEGMENT_LENGTH_A,
319338fd1498Szrj    SEGMENT_LENGTH_B, ACCESS_SIZE_A and ACCESS_SIZE_B are the equivalent
319438fd1498Szrj    of dr_with_seg_len::{seg_len,access_size} for A and B.  */
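/* For example, with positive steps, segment lengths of 12 and access
   sizes of 4 for both references, DR_INIT values of 0 and 16 give the
   byte ranges [0, 16) and [16, 32), which cannot overlap, so the result
   is 0; DR_INIT values of 0 and 8 give [0, 16) and [8, 24), which
   certainly overlap, so the result is 1.  */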
319538fd1498Szrj 
319638fd1498Szrj static int
319738fd1498Szrj vect_compile_time_alias (struct data_reference *a, struct data_reference *b,
319838fd1498Szrj 			 tree segment_length_a, tree segment_length_b,
319938fd1498Szrj 			 unsigned HOST_WIDE_INT access_size_a,
320038fd1498Szrj 			 unsigned HOST_WIDE_INT access_size_b)
320138fd1498Szrj {
320238fd1498Szrj   poly_offset_int offset_a = wi::to_poly_offset (DR_INIT (a));
320338fd1498Szrj   poly_offset_int offset_b = wi::to_poly_offset (DR_INIT (b));
320438fd1498Szrj   poly_uint64 const_length_a;
320538fd1498Szrj   poly_uint64 const_length_b;
320638fd1498Szrj 
320738fd1498Szrj   /* For negative step, we need to adjust address range by TYPE_SIZE_UNIT
320838fd1498Szrj      bytes, e.g., int a[3] -> a[1] range is [a+4, a+16) instead of
320938fd1498Szrj      [a, a+12) */
321038fd1498Szrj   if (tree_int_cst_compare (DR_STEP (a), size_zero_node) < 0)
321138fd1498Szrj     {
321238fd1498Szrj       const_length_a = (-wi::to_poly_wide (segment_length_a)).force_uhwi ();
321338fd1498Szrj       offset_a = (offset_a + access_size_a) - const_length_a;
321438fd1498Szrj     }
321538fd1498Szrj   else
321638fd1498Szrj     const_length_a = tree_to_poly_uint64 (segment_length_a);
321738fd1498Szrj   if (tree_int_cst_compare (DR_STEP (b), size_zero_node) < 0)
321838fd1498Szrj     {
321938fd1498Szrj       const_length_b = (-wi::to_poly_wide (segment_length_b)).force_uhwi ();
322038fd1498Szrj       offset_b = (offset_b + access_size_b) - const_length_b;
322138fd1498Szrj     }
322238fd1498Szrj   else
322338fd1498Szrj     const_length_b = tree_to_poly_uint64 (segment_length_b);
322438fd1498Szrj 
322538fd1498Szrj   const_length_a += access_size_a;
322638fd1498Szrj   const_length_b += access_size_b;
322738fd1498Szrj 
322838fd1498Szrj   if (ranges_known_overlap_p (offset_a, const_length_a,
322938fd1498Szrj 			      offset_b, const_length_b))
323038fd1498Szrj     return 1;
323138fd1498Szrj 
323238fd1498Szrj   if (!ranges_maybe_overlap_p (offset_a, const_length_a,
323338fd1498Szrj 			       offset_b, const_length_b))
323438fd1498Szrj     return 0;
323538fd1498Szrj 
323638fd1498Szrj   return -1;
323738fd1498Szrj }
323838fd1498Szrj 
323938fd1498Szrj /* Return true if the minimum nonzero dependence distance for loop LOOP_DEPTH
324038fd1498Szrj    in DDR is >= VF.  */
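/* For example, if the only distance vector has an entry of 8 at
   LOOP_DEPTH and VF is known to be at most 8, the function returns true;
   a distance of 2 with VF 4 (and DDR_REVERSED_P false) makes it return
   false.  */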
324138fd1498Szrj 
324238fd1498Szrj static bool
324338fd1498Szrj dependence_distance_ge_vf (data_dependence_relation *ddr,
324438fd1498Szrj 			   unsigned int loop_depth, poly_uint64 vf)
324538fd1498Szrj {
324638fd1498Szrj   if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE
324738fd1498Szrj       || DDR_NUM_DIST_VECTS (ddr) == 0)
324838fd1498Szrj     return false;
324938fd1498Szrj 
325038fd1498Szrj   /* If the dependence is exact, we should have limited the VF instead.  */
325138fd1498Szrj   gcc_checking_assert (DDR_COULD_BE_INDEPENDENT_P (ddr));
325238fd1498Szrj 
325338fd1498Szrj   unsigned int i;
325438fd1498Szrj   lambda_vector dist_v;
325538fd1498Szrj   FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
325638fd1498Szrj     {
325738fd1498Szrj       HOST_WIDE_INT dist = dist_v[loop_depth];
325838fd1498Szrj       if (dist != 0
325938fd1498Szrj 	  && !(dist > 0 && DDR_REVERSED_P (ddr))
326038fd1498Szrj 	  && maybe_lt ((unsigned HOST_WIDE_INT) abs_hwi (dist), vf))
326138fd1498Szrj 	return false;
326238fd1498Szrj     }
326338fd1498Szrj 
326438fd1498Szrj   if (dump_enabled_p ())
326538fd1498Szrj     {
326638fd1498Szrj       dump_printf_loc (MSG_NOTE, vect_location,
326738fd1498Szrj 		       "dependence distance between ");
326838fd1498Szrj       dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_A (ddr)));
326938fd1498Szrj       dump_printf (MSG_NOTE,  " and ");
327038fd1498Szrj       dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_B (ddr)));
327138fd1498Szrj       dump_printf (MSG_NOTE,  " is >= VF\n");
327238fd1498Szrj     }
327338fd1498Szrj 
327438fd1498Szrj   return true;
327538fd1498Szrj }
327638fd1498Szrj 
327738fd1498Szrj /* Dump LOWER_BOUND using flags DUMP_KIND.  Dumps are known to be enabled.  */
327838fd1498Szrj 
327938fd1498Szrj static void
328038fd1498Szrj dump_lower_bound (int dump_kind, const vec_lower_bound &lower_bound)
328138fd1498Szrj {
328238fd1498Szrj   dump_printf (dump_kind, "%s (", lower_bound.unsigned_p ? "unsigned" : "abs");
328338fd1498Szrj   dump_generic_expr (dump_kind, TDF_SLIM, lower_bound.expr);
328438fd1498Szrj   dump_printf (dump_kind, ") >= ");
328538fd1498Szrj   dump_dec (dump_kind, lower_bound.min_value);
328638fd1498Szrj }
328738fd1498Szrj 
328838fd1498Szrj /* Record that the vectorized loop requires the vec_lower_bound described
328938fd1498Szrj    by EXPR, UNSIGNED_P and MIN_VALUE.  */
329038fd1498Szrj 
329138fd1498Szrj static void
329238fd1498Szrj vect_check_lower_bound (loop_vec_info loop_vinfo, tree expr, bool unsigned_p,
329338fd1498Szrj 			poly_uint64 min_value)
329438fd1498Szrj {
329538fd1498Szrj   vec<vec_lower_bound> lower_bounds = LOOP_VINFO_LOWER_BOUNDS (loop_vinfo);
329638fd1498Szrj   for (unsigned int i = 0; i < lower_bounds.length (); ++i)
329738fd1498Szrj     if (operand_equal_p (lower_bounds[i].expr, expr, 0))
329838fd1498Szrj       {
329938fd1498Szrj 	unsigned_p &= lower_bounds[i].unsigned_p;
330038fd1498Szrj 	min_value = upper_bound (lower_bounds[i].min_value, min_value);
330138fd1498Szrj 	if (lower_bounds[i].unsigned_p != unsigned_p
330238fd1498Szrj 	    || maybe_lt (lower_bounds[i].min_value, min_value))
330338fd1498Szrj 	  {
330438fd1498Szrj 	    lower_bounds[i].unsigned_p = unsigned_p;
330538fd1498Szrj 	    lower_bounds[i].min_value = min_value;
330638fd1498Szrj 	    if (dump_enabled_p ())
330738fd1498Szrj 	      {
330838fd1498Szrj 		dump_printf_loc (MSG_NOTE, vect_location,
330938fd1498Szrj 				 "updating run-time check to ");
331038fd1498Szrj 		dump_lower_bound (MSG_NOTE, lower_bounds[i]);
331138fd1498Szrj 		dump_printf (MSG_NOTE, "\n");
331238fd1498Szrj 	      }
331338fd1498Szrj 	  }
331438fd1498Szrj 	return;
331538fd1498Szrj       }
331638fd1498Szrj 
331738fd1498Szrj   vec_lower_bound lower_bound (expr, unsigned_p, min_value);
331838fd1498Szrj   if (dump_enabled_p ())
331938fd1498Szrj     {
332038fd1498Szrj       dump_printf_loc (MSG_NOTE, vect_location, "need a run-time check that ");
332138fd1498Szrj       dump_lower_bound (MSG_NOTE, lower_bound);
332238fd1498Szrj       dump_printf (MSG_NOTE, "\n");
332338fd1498Szrj     }
332438fd1498Szrj   LOOP_VINFO_LOWER_BOUNDS (loop_vinfo).safe_push (lower_bound);
332538fd1498Szrj }
332638fd1498Szrj 
332738fd1498Szrj /* Return true if it's unlikely that the step of the vectorized form of DR
332838fd1498Szrj    will span fewer than GAP bytes.  */
332938fd1498Szrj 
333038fd1498Szrj static bool
333138fd1498Szrj vect_small_gap_p (loop_vec_info loop_vinfo, data_reference *dr, poly_int64 gap)
333238fd1498Szrj {
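  /* This is a heuristic: estimate the number of scalar accesses per vector
     iteration (the likely VF, multiplied by the group size for grouped
     accesses) and compare GAP against the number of bytes those accesses
     span.  */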
333338fd1498Szrj   stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
333438fd1498Szrj   HOST_WIDE_INT count
333538fd1498Szrj     = estimated_poly_value (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
333638fd1498Szrj   if (GROUP_FIRST_ELEMENT (stmt_info))
333738fd1498Szrj     count *= GROUP_SIZE (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info)));
333838fd1498Szrj   return estimated_poly_value (gap) <= count * vect_get_scalar_dr_size (dr);
333938fd1498Szrj }
334038fd1498Szrj 
334138fd1498Szrj /* Return true if we know that there is no alias between DR_A and DR_B
334238fd1498Szrj    when abs (DR_STEP (DR_A)) >= N for some N.  When returning true, set
334338fd1498Szrj    *LOWER_BOUND_OUT to this N.  */
334438fd1498Szrj 
334538fd1498Szrj static bool
334638fd1498Szrj vectorizable_with_step_bound_p (data_reference *dr_a, data_reference *dr_b,
334738fd1498Szrj 				poly_uint64 *lower_bound_out)
334838fd1498Szrj {
334938fd1498Szrj   /* Check that there is a constant gap of known sign between DR_A
335038fd1498Szrj      and DR_B.  */
335138fd1498Szrj   poly_int64 init_a, init_b;
335238fd1498Szrj   if (!operand_equal_p (DR_BASE_ADDRESS (dr_a), DR_BASE_ADDRESS (dr_b), 0)
335338fd1498Szrj       || !operand_equal_p (DR_OFFSET (dr_a), DR_OFFSET (dr_b), 0)
335438fd1498Szrj       || !operand_equal_p (DR_STEP (dr_a), DR_STEP (dr_b), 0)
335538fd1498Szrj       || !poly_int_tree_p (DR_INIT (dr_a), &init_a)
335638fd1498Szrj       || !poly_int_tree_p (DR_INIT (dr_b), &init_b)
335738fd1498Szrj       || !ordered_p (init_a, init_b))
335838fd1498Szrj     return false;
335938fd1498Szrj 
336038fd1498Szrj   /* Sort DR_A and DR_B by the address they access.  */
336138fd1498Szrj   if (maybe_lt (init_b, init_a))
336238fd1498Szrj     {
336338fd1498Szrj       std::swap (init_a, init_b);
336438fd1498Szrj       std::swap (dr_a, dr_b);
336538fd1498Szrj     }
336638fd1498Szrj 
336738fd1498Szrj   /* If the two accesses could be dependent within a scalar iteration,
336838fd1498Szrj      make sure that we'd retain their order.  */
336938fd1498Szrj   if (maybe_gt (init_a + vect_get_scalar_dr_size (dr_a), init_b)
337038fd1498Szrj       && !vect_preserves_scalar_order_p (DR_STMT (dr_a), DR_STMT (dr_b)))
337138fd1498Szrj     return false;
337238fd1498Szrj 
337338fd1498Szrj   /* There is no alias if abs (DR_STEP) is greater than or equal to
337438fd1498Szrj      the bytes spanned by the combination of the two accesses.  */
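  /* For example (illustrative only): if DR_A accesses a[i] and DR_B accesses
     a[i + 1], with 4-byte elements and a shared variable step, the pair
     spans 8 bytes, so the lower bound is 8 and any step of at least 8 bytes
     in absolute value rules out an alias.  */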
337538fd1498Szrj   *lower_bound_out = init_b + vect_get_scalar_dr_size (dr_b) - init_a;
337638fd1498Szrj   return true;
337738fd1498Szrj }
337838fd1498Szrj 
337938fd1498Szrj /* Function vect_prune_runtime_alias_test_list.
338038fd1498Szrj 
338138fd1498Szrj    Prune a list of ddrs to be tested at run-time by versioning for alias.
338238fd1498Szrj    Merge several alias checks into one if possible.
338338fd1498Szrj    Return FALSE if the resulting list of ddrs is longer than allowed by
338438fd1498Szrj    PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE.  */
338538fd1498Szrj 
338638fd1498Szrj bool
338738fd1498Szrj vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
338838fd1498Szrj {
338938fd1498Szrj   typedef pair_hash <tree_operand_hash, tree_operand_hash> tree_pair_hash;
339038fd1498Szrj   hash_set <tree_pair_hash> compared_objects;
339138fd1498Szrj 
339238fd1498Szrj   vec<ddr_p> may_alias_ddrs = LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
339338fd1498Szrj   vec<dr_with_seg_len_pair_t> &comp_alias_ddrs
339438fd1498Szrj     = LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
339538fd1498Szrj   vec<vec_object_pair> &check_unequal_addrs
339638fd1498Szrj     = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo);
339738fd1498Szrj   poly_uint64 vect_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
339838fd1498Szrj   tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
339938fd1498Szrj 
340038fd1498Szrj   ddr_p ddr;
340138fd1498Szrj   unsigned int i;
340238fd1498Szrj   tree length_factor;
340338fd1498Szrj 
340438fd1498Szrj   if (dump_enabled_p ())
340538fd1498Szrj     dump_printf_loc (MSG_NOTE, vect_location,
340638fd1498Szrj                      "=== vect_prune_runtime_alias_test_list ===\n");
340738fd1498Szrj 
340838fd1498Szrj   /* Step values are irrelevant for aliasing if the number of vector
340938fd1498Szrj      iterations is equal to the number of scalar iterations (which can
341038fd1498Szrj      happen for fully-SLP loops).  */
341138fd1498Szrj   bool ignore_step_p = known_eq (LOOP_VINFO_VECT_FACTOR (loop_vinfo), 1U);
341238fd1498Szrj 
341338fd1498Szrj   if (!ignore_step_p)
341438fd1498Szrj     {
341538fd1498Szrj       /* Convert the checks for nonzero steps into bound tests.  */
341638fd1498Szrj       tree value;
341738fd1498Szrj       FOR_EACH_VEC_ELT (LOOP_VINFO_CHECK_NONZERO (loop_vinfo), i, value)
341838fd1498Szrj 	vect_check_lower_bound (loop_vinfo, value, true, 1);
341938fd1498Szrj     }
342038fd1498Szrj 
342138fd1498Szrj   if (may_alias_ddrs.is_empty ())
342238fd1498Szrj     return true;
342338fd1498Szrj 
342438fd1498Szrj   comp_alias_ddrs.create (may_alias_ddrs.length ());
342538fd1498Szrj 
342638fd1498Szrj   unsigned int loop_depth
342738fd1498Szrj     = index_in_loop_nest (LOOP_VINFO_LOOP (loop_vinfo)->num,
342838fd1498Szrj 			  LOOP_VINFO_LOOP_NEST (loop_vinfo));
342938fd1498Szrj 
343038fd1498Szrj   /* First, we collect all data ref pairs for aliasing checks.  */
343138fd1498Szrj   FOR_EACH_VEC_ELT (may_alias_ddrs, i, ddr)
343238fd1498Szrj     {
343338fd1498Szrj       int comp_res;
343438fd1498Szrj       poly_uint64 lower_bound;
343538fd1498Szrj       struct data_reference *dr_a, *dr_b;
343638fd1498Szrj       gimple *dr_group_first_a, *dr_group_first_b;
343738fd1498Szrj       tree segment_length_a, segment_length_b;
343838fd1498Szrj       unsigned HOST_WIDE_INT access_size_a, access_size_b;
343938fd1498Szrj       unsigned int align_a, align_b;
344038fd1498Szrj       gimple *stmt_a, *stmt_b;
344138fd1498Szrj 
344238fd1498Szrj       /* Ignore the alias if the VF we chose ended up being no greater
344338fd1498Szrj 	 than the dependence distance.  */
344438fd1498Szrj       if (dependence_distance_ge_vf (ddr, loop_depth, vect_factor))
344538fd1498Szrj 	continue;
344638fd1498Szrj 
344738fd1498Szrj       if (DDR_OBJECT_A (ddr))
344838fd1498Szrj 	{
344938fd1498Szrj 	  vec_object_pair new_pair (DDR_OBJECT_A (ddr), DDR_OBJECT_B (ddr));
345038fd1498Szrj 	  if (!compared_objects.add (new_pair))
345138fd1498Szrj 	    {
345238fd1498Szrj 	      if (dump_enabled_p ())
345338fd1498Szrj 		{
345438fd1498Szrj 		  dump_printf_loc (MSG_NOTE, vect_location, "checking that ");
345538fd1498Szrj 		  dump_generic_expr (MSG_NOTE, TDF_SLIM, new_pair.first);
345638fd1498Szrj 		  dump_printf (MSG_NOTE, " and ");
345738fd1498Szrj 		  dump_generic_expr (MSG_NOTE, TDF_SLIM, new_pair.second);
345838fd1498Szrj 		  dump_printf (MSG_NOTE, " have different addresses\n");
345938fd1498Szrj 		}
346038fd1498Szrj 	      LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).safe_push (new_pair);
346138fd1498Szrj 	    }
346238fd1498Szrj 	  continue;
346338fd1498Szrj 	}
346438fd1498Szrj 
346538fd1498Szrj       dr_a = DDR_A (ddr);
346638fd1498Szrj       stmt_a = DR_STMT (DDR_A (ddr));
346738fd1498Szrj 
346838fd1498Szrj       dr_b = DDR_B (ddr);
346938fd1498Szrj       stmt_b = DR_STMT (DDR_B (ddr));
347038fd1498Szrj 
347138fd1498Szrj       /* Skip the pair if inter-iteration dependencies are irrelevant
347238fd1498Szrj 	 and intra-iteration dependencies are guaranteed to be honored.  */
347338fd1498Szrj       if (ignore_step_p
347438fd1498Szrj 	  && (vect_preserves_scalar_order_p (stmt_a, stmt_b)
347538fd1498Szrj 	      || vectorizable_with_step_bound_p (dr_a, dr_b, &lower_bound)))
347638fd1498Szrj 	{
347738fd1498Szrj 	  if (dump_enabled_p ())
347838fd1498Szrj 	    {
347938fd1498Szrj 	      dump_printf_loc (MSG_NOTE, vect_location,
348038fd1498Szrj 			       "no need for alias check between ");
348138fd1498Szrj 	      dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a));
348238fd1498Szrj 	      dump_printf (MSG_NOTE, " and ");
348338fd1498Szrj 	      dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b));
348438fd1498Szrj 	      dump_printf (MSG_NOTE, " when VF is 1\n");
348538fd1498Szrj 	    }
348638fd1498Szrj 	  continue;
348738fd1498Szrj 	}
348838fd1498Szrj 
348938fd1498Szrj       /* See whether we can handle the alias using a bounds check on
349038fd1498Szrj 	 the step, and whether that's likely to be the best approach.
349138fd1498Szrj 	 (It might not be, for example, if the minimum step is much larger
349238fd1498Szrj 	 than the number of bytes handled by one vector iteration.)  */
349338fd1498Szrj       if (!ignore_step_p
349438fd1498Szrj 	  && TREE_CODE (DR_STEP (dr_a)) != INTEGER_CST
349538fd1498Szrj 	  && vectorizable_with_step_bound_p (dr_a, dr_b, &lower_bound)
349638fd1498Szrj 	  && (vect_small_gap_p (loop_vinfo, dr_a, lower_bound)
349738fd1498Szrj 	      || vect_small_gap_p (loop_vinfo, dr_b, lower_bound)))
349838fd1498Szrj 	{
349938fd1498Szrj 	  bool unsigned_p = dr_known_forward_stride_p (dr_a);
350038fd1498Szrj 	  if (dump_enabled_p ())
350138fd1498Szrj 	    {
350238fd1498Szrj 	      dump_printf_loc (MSG_NOTE, vect_location, "no alias between ");
350338fd1498Szrj 	      dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a));
350438fd1498Szrj 	      dump_printf (MSG_NOTE, " and ");
350538fd1498Szrj 	      dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b));
350638fd1498Szrj 	      dump_printf (MSG_NOTE, " when the step ");
350738fd1498Szrj 	      dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_STEP (dr_a));
350838fd1498Szrj 	      dump_printf (MSG_NOTE, " is outside ");
350938fd1498Szrj 	      if (unsigned_p)
351038fd1498Szrj 		dump_printf (MSG_NOTE, "[0");
351138fd1498Szrj 	      else
351238fd1498Szrj 		{
351338fd1498Szrj 		  dump_printf (MSG_NOTE, "(");
351438fd1498Szrj 		  dump_dec (MSG_NOTE, poly_int64 (-lower_bound));
351538fd1498Szrj 		}
351638fd1498Szrj 	      dump_printf (MSG_NOTE, ", ");
351738fd1498Szrj 	      dump_dec (MSG_NOTE, lower_bound);
351838fd1498Szrj 	      dump_printf (MSG_NOTE, ")\n");
351938fd1498Szrj 	    }
352038fd1498Szrj 	  vect_check_lower_bound (loop_vinfo, DR_STEP (dr_a), unsigned_p,
352138fd1498Szrj 				  lower_bound);
352238fd1498Szrj 	  continue;
352338fd1498Szrj 	}
352438fd1498Szrj 
352538fd1498Szrj       dr_group_first_a = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_a));
352638fd1498Szrj       if (dr_group_first_a)
352738fd1498Szrj 	{
352838fd1498Szrj 	  stmt_a = dr_group_first_a;
352938fd1498Szrj 	  dr_a = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_a));
353038fd1498Szrj 	}
353138fd1498Szrj 
353238fd1498Szrj       dr_group_first_b = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_b));
353338fd1498Szrj       if (dr_group_first_b)
353438fd1498Szrj 	{
353538fd1498Szrj 	  stmt_b = dr_group_first_b;
353638fd1498Szrj 	  dr_b = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_b));
353738fd1498Szrj 	}
353838fd1498Szrj 
353938fd1498Szrj       if (ignore_step_p)
354038fd1498Szrj 	{
354138fd1498Szrj 	  segment_length_a = size_zero_node;
354238fd1498Szrj 	  segment_length_b = size_zero_node;
354338fd1498Szrj 	}
354438fd1498Szrj       else
354538fd1498Szrj 	{
354638fd1498Szrj 	  if (!operand_equal_p (DR_STEP (dr_a), DR_STEP (dr_b), 0))
354738fd1498Szrj 	    length_factor = scalar_loop_iters;
354838fd1498Szrj 	  else
354938fd1498Szrj 	    length_factor = size_int (vect_factor);
355038fd1498Szrj 	  segment_length_a = vect_vfa_segment_size (dr_a, length_factor);
355138fd1498Szrj 	  segment_length_b = vect_vfa_segment_size (dr_b, length_factor);
355238fd1498Szrj 	}
355338fd1498Szrj       access_size_a = vect_vfa_access_size (dr_a);
355438fd1498Szrj       access_size_b = vect_vfa_access_size (dr_b);
355538fd1498Szrj       align_a = vect_vfa_align (dr_a);
355638fd1498Szrj       align_b = vect_vfa_align (dr_b);
355738fd1498Szrj 
355838fd1498Szrj       comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (dr_a),
355938fd1498Szrj 					DR_BASE_ADDRESS (dr_b));
356038fd1498Szrj       if (comp_res == 0)
356138fd1498Szrj 	comp_res = data_ref_compare_tree (DR_OFFSET (dr_a),
356238fd1498Szrj 					  DR_OFFSET (dr_b));
356338fd1498Szrj 
356438fd1498Szrj       /* See whether the alias is known at compilation time.  */
356538fd1498Szrj       if (comp_res == 0
356638fd1498Szrj 	  && TREE_CODE (DR_STEP (dr_a)) == INTEGER_CST
356738fd1498Szrj 	  && TREE_CODE (DR_STEP (dr_b)) == INTEGER_CST
356838fd1498Szrj 	  && poly_int_tree_p (segment_length_a)
356938fd1498Szrj 	  && poly_int_tree_p (segment_length_b))
357038fd1498Szrj 	{
357138fd1498Szrj 	  int res = vect_compile_time_alias (dr_a, dr_b,
357238fd1498Szrj 					     segment_length_a,
357338fd1498Szrj 					     segment_length_b,
357438fd1498Szrj 					     access_size_a,
357538fd1498Szrj 					     access_size_b);
357638fd1498Szrj 	  if (res >= 0 && dump_enabled_p ())
357738fd1498Szrj 	    {
357838fd1498Szrj 	      dump_printf_loc (MSG_NOTE, vect_location,
357938fd1498Szrj 			       "can tell at compile time that ");
358038fd1498Szrj 	      dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a));
358138fd1498Szrj 	      dump_printf (MSG_NOTE, " and ");
358238fd1498Szrj 	      dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b));
358338fd1498Szrj 	      if (res == 0)
358438fd1498Szrj 		dump_printf (MSG_NOTE, " do not alias\n");
358538fd1498Szrj 	      else
358638fd1498Szrj 		dump_printf (MSG_NOTE, " alias\n");
358738fd1498Szrj 	    }
358838fd1498Szrj 
358938fd1498Szrj 	  if (res == 0)
359038fd1498Szrj 	    continue;
359138fd1498Szrj 
359238fd1498Szrj 	  if (res == 1)
359338fd1498Szrj 	    {
359438fd1498Szrj 	      if (dump_enabled_p ())
359538fd1498Szrj 		dump_printf_loc (MSG_NOTE, vect_location,
359638fd1498Szrj 				 "not vectorized: compilation time alias.\n");
359738fd1498Szrj 	      return false;
359838fd1498Szrj 	    }
359938fd1498Szrj 	}
360038fd1498Szrj 
360138fd1498Szrj       dr_with_seg_len_pair_t dr_with_seg_len_pair
360238fd1498Szrj 	(dr_with_seg_len (dr_a, segment_length_a, access_size_a, align_a),
360338fd1498Szrj 	 dr_with_seg_len (dr_b, segment_length_b, access_size_b, align_b));
360438fd1498Szrj 
360538fd1498Szrj       /* Canonicalize pairs by sorting the two DR members.  */
360638fd1498Szrj       if (comp_res > 0)
360738fd1498Szrj 	std::swap (dr_with_seg_len_pair.first, dr_with_seg_len_pair.second);
360838fd1498Szrj 
360938fd1498Szrj       comp_alias_ddrs.safe_push (dr_with_seg_len_pair);
361038fd1498Szrj     }
361138fd1498Szrj 
361238fd1498Szrj   prune_runtime_alias_test_list (&comp_alias_ddrs, vect_factor);
361338fd1498Szrj 
361438fd1498Szrj   unsigned int count = (comp_alias_ddrs.length ()
361538fd1498Szrj 			+ check_unequal_addrs.length ());
361638fd1498Szrj 
361738fd1498Szrj   dump_printf_loc (MSG_NOTE, vect_location,
361838fd1498Szrj 		   "improved number of alias checks from %d to %d\n",
361938fd1498Szrj 		   may_alias_ddrs.length (), count);
362038fd1498Szrj   if ((int) count > PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
362138fd1498Szrj     {
362238fd1498Szrj       if (dump_enabled_p ())
362338fd1498Szrj 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
362438fd1498Szrj 			 "number of versioning for alias "
362538fd1498Szrj 			 "run-time tests exceeds %d "
362638fd1498Szrj 			 "(--param vect-max-version-for-alias-checks)\n",
362738fd1498Szrj 			 PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS));
362838fd1498Szrj       return false;
362938fd1498Szrj     }
363038fd1498Szrj 
363138fd1498Szrj   return true;
363238fd1498Szrj }
363338fd1498Szrj 
363438fd1498Szrj /* Check whether we can use an internal function for a gather load
363538fd1498Szrj    or scatter store.  READ_P is true for loads and false for stores.
363638fd1498Szrj    MASKED_P is true if the load or store is conditional.  MEMORY_TYPE is
363738fd1498Szrj    the type of the memory elements being loaded or stored.  OFFSET_BITS
363838fd1498Szrj    is the number of bits in each scalar offset and OFFSET_SIGN is the
363938fd1498Szrj    sign of the offset.  SCALE is the amount by which the offset should
364038fd1498Szrj    be multiplied *after* it has been converted to address width.
364138fd1498Szrj 
364238fd1498Szrj    Return true if the function is supported, storing the function
364338fd1498Szrj    id in *IFN_OUT and the type of a vector element in *ELEMENT_TYPE_OUT.  */
364438fd1498Szrj 
364538fd1498Szrj bool
364638fd1498Szrj vect_gather_scatter_fn_p (bool read_p, bool masked_p, tree vectype,
364738fd1498Szrj 			  tree memory_type, unsigned int offset_bits,
364838fd1498Szrj 			  signop offset_sign, int scale,
364938fd1498Szrj 			  internal_fn *ifn_out, tree *element_type_out)
365038fd1498Szrj {
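  /* As an illustrative example only: a masked gather of 32-bit integers
     whose scalar offsets are 32-bit signed values scaled by 4 would ask
     whether the target supports IFN_MASK_GATHER_LOAD for that vector type,
     memory type, offset sign and scale.  */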
365138fd1498Szrj   unsigned int memory_bits = tree_to_uhwi (TYPE_SIZE (memory_type));
365238fd1498Szrj   unsigned int element_bits = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype)));
365338fd1498Szrj   if (offset_bits > element_bits)
365438fd1498Szrj     /* Internal functions require the offset to be the same width as
365538fd1498Szrj        the vector elements.  We can extend narrower offsets, but it isn't
365638fd1498Szrj        safe to truncate wider offsets.  */
365738fd1498Szrj     return false;
365838fd1498Szrj 
365938fd1498Szrj   if (element_bits != memory_bits)
366038fd1498Szrj     /* For now the vector elements must be the same width as the
366138fd1498Szrj        memory elements.  */
366238fd1498Szrj     return false;
366338fd1498Szrj 
366438fd1498Szrj   /* Work out which function we need.  */
366538fd1498Szrj   internal_fn ifn;
366638fd1498Szrj   if (read_p)
366738fd1498Szrj     ifn = masked_p ? IFN_MASK_GATHER_LOAD : IFN_GATHER_LOAD;
366838fd1498Szrj   else
366938fd1498Szrj     ifn = masked_p ? IFN_MASK_SCATTER_STORE : IFN_SCATTER_STORE;
367038fd1498Szrj 
367138fd1498Szrj   /* Test whether the target supports this combination.  */
367238fd1498Szrj   if (!internal_gather_scatter_fn_supported_p (ifn, vectype, memory_type,
367338fd1498Szrj 					       offset_sign, scale))
367438fd1498Szrj     return false;
367538fd1498Szrj 
367638fd1498Szrj   *ifn_out = ifn;
367738fd1498Szrj   *element_type_out = TREE_TYPE (vectype);
367838fd1498Szrj   return true;
367938fd1498Szrj }
368038fd1498Szrj 
368138fd1498Szrj /* CALL is a call to an internal gather load or scatter store function.
368238fd1498Szrj    Describe the operation in INFO.  */
368338fd1498Szrj 
368438fd1498Szrj static void
368538fd1498Szrj vect_describe_gather_scatter_call (gcall *call, gather_scatter_info *info)
368638fd1498Szrj {
368738fd1498Szrj   stmt_vec_info stmt_info = vinfo_for_stmt (call);
368838fd1498Szrj   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
368938fd1498Szrj   data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
369038fd1498Szrj 
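  /* The internal gather/scatter calls pass the base address, the vector of
     offsets and the scale as their first three arguments; the reads of
     arguments 0-2 below rely on that layout.  */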
369138fd1498Szrj   info->ifn = gimple_call_internal_fn (call);
369238fd1498Szrj   info->decl = NULL_TREE;
369338fd1498Szrj   info->base = gimple_call_arg (call, 0);
369438fd1498Szrj   info->offset = gimple_call_arg (call, 1);
369538fd1498Szrj   info->offset_dt = vect_unknown_def_type;
369638fd1498Szrj   info->offset_vectype = NULL_TREE;
369738fd1498Szrj   info->scale = TREE_INT_CST_LOW (gimple_call_arg (call, 2));
369838fd1498Szrj   info->element_type = TREE_TYPE (vectype);
369938fd1498Szrj   info->memory_type = TREE_TYPE (DR_REF (dr));
370038fd1498Szrj }
370138fd1498Szrj 
370238fd1498Szrj /* Return true if a non-affine read or write in STMT is suitable for a
370338fd1498Szrj    gather load or scatter store.  Describe the operation in *INFO if so.  */
370438fd1498Szrj 
370538fd1498Szrj bool
370638fd1498Szrj vect_check_gather_scatter (gimple *stmt, loop_vec_info loop_vinfo,
370738fd1498Szrj 			   gather_scatter_info *info)
370838fd1498Szrj {
370938fd1498Szrj   HOST_WIDE_INT scale = 1;
371038fd1498Szrj   poly_int64 pbitpos, pbitsize;
371138fd1498Szrj   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
371238fd1498Szrj   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
371338fd1498Szrj   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
371438fd1498Szrj   tree offtype = NULL_TREE;
371538fd1498Szrj   tree decl = NULL_TREE, base, off;
371638fd1498Szrj   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
371738fd1498Szrj   tree memory_type = TREE_TYPE (DR_REF (dr));
371838fd1498Szrj   machine_mode pmode;
371938fd1498Szrj   int punsignedp, reversep, pvolatilep = 0;
372038fd1498Szrj   internal_fn ifn;
372138fd1498Szrj   tree element_type;
372238fd1498Szrj   bool masked_p = false;
372338fd1498Szrj 
372438fd1498Szrj   /* See whether this is already a call to a gather/scatter internal function.
372538fd1498Szrj      If not, see whether it's a masked load or store.  */
372638fd1498Szrj   gcall *call = dyn_cast <gcall *> (stmt);
372738fd1498Szrj   if (call && gimple_call_internal_p (call))
372838fd1498Szrj     {
372938fd1498Szrj       ifn = gimple_call_internal_fn (stmt);
373038fd1498Szrj       if (internal_gather_scatter_fn_p (ifn))
373138fd1498Szrj 	{
373238fd1498Szrj 	  vect_describe_gather_scatter_call (call, info);
373338fd1498Szrj 	  return true;
373438fd1498Szrj 	}
373538fd1498Szrj       masked_p = (ifn == IFN_MASK_LOAD || ifn == IFN_MASK_STORE);
373638fd1498Szrj     }
373738fd1498Szrj 
373838fd1498Szrj   /* True if we should aim to use internal functions rather than
373938fd1498Szrj      built-in functions.  */
374038fd1498Szrj   bool use_ifn_p = (DR_IS_READ (dr)
374138fd1498Szrj 		    ? supports_vec_gather_load_p ()
374238fd1498Szrj 		    : supports_vec_scatter_store_p ());
374338fd1498Szrj 
374438fd1498Szrj   base = DR_REF (dr);
374538fd1498Szrj   /* For masked loads/stores, DR_REF (dr) is an artificial MEM_REF,
374638fd1498Szrj      see if we can use the def stmt of the address.  */
374738fd1498Szrj   if (masked_p
374838fd1498Szrj       && TREE_CODE (base) == MEM_REF
374938fd1498Szrj       && TREE_CODE (TREE_OPERAND (base, 0)) == SSA_NAME
375038fd1498Szrj       && integer_zerop (TREE_OPERAND (base, 1))
375138fd1498Szrj       && !expr_invariant_in_loop_p (loop, TREE_OPERAND (base, 0)))
375238fd1498Szrj     {
375338fd1498Szrj       gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base, 0));
375438fd1498Szrj       if (is_gimple_assign (def_stmt)
375538fd1498Szrj 	  && gimple_assign_rhs_code (def_stmt) == ADDR_EXPR)
375638fd1498Szrj 	base = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
375738fd1498Szrj     }
375838fd1498Szrj 
375938fd1498Szrj   /* The gather and scatter builtins need an address of the form
376038fd1498Szrj      loop_invariant + vector * {1, 2, 4, 8}
376138fd1498Szrj      or
376238fd1498Szrj      loop_invariant + sign_extend (vector) * { 1, 2, 4, 8 }.
376338fd1498Szrj      Unfortunately DR_BASE_ADDRESS/DR_OFFSET can be a mixture
376438fd1498Szrj      of loop invariants/SSA_NAMEs defined in the loop, with casts,
376538fd1498Szrj      multiplications and additions in it.  To get a vector, we need
376638fd1498Szrj      a single SSA_NAME that will be defined in the loop and will
376738fd1498Szrj      contain everything that is not loop invariant and that can be
376838fd1498Szrj      vectorized.  The following code attempts to find such a preexisting
376938fd1498Szrj      SSA_NAME OFF and put the loop invariants into a tree BASE
377038fd1498Szrj      that can be gimplified before the loop.  */
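  /* For example (illustrative only), for a gather such as

	 for (i = 0; i < n; i++)
	   sum += a[idx[i]];

     BASE collects the loop-invariant part (here the address of "a"), OFF
     ends up as the SSA_NAME computed from idx[i], and where possible the
     multiplication by the element size is folded into SCALE.  */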
377138fd1498Szrj   base = get_inner_reference (base, &pbitsize, &pbitpos, &off, &pmode,
377238fd1498Szrj 			      &punsignedp, &reversep, &pvolatilep);
377338fd1498Szrj   gcc_assert (base && !reversep);
377438fd1498Szrj   poly_int64 pbytepos = exact_div (pbitpos, BITS_PER_UNIT);
377538fd1498Szrj 
377638fd1498Szrj   if (TREE_CODE (base) == MEM_REF)
377738fd1498Szrj     {
377838fd1498Szrj       if (!integer_zerop (TREE_OPERAND (base, 1)))
377938fd1498Szrj 	{
378038fd1498Szrj 	  if (off == NULL_TREE)
378138fd1498Szrj 	    off = wide_int_to_tree (sizetype, mem_ref_offset (base));
378238fd1498Szrj 	  else
378338fd1498Szrj 	    off = size_binop (PLUS_EXPR, off,
378438fd1498Szrj 			      fold_convert (sizetype, TREE_OPERAND (base, 1)));
378538fd1498Szrj 	}
378638fd1498Szrj       base = TREE_OPERAND (base, 0);
378738fd1498Szrj     }
378838fd1498Szrj   else
378938fd1498Szrj     base = build_fold_addr_expr (base);
379038fd1498Szrj 
379138fd1498Szrj   if (off == NULL_TREE)
379238fd1498Szrj     off = size_zero_node;
379338fd1498Szrj 
379438fd1498Szrj   /* If BASE is not loop invariant then, if OFF is 0, we start with just
379538fd1498Szrj      the constant offset in the loop-invariant BASE and continue with BASE
379638fd1498Szrj      as OFF; otherwise give up.
379738fd1498Szrj      We could handle that case by gimplifying the addition of BASE + OFF
379838fd1498Szrj      into some SSA_NAME and using that as OFF, but for now punt.  */
379938fd1498Szrj   if (!expr_invariant_in_loop_p (loop, base))
380038fd1498Szrj     {
380138fd1498Szrj       if (!integer_zerop (off))
380238fd1498Szrj 	return false;
380338fd1498Szrj       off = base;
380438fd1498Szrj       base = size_int (pbytepos);
380538fd1498Szrj     }
380638fd1498Szrj   /* Otherwise put base + constant offset into the loop invariant BASE
380738fd1498Szrj      and continue with OFF.  */
380838fd1498Szrj   else
380938fd1498Szrj     {
381038fd1498Szrj       base = fold_convert (sizetype, base);
381138fd1498Szrj       base = size_binop (PLUS_EXPR, base, size_int (pbytepos));
381238fd1498Szrj     }
381338fd1498Szrj 
381438fd1498Szrj   /* OFF at this point may be either an SSA_NAME or some tree expression
381538fd1498Szrj      from get_inner_reference.  Try to peel off loop invariants from it
381638fd1498Szrj      into BASE as long as possible.  */
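  /* As an illustrative example only: a byte offset of the form
     (sizetype) i * 4 + CST, with loop-invariant CST, ends up with OFF being
     the SSA_NAME for "i", the multiplication by 4 folded into SCALE where
     possible, and CST added to BASE.  */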
381738fd1498Szrj   STRIP_NOPS (off);
381838fd1498Szrj   while (offtype == NULL_TREE)
381938fd1498Szrj     {
382038fd1498Szrj       enum tree_code code;
382138fd1498Szrj       tree op0, op1, add = NULL_TREE;
382238fd1498Szrj 
382338fd1498Szrj       if (TREE_CODE (off) == SSA_NAME)
382438fd1498Szrj 	{
382538fd1498Szrj 	  gimple *def_stmt = SSA_NAME_DEF_STMT (off);
382638fd1498Szrj 
382738fd1498Szrj 	  if (expr_invariant_in_loop_p (loop, off))
382838fd1498Szrj 	    return false;
382938fd1498Szrj 
383038fd1498Szrj 	  if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
383138fd1498Szrj 	    break;
383238fd1498Szrj 
383338fd1498Szrj 	  op0 = gimple_assign_rhs1 (def_stmt);
383438fd1498Szrj 	  code = gimple_assign_rhs_code (def_stmt);
383538fd1498Szrj 	  op1 = gimple_assign_rhs2 (def_stmt);
383638fd1498Szrj 	}
383738fd1498Szrj       else
383838fd1498Szrj 	{
383938fd1498Szrj 	  if (get_gimple_rhs_class (TREE_CODE (off)) == GIMPLE_TERNARY_RHS)
384038fd1498Szrj 	    return false;
384138fd1498Szrj 	  code = TREE_CODE (off);
384238fd1498Szrj 	  extract_ops_from_tree (off, &code, &op0, &op1);
384338fd1498Szrj 	}
384438fd1498Szrj       switch (code)
384538fd1498Szrj 	{
384638fd1498Szrj 	case POINTER_PLUS_EXPR:
384738fd1498Szrj 	case PLUS_EXPR:
384838fd1498Szrj 	  if (expr_invariant_in_loop_p (loop, op0))
384938fd1498Szrj 	    {
385038fd1498Szrj 	      add = op0;
385138fd1498Szrj 	      off = op1;
385238fd1498Szrj 	    do_add:
385338fd1498Szrj 	      add = fold_convert (sizetype, add);
385438fd1498Szrj 	      if (scale != 1)
385538fd1498Szrj 		add = size_binop (MULT_EXPR, add, size_int (scale));
385638fd1498Szrj 	      base = size_binop (PLUS_EXPR, base, add);
385738fd1498Szrj 	      continue;
385838fd1498Szrj 	    }
385938fd1498Szrj 	  if (expr_invariant_in_loop_p (loop, op1))
386038fd1498Szrj 	    {
386138fd1498Szrj 	      add = op1;
386238fd1498Szrj 	      off = op0;
386338fd1498Szrj 	      goto do_add;
386438fd1498Szrj 	    }
386538fd1498Szrj 	  break;
386638fd1498Szrj 	case MINUS_EXPR:
386738fd1498Szrj 	  if (expr_invariant_in_loop_p (loop, op1))
386838fd1498Szrj 	    {
386938fd1498Szrj 	      add = fold_convert (sizetype, op1);
387038fd1498Szrj 	      add = size_binop (MINUS_EXPR, size_zero_node, add);
387138fd1498Szrj 	      off = op0;
387238fd1498Szrj 	      goto do_add;
387338fd1498Szrj 	    }
387438fd1498Szrj 	  break;
387538fd1498Szrj 	case MULT_EXPR:
387638fd1498Szrj 	  if (scale == 1 && tree_fits_shwi_p (op1))
387738fd1498Szrj 	    {
387838fd1498Szrj 	      int new_scale = tree_to_shwi (op1);
387938fd1498Szrj 	      /* Only treat this as a scaling operation if the target
388038fd1498Szrj 		 supports it.  */
388138fd1498Szrj 	      if (use_ifn_p
388238fd1498Szrj 		  && !vect_gather_scatter_fn_p (DR_IS_READ (dr), masked_p,
388338fd1498Szrj 						vectype, memory_type, 1,
388438fd1498Szrj 						TYPE_SIGN (TREE_TYPE (op0)),
388538fd1498Szrj 						new_scale, &ifn,
388638fd1498Szrj 						&element_type))
388738fd1498Szrj 		break;
388838fd1498Szrj 	      scale = new_scale;
388938fd1498Szrj 	      off = op0;
389038fd1498Szrj 	      continue;
389138fd1498Szrj 	    }
389238fd1498Szrj 	  break;
389338fd1498Szrj 	case SSA_NAME:
389438fd1498Szrj 	  off = op0;
389538fd1498Szrj 	  continue;
389638fd1498Szrj 	CASE_CONVERT:
389738fd1498Szrj 	  if (!POINTER_TYPE_P (TREE_TYPE (op0))
389838fd1498Szrj 	      && !INTEGRAL_TYPE_P (TREE_TYPE (op0)))
389938fd1498Szrj 	    break;
390038fd1498Szrj 	  if (TYPE_PRECISION (TREE_TYPE (op0))
390138fd1498Szrj 	      == TYPE_PRECISION (TREE_TYPE (off)))
390238fd1498Szrj 	    {
390338fd1498Szrj 	      off = op0;
390438fd1498Szrj 	      continue;
390538fd1498Szrj 	    }
390638fd1498Szrj 
390738fd1498Szrj 	  /* The internal functions need the offset to be the same width
390838fd1498Szrj 	     as the elements of VECTYPE.  Don't include operations that
390938fd1498Szrj 	     cast the offset from that width to a different width.  */
391038fd1498Szrj 	  if (use_ifn_p
391138fd1498Szrj 	      && (int_size_in_bytes (TREE_TYPE (vectype))
391238fd1498Szrj 		  == int_size_in_bytes (TREE_TYPE (off))))
391338fd1498Szrj 	    break;
391438fd1498Szrj 
391538fd1498Szrj 	  if (TYPE_PRECISION (TREE_TYPE (op0))
391638fd1498Szrj 	      < TYPE_PRECISION (TREE_TYPE (off)))
391738fd1498Szrj 	    {
391838fd1498Szrj 	      off = op0;
391938fd1498Szrj 	      offtype = TREE_TYPE (off);
392038fd1498Szrj 	      STRIP_NOPS (off);
392138fd1498Szrj 	      continue;
392238fd1498Szrj 	    }
392338fd1498Szrj 	  break;
392438fd1498Szrj 	default:
392538fd1498Szrj 	  break;
392638fd1498Szrj 	}
392738fd1498Szrj       break;
392838fd1498Szrj     }
392938fd1498Szrj 
393038fd1498Szrj   /* If at the end OFF still isn't an SSA_NAME or isn't
393138fd1498Szrj      defined in the loop, punt.  */
393238fd1498Szrj   if (TREE_CODE (off) != SSA_NAME
393338fd1498Szrj       || expr_invariant_in_loop_p (loop, off))
393438fd1498Szrj     return false;
393538fd1498Szrj 
393638fd1498Szrj   if (offtype == NULL_TREE)
393738fd1498Szrj     offtype = TREE_TYPE (off);
393838fd1498Szrj 
393938fd1498Szrj   if (use_ifn_p)
394038fd1498Szrj     {
394138fd1498Szrj       if (!vect_gather_scatter_fn_p (DR_IS_READ (dr), masked_p, vectype,
394238fd1498Szrj 				     memory_type, TYPE_PRECISION (offtype),
394338fd1498Szrj 				     TYPE_SIGN (offtype), scale, &ifn,
394438fd1498Szrj 				     &element_type))
394538fd1498Szrj 	return false;
394638fd1498Szrj     }
394738fd1498Szrj   else
394838fd1498Szrj     {
394938fd1498Szrj       if (DR_IS_READ (dr))
395038fd1498Szrj 	{
395138fd1498Szrj 	  if (targetm.vectorize.builtin_gather)
395238fd1498Szrj 	    decl = targetm.vectorize.builtin_gather (vectype, offtype, scale);
395338fd1498Szrj 	}
395438fd1498Szrj       else
395538fd1498Szrj 	{
395638fd1498Szrj 	  if (targetm.vectorize.builtin_scatter)
395738fd1498Szrj 	    decl = targetm.vectorize.builtin_scatter (vectype, offtype, scale);
395838fd1498Szrj 	}
395938fd1498Szrj 
396038fd1498Szrj       if (!decl)
396138fd1498Szrj 	return false;
396238fd1498Szrj 
396338fd1498Szrj       ifn = IFN_LAST;
396438fd1498Szrj       element_type = TREE_TYPE (vectype);
396538fd1498Szrj     }
396638fd1498Szrj 
396738fd1498Szrj   info->ifn = ifn;
396838fd1498Szrj   info->decl = decl;
396938fd1498Szrj   info->base = base;
397038fd1498Szrj   info->offset = off;
397138fd1498Szrj   info->offset_dt = vect_unknown_def_type;
397238fd1498Szrj   info->offset_vectype = NULL_TREE;
397338fd1498Szrj   info->scale = scale;
397438fd1498Szrj   info->element_type = element_type;
397538fd1498Szrj   info->memory_type = memory_type;
397638fd1498Szrj   return true;
397738fd1498Szrj }
397838fd1498Szrj 
397938fd1498Szrj /* Function vect_analyze_data_refs.
398038fd1498Szrj 
398138fd1498Szrj   Find all the data references in the loop or basic block.
398238fd1498Szrj 
398338fd1498Szrj    The general structure of the analysis of data refs in the vectorizer is as
398438fd1498Szrj    follows:
398538fd1498Szrj    1- vect_analyze_data_refs(loop/bb): call
398638fd1498Szrj       compute_data_dependences_for_loop/bb to find and analyze all data-refs
398738fd1498Szrj       in the loop/bb and their dependences.
398838fd1498Szrj    2- vect_analyze_dependences(): apply dependence testing using ddrs.
398938fd1498Szrj    3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
399038fd1498Szrj    4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
399138fd1498Szrj 
399238fd1498Szrj */
399338fd1498Szrj 
399438fd1498Szrj bool
399538fd1498Szrj vect_analyze_data_refs (vec_info *vinfo, poly_uint64 *min_vf)
399638fd1498Szrj {
399738fd1498Szrj   struct loop *loop = NULL;
399838fd1498Szrj   unsigned int i;
399938fd1498Szrj   struct data_reference *dr;
400038fd1498Szrj   tree scalar_type;
400138fd1498Szrj 
400238fd1498Szrj   if (dump_enabled_p ())
400338fd1498Szrj     dump_printf_loc (MSG_NOTE, vect_location,
400438fd1498Szrj 		     "=== vect_analyze_data_refs ===\n");
400538fd1498Szrj 
400638fd1498Szrj   if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
400738fd1498Szrj     loop = LOOP_VINFO_LOOP (loop_vinfo);
400838fd1498Szrj 
400938fd1498Szrj   /* Go through the data-refs, check that the analysis succeeded.  Update
401038fd1498Szrj      pointer from stmt_vec_info struct to DR and vectype.  */
401138fd1498Szrj 
401238fd1498Szrj   vec<data_reference_p> datarefs = vinfo->datarefs;
401338fd1498Szrj   FOR_EACH_VEC_ELT (datarefs, i, dr)
401438fd1498Szrj     {
401538fd1498Szrj       gimple *stmt;
401638fd1498Szrj       stmt_vec_info stmt_info;
401738fd1498Szrj       tree base, offset, init;
401838fd1498Szrj       enum { SG_NONE, GATHER, SCATTER } gatherscatter = SG_NONE;
401938fd1498Szrj       bool simd_lane_access = false;
402038fd1498Szrj       poly_uint64 vf;
402138fd1498Szrj 
402238fd1498Szrj again:
402338fd1498Szrj       if (!dr || !DR_REF (dr))
402438fd1498Szrj         {
402538fd1498Szrj           if (dump_enabled_p ())
402638fd1498Szrj 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
402738fd1498Szrj 	                     "not vectorized: unhandled data-ref\n");
402838fd1498Szrj           return false;
402938fd1498Szrj         }
403038fd1498Szrj 
403138fd1498Szrj       stmt = DR_STMT (dr);
403238fd1498Szrj       stmt_info = vinfo_for_stmt (stmt);
403338fd1498Szrj 
403438fd1498Szrj       /* Discard clobbers from the dataref vector.  We will remove
403538fd1498Szrj          clobber stmts during vectorization.  */
403638fd1498Szrj       if (gimple_clobber_p (stmt))
403738fd1498Szrj 	{
403838fd1498Szrj 	  free_data_ref (dr);
403938fd1498Szrj 	  if (i == datarefs.length () - 1)
404038fd1498Szrj 	    {
404138fd1498Szrj 	      datarefs.pop ();
404238fd1498Szrj 	      break;
404338fd1498Szrj 	    }
404438fd1498Szrj 	  datarefs.ordered_remove (i);
404538fd1498Szrj 	  dr = datarefs[i];
404638fd1498Szrj 	  goto again;
404738fd1498Szrj 	}
404838fd1498Szrj 
404938fd1498Szrj       /* Check that analysis of the data-ref succeeded.  */
405038fd1498Szrj       if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
405138fd1498Szrj 	  || !DR_STEP (dr))
405238fd1498Szrj         {
405338fd1498Szrj 	  bool maybe_gather
405438fd1498Szrj 	    = DR_IS_READ (dr)
405538fd1498Szrj 	      && !TREE_THIS_VOLATILE (DR_REF (dr))
405638fd1498Szrj 	      && (targetm.vectorize.builtin_gather != NULL
405738fd1498Szrj 		  || supports_vec_gather_load_p ());
405838fd1498Szrj 	  bool maybe_scatter
405938fd1498Szrj 	    = DR_IS_WRITE (dr)
406038fd1498Szrj 	      && !TREE_THIS_VOLATILE (DR_REF (dr))
406138fd1498Szrj 	      && (targetm.vectorize.builtin_scatter != NULL
406238fd1498Szrj 		  || supports_vec_scatter_store_p ());
406338fd1498Szrj 	  bool maybe_simd_lane_access
406438fd1498Szrj 	    = is_a <loop_vec_info> (vinfo) && loop->simduid;
406538fd1498Szrj 
406638fd1498Szrj 	  /* If target supports vector gather loads or scatter stores, or if
406738fd1498Szrj 	     this might be a SIMD lane access, see whether they can be used.  */
406838fd1498Szrj 	  if (is_a <loop_vec_info> (vinfo)
406938fd1498Szrj 	      && (maybe_gather || maybe_scatter || maybe_simd_lane_access)
407038fd1498Szrj 	      && !nested_in_vect_loop_p (loop, stmt))
407138fd1498Szrj 	    {
407238fd1498Szrj 	      struct data_reference *newdr
407338fd1498Szrj 		= create_data_ref (NULL, loop_containing_stmt (stmt),
407438fd1498Szrj 				   DR_REF (dr), stmt, !maybe_scatter,
407538fd1498Szrj 				   DR_IS_CONDITIONAL_IN_STMT (dr));
407638fd1498Szrj 	      gcc_assert (newdr != NULL && DR_REF (newdr));
407738fd1498Szrj 	      if (DR_BASE_ADDRESS (newdr)
407838fd1498Szrj 		  && DR_OFFSET (newdr)
407938fd1498Szrj 		  && DR_INIT (newdr)
408038fd1498Szrj 		  && DR_STEP (newdr)
408138fd1498Szrj 		  && integer_zerop (DR_STEP (newdr)))
408238fd1498Szrj 		{
408338fd1498Szrj 		  if (maybe_simd_lane_access)
408438fd1498Szrj 		    {
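		      /* Look for an offset of the form
			 GOMP_SIMD_LANE (simduid) * sizeof (element), which
			 identifies a per-SIMD-lane access into an OpenMP
			 simd array.  */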
408538fd1498Szrj 		      tree off = DR_OFFSET (newdr);
408638fd1498Szrj 		      STRIP_NOPS (off);
408738fd1498Szrj 		      if (TREE_CODE (DR_INIT (newdr)) == INTEGER_CST
408838fd1498Szrj 			  && TREE_CODE (off) == MULT_EXPR
408938fd1498Szrj 			  && tree_fits_uhwi_p (TREE_OPERAND (off, 1)))
409038fd1498Szrj 			{
409138fd1498Szrj 			  tree step = TREE_OPERAND (off, 1);
409238fd1498Szrj 			  off = TREE_OPERAND (off, 0);
409338fd1498Szrj 			  STRIP_NOPS (off);
409438fd1498Szrj 			  if (CONVERT_EXPR_P (off)
409538fd1498Szrj 			      && TYPE_PRECISION (TREE_TYPE (TREE_OPERAND (off,
409638fd1498Szrj 									  0)))
409738fd1498Szrj 				 < TYPE_PRECISION (TREE_TYPE (off)))
409838fd1498Szrj 			    off = TREE_OPERAND (off, 0);
409938fd1498Szrj 			  if (TREE_CODE (off) == SSA_NAME)
410038fd1498Szrj 			    {
410138fd1498Szrj 			      gimple *def = SSA_NAME_DEF_STMT (off);
410238fd1498Szrj 			      tree reft = TREE_TYPE (DR_REF (newdr));
410338fd1498Szrj 			      if (is_gimple_call (def)
410438fd1498Szrj 				  && gimple_call_internal_p (def)
410538fd1498Szrj 				  && (gimple_call_internal_fn (def)
410638fd1498Szrj 				      == IFN_GOMP_SIMD_LANE))
410738fd1498Szrj 				{
410838fd1498Szrj 				  tree arg = gimple_call_arg (def, 0);
410938fd1498Szrj 				  gcc_assert (TREE_CODE (arg) == SSA_NAME);
411038fd1498Szrj 				  arg = SSA_NAME_VAR (arg);
411138fd1498Szrj 				  if (arg == loop->simduid
411238fd1498Szrj 				      /* For now.  */
411338fd1498Szrj 				      && tree_int_cst_equal
411438fd1498Szrj 					   (TYPE_SIZE_UNIT (reft),
411538fd1498Szrj 					    step))
411638fd1498Szrj 				    {
411738fd1498Szrj 				      DR_OFFSET (newdr) = ssize_int (0);
411838fd1498Szrj 				      DR_STEP (newdr) = step;
411938fd1498Szrj 				      DR_OFFSET_ALIGNMENT (newdr)
412038fd1498Szrj 					= BIGGEST_ALIGNMENT;
412138fd1498Szrj 				      DR_STEP_ALIGNMENT (newdr)
412238fd1498Szrj 					= highest_pow2_factor (step);
412338fd1498Szrj 				      dr = newdr;
412438fd1498Szrj 				      simd_lane_access = true;
412538fd1498Szrj 				    }
412638fd1498Szrj 				}
412738fd1498Szrj 			    }
412838fd1498Szrj 			}
412938fd1498Szrj 		    }
413038fd1498Szrj 		  if (!simd_lane_access && (maybe_gather || maybe_scatter))
413138fd1498Szrj 		    {
413238fd1498Szrj 		      dr = newdr;
413338fd1498Szrj 		      if (maybe_gather)
413438fd1498Szrj 			gatherscatter = GATHER;
413538fd1498Szrj 		      else
413638fd1498Szrj 			gatherscatter = SCATTER;
413738fd1498Szrj 		    }
413838fd1498Szrj 		}
413938fd1498Szrj 	      if (gatherscatter == SG_NONE && !simd_lane_access)
414038fd1498Szrj 		free_data_ref (newdr);
414138fd1498Szrj 	    }
414238fd1498Szrj 
414338fd1498Szrj 	  if (gatherscatter == SG_NONE && !simd_lane_access)
414438fd1498Szrj 	    {
414538fd1498Szrj 	      if (dump_enabled_p ())
414638fd1498Szrj 		{
414738fd1498Szrj 		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
414838fd1498Szrj                                    "not vectorized: data ref analysis "
414938fd1498Szrj                                    "failed ");
415038fd1498Szrj 		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
415138fd1498Szrj 		}
415238fd1498Szrj 
415338fd1498Szrj 	      if (is_a <bb_vec_info> (vinfo))
415438fd1498Szrj 		break;
415538fd1498Szrj 
415638fd1498Szrj 	      return false;
415738fd1498Szrj 	    }
415838fd1498Szrj         }
415938fd1498Szrj 
416038fd1498Szrj       if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
416138fd1498Szrj         {
416238fd1498Szrj           if (dump_enabled_p ())
416338fd1498Szrj             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
416438fd1498Szrj                              "not vectorized: base addr of dr is a "
416538fd1498Szrj                              "constant\n");
416638fd1498Szrj 
416738fd1498Szrj           if (is_a <bb_vec_info> (vinfo))
416838fd1498Szrj 	    break;
416938fd1498Szrj 
417038fd1498Szrj 	  if (gatherscatter != SG_NONE || simd_lane_access)
417138fd1498Szrj 	    free_data_ref (dr);
417238fd1498Szrj 	  return false;
417338fd1498Szrj         }
417438fd1498Szrj 
417538fd1498Szrj       if (TREE_THIS_VOLATILE (DR_REF (dr)))
417638fd1498Szrj         {
417738fd1498Szrj           if (dump_enabled_p ())
417838fd1498Szrj             {
417938fd1498Szrj               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
418038fd1498Szrj                                "not vectorized: volatile type ");
418138fd1498Szrj               dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
418238fd1498Szrj             }
418338fd1498Szrj 
418438fd1498Szrj           if (is_a <bb_vec_info> (vinfo))
418538fd1498Szrj 	    break;
418638fd1498Szrj 
418738fd1498Szrj           return false;
418838fd1498Szrj         }
418938fd1498Szrj 
419038fd1498Szrj       if (stmt_can_throw_internal (stmt))
419138fd1498Szrj         {
419238fd1498Szrj           if (dump_enabled_p ())
419338fd1498Szrj             {
419438fd1498Szrj               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
419538fd1498Szrj                                "not vectorized: statement can throw an "
419638fd1498Szrj                                "exception ");
419738fd1498Szrj               dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
419838fd1498Szrj             }
419938fd1498Szrj 
420038fd1498Szrj           if (is_a <bb_vec_info> (vinfo))
420138fd1498Szrj 	    break;
420238fd1498Szrj 
420338fd1498Szrj 	  if (gatherscatter != SG_NONE || simd_lane_access)
420438fd1498Szrj 	    free_data_ref (dr);
420538fd1498Szrj           return false;
420638fd1498Szrj         }
420738fd1498Szrj 
420838fd1498Szrj       if (TREE_CODE (DR_REF (dr)) == COMPONENT_REF
420938fd1498Szrj 	  && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr), 1)))
421038fd1498Szrj 	{
421138fd1498Szrj           if (dump_enabled_p ())
421238fd1498Szrj             {
421338fd1498Szrj               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
421438fd1498Szrj                                "not vectorized: statement is bitfield "
421538fd1498Szrj                                "access ");
421638fd1498Szrj               dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
421738fd1498Szrj             }
421838fd1498Szrj 
421938fd1498Szrj           if (is_a <bb_vec_info> (vinfo))
422038fd1498Szrj 	    break;
422138fd1498Szrj 
422238fd1498Szrj 	  if (gatherscatter != SG_NONE || simd_lane_access)
422338fd1498Szrj 	    free_data_ref (dr);
422438fd1498Szrj           return false;
422538fd1498Szrj 	}
422638fd1498Szrj 
422738fd1498Szrj       base = unshare_expr (DR_BASE_ADDRESS (dr));
422838fd1498Szrj       offset = unshare_expr (DR_OFFSET (dr));
422938fd1498Szrj       init = unshare_expr (DR_INIT (dr));
423038fd1498Szrj 
423138fd1498Szrj       if (is_gimple_call (stmt)
423238fd1498Szrj 	  && (!gimple_call_internal_p (stmt)
423338fd1498Szrj 	      || (gimple_call_internal_fn (stmt) != IFN_MASK_LOAD
423438fd1498Szrj 		  && gimple_call_internal_fn (stmt) != IFN_MASK_STORE)))
423538fd1498Szrj 	{
423638fd1498Szrj 	  if (dump_enabled_p ())
423738fd1498Szrj 	    {
423838fd1498Szrj 	      dump_printf_loc (MSG_MISSED_OPTIMIZATION,  vect_location,
423938fd1498Szrj 	                       "not vectorized: dr in a call ");
424038fd1498Szrj 	      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
424138fd1498Szrj 	    }
424238fd1498Szrj 
424338fd1498Szrj 	  if (is_a <bb_vec_info> (vinfo))
424438fd1498Szrj 	    break;
424538fd1498Szrj 
424638fd1498Szrj 	  if (gatherscatter != SG_NONE || simd_lane_access)
424738fd1498Szrj 	    free_data_ref (dr);
424838fd1498Szrj 	  return false;
424938fd1498Szrj 	}
425038fd1498Szrj 
425138fd1498Szrj       /* Update DR field in stmt_vec_info struct.  */
425238fd1498Szrj 
425338fd1498Szrj       /* If the dataref is in an inner-loop of the loop that is considered
425438fd1498Szrj 	 for vectorization, we also want to analyze the access relative to
425538fd1498Szrj 	 the outer-loop (DR contains information only relative to the
425638fd1498Szrj 	 inner-most enclosing loop).  We do that by building a reference to the
425738fd1498Szrj 	 first location accessed by the inner-loop, and analyze it relative to
425838fd1498Szrj 	 the outer-loop.  */
425938fd1498Szrj       if (loop && nested_in_vect_loop_p (loop, stmt))
426038fd1498Szrj 	{
426138fd1498Szrj 	  /* Build a reference to the first location accessed by the
426238fd1498Szrj 	     inner loop: *(BASE + INIT + OFFSET).  By construction,
426338fd1498Szrj 	     this address must be invariant in the inner loop, so we
426438fd1498Szrj 	     can consider it as being used in the outer loop.  */
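	  /* For example (illustrative only), in a nest such as

	       for (i = 0; i < n; i++)
		 for (j = 0; j < m; j++)
		   s += in[i + j];

	     the inner-loop reference in[i + j] is re-analyzed through the
	     reference in[i], the first location the inner loop accesses.  */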
426538fd1498Szrj 	  tree init_offset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset),
426638fd1498Szrj 					  init, offset);
426738fd1498Szrj 	  tree init_addr = fold_build_pointer_plus (base, init_offset);
426838fd1498Szrj 	  tree init_ref = build_fold_indirect_ref (init_addr);
426938fd1498Szrj 
427038fd1498Szrj 	  if (dump_enabled_p ())
427138fd1498Szrj 	    {
427238fd1498Szrj 	      dump_printf_loc (MSG_NOTE, vect_location,
427338fd1498Szrj                                "analyze in outer loop: ");
427438fd1498Szrj 	      dump_generic_expr (MSG_NOTE, TDF_SLIM, init_ref);
427538fd1498Szrj 	      dump_printf (MSG_NOTE, "\n");
427638fd1498Szrj 	    }
427738fd1498Szrj 
427838fd1498Szrj 	  if (!dr_analyze_innermost (&STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info),
427938fd1498Szrj 				     init_ref, loop))
428038fd1498Szrj 	    /* dr_analyze_innermost already explained the failure.  */
428138fd1498Szrj 	    return false;
428238fd1498Szrj 
428338fd1498Szrj           if (dump_enabled_p ())
428438fd1498Szrj 	    {
428538fd1498Szrj 	      dump_printf_loc (MSG_NOTE, vect_location,
428638fd1498Szrj                                "\touter base_address: ");
428738fd1498Szrj 	      dump_generic_expr (MSG_NOTE, TDF_SLIM,
428838fd1498Szrj                                  STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
428938fd1498Szrj 	      dump_printf (MSG_NOTE, "\n\touter offset from base address: ");
429038fd1498Szrj 	      dump_generic_expr (MSG_NOTE, TDF_SLIM,
429138fd1498Szrj                                  STMT_VINFO_DR_OFFSET (stmt_info));
429238fd1498Szrj 	      dump_printf (MSG_NOTE,
429338fd1498Szrj                            "\n\touter constant offset from base address: ");
429438fd1498Szrj 	      dump_generic_expr (MSG_NOTE, TDF_SLIM,
429538fd1498Szrj                                  STMT_VINFO_DR_INIT (stmt_info));
429638fd1498Szrj 	      dump_printf (MSG_NOTE, "\n\touter step: ");
429738fd1498Szrj 	      dump_generic_expr (MSG_NOTE, TDF_SLIM,
429838fd1498Szrj                                  STMT_VINFO_DR_STEP (stmt_info));
429938fd1498Szrj 	      dump_printf (MSG_NOTE, "\n\touter base alignment: %d\n",
430038fd1498Szrj 			   STMT_VINFO_DR_BASE_ALIGNMENT (stmt_info));
430138fd1498Szrj 	      dump_printf (MSG_NOTE, "\n\touter base misalignment: %d\n",
430238fd1498Szrj 			   STMT_VINFO_DR_BASE_MISALIGNMENT (stmt_info));
430338fd1498Szrj 	      dump_printf (MSG_NOTE, "\n\touter offset alignment: %d\n",
430438fd1498Szrj 			   STMT_VINFO_DR_OFFSET_ALIGNMENT (stmt_info));
430538fd1498Szrj 	      dump_printf (MSG_NOTE, "\n\touter step alignment: %d\n",
430638fd1498Szrj 			   STMT_VINFO_DR_STEP_ALIGNMENT (stmt_info));
430738fd1498Szrj 	    }
430838fd1498Szrj 	}
430938fd1498Szrj 
431038fd1498Szrj       if (STMT_VINFO_DATA_REF (stmt_info))
431138fd1498Szrj         {
431238fd1498Szrj           if (dump_enabled_p ())
431338fd1498Szrj             {
431438fd1498Szrj               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
431538fd1498Szrj                                "not vectorized: more than one data ref "
431638fd1498Szrj                                "in stmt: ");
431738fd1498Szrj               dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
431838fd1498Szrj             }
431938fd1498Szrj 
432038fd1498Szrj           if (is_a <bb_vec_info> (vinfo))
432138fd1498Szrj 	    break;
432238fd1498Szrj 
432338fd1498Szrj 	  if (gatherscatter != SG_NONE || simd_lane_access)
432438fd1498Szrj 	    free_data_ref (dr);
432538fd1498Szrj           return false;
432638fd1498Szrj         }
432738fd1498Szrj 
432838fd1498Szrj       STMT_VINFO_DATA_REF (stmt_info) = dr;
432938fd1498Szrj       if (simd_lane_access)
433038fd1498Szrj 	{
433138fd1498Szrj 	  STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) = true;
433238fd1498Szrj 	  free_data_ref (datarefs[i]);
433338fd1498Szrj 	  datarefs[i] = dr;
433438fd1498Szrj 	}
433538fd1498Szrj 
433638fd1498Szrj       if (TREE_CODE (DR_BASE_ADDRESS (dr)) == ADDR_EXPR
433738fd1498Szrj 	  && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr), 0))
433838fd1498Szrj 	  && DECL_NONALIASED (TREE_OPERAND (DR_BASE_ADDRESS (dr), 0)))
433938fd1498Szrj 	{
434038fd1498Szrj           if (dump_enabled_p ())
434138fd1498Szrj             {
434238fd1498Szrj               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
434338fd1498Szrj                                "not vectorized: base object not addressable "
434438fd1498Szrj 			       "for stmt: ");
434538fd1498Szrj               dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
434638fd1498Szrj             }
434738fd1498Szrj           if (is_a <bb_vec_info> (vinfo))
434838fd1498Szrj 	    {
434938fd1498Szrj 	      /* In BB vectorization the ref can still participate
435038fd1498Szrj 	         in dependence analysis, we just can't vectorize it.  */
435138fd1498Szrj 	      STMT_VINFO_VECTORIZABLE (stmt_info) = false;
435238fd1498Szrj 	      continue;
435338fd1498Szrj 	    }
435438fd1498Szrj 	  return false;
435538fd1498Szrj 	}
435638fd1498Szrj 
435738fd1498Szrj       /* Set vectype for STMT.  */
435838fd1498Szrj       scalar_type = TREE_TYPE (DR_REF (dr));
435938fd1498Szrj       STMT_VINFO_VECTYPE (stmt_info)
436038fd1498Szrj 	= get_vectype_for_scalar_type (scalar_type);
436138fd1498Szrj       if (!STMT_VINFO_VECTYPE (stmt_info))
436238fd1498Szrj         {
436338fd1498Szrj           if (dump_enabled_p ())
436438fd1498Szrj             {
436538fd1498Szrj               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
436638fd1498Szrj                                "not vectorized: no vectype for stmt: ");
436738fd1498Szrj               dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
436838fd1498Szrj               dump_printf (MSG_MISSED_OPTIMIZATION, " scalar_type: ");
436938fd1498Szrj               dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_DETAILS,
437038fd1498Szrj                                  scalar_type);
437138fd1498Szrj               dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
437238fd1498Szrj             }
437338fd1498Szrj 
437438fd1498Szrj           if (is_a <bb_vec_info> (vinfo))
437538fd1498Szrj 	    {
437638fd1498Szrj 	      /* Having no vector type is fine; the ref can still participate
437738fd1498Szrj 	         in dependence analysis, we just can't vectorize it.  */
437838fd1498Szrj 	      STMT_VINFO_VECTORIZABLE (stmt_info) = false;
437938fd1498Szrj 	      continue;
438038fd1498Szrj 	    }
438138fd1498Szrj 
438238fd1498Szrj 	  if (gatherscatter != SG_NONE || simd_lane_access)
438338fd1498Szrj 	    {
438438fd1498Szrj 	      STMT_VINFO_DATA_REF (stmt_info) = NULL;
438538fd1498Szrj 	      if (gatherscatter != SG_NONE)
438638fd1498Szrj 		free_data_ref (dr);
438738fd1498Szrj 	    }
438838fd1498Szrj 	  return false;
438938fd1498Szrj         }
439038fd1498Szrj       else
439138fd1498Szrj 	{
439238fd1498Szrj 	  if (dump_enabled_p ())
439338fd1498Szrj 	    {
439438fd1498Szrj 	      dump_printf_loc (MSG_NOTE, vect_location,
439538fd1498Szrj 			       "got vectype for stmt: ");
439638fd1498Szrj 	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
439738fd1498Szrj 	      dump_generic_expr (MSG_NOTE, TDF_SLIM,
439838fd1498Szrj 				 STMT_VINFO_VECTYPE (stmt_info));
439938fd1498Szrj 	      dump_printf (MSG_NOTE, "\n");
440038fd1498Szrj 	    }
440138fd1498Szrj 	}
440238fd1498Szrj 
440338fd1498Szrj       /* Adjust the minimal vectorization factor according to the
440438fd1498Szrj 	 vector type.  */
440538fd1498Szrj       vf = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
440638fd1498Szrj       *min_vf = upper_bound (*min_vf, vf);
440738fd1498Szrj 
440838fd1498Szrj       if (gatherscatter != SG_NONE)
440938fd1498Szrj 	{
441038fd1498Szrj 	  gather_scatter_info gs_info;
441138fd1498Szrj 	  if (!vect_check_gather_scatter (stmt, as_a <loop_vec_info> (vinfo),
441238fd1498Szrj 					  &gs_info)
441338fd1498Szrj 	      || !get_vectype_for_scalar_type (TREE_TYPE (gs_info.offset)))
441438fd1498Szrj 	    {
441538fd1498Szrj 	      STMT_VINFO_DATA_REF (stmt_info) = NULL;
441638fd1498Szrj 	      free_data_ref (dr);
441738fd1498Szrj 	      if (dump_enabled_p ())
441838fd1498Szrj 		{
441938fd1498Szrj 		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
442038fd1498Szrj 				   (gatherscatter == GATHER) ?
442138fd1498Szrj 				   "not vectorized: not suitable for gather "
442238fd1498Szrj 				   "load " :
442338fd1498Szrj 				   "not vectorized: not suitable for scatter "
442438fd1498Szrj 				   "store ");
442538fd1498Szrj 		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
442638fd1498Szrj 		}
442738fd1498Szrj 	      return false;
442838fd1498Szrj 	    }
442938fd1498Szrj 
443038fd1498Szrj 	  free_data_ref (datarefs[i]);
443138fd1498Szrj 	  datarefs[i] = dr;
443238fd1498Szrj 	  STMT_VINFO_GATHER_SCATTER_P (stmt_info) = gatherscatter;
443338fd1498Szrj 	}
443438fd1498Szrj 
443538fd1498Szrj       else if (is_a <loop_vec_info> (vinfo)
443638fd1498Szrj 	       && TREE_CODE (DR_STEP (dr)) != INTEGER_CST)
443738fd1498Szrj 	{
443838fd1498Szrj 	  if (nested_in_vect_loop_p (loop, stmt))
443938fd1498Szrj 	    {
444038fd1498Szrj 	      if (dump_enabled_p ())
444138fd1498Szrj 		{
444238fd1498Szrj 		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
444338fd1498Szrj                                    "not vectorized: not suitable for strided "
444438fd1498Szrj                                    "load ");
444538fd1498Szrj 		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
444638fd1498Szrj 		}
444738fd1498Szrj 	      return false;
444838fd1498Szrj 	    }
444938fd1498Szrj 	  STMT_VINFO_STRIDED_P (stmt_info) = true;
445038fd1498Szrj 	}
445138fd1498Szrj     }
445238fd1498Szrj 
445338fd1498Szrj   /* If we stopped analysis at the first dataref we could not analyze
445438fd1498Szrj      when trying to vectorize a basic-block mark the rest of the datarefs
445538fd1498Szrj      as not vectorizable and truncate the vector of datarefs.  That
445638fd1498Szrj      avoids spending useless time in analyzing their dependence.  */
445738fd1498Szrj   if (i != datarefs.length ())
445838fd1498Szrj     {
445938fd1498Szrj       gcc_assert (is_a <bb_vec_info> (vinfo));
446038fd1498Szrj       for (unsigned j = i; j < datarefs.length (); ++j)
446138fd1498Szrj 	{
446238fd1498Szrj 	  data_reference_p dr = datarefs[j];
446338fd1498Szrj           STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
446438fd1498Szrj 	  free_data_ref (dr);
446538fd1498Szrj 	}
446638fd1498Szrj       datarefs.truncate (i);
446738fd1498Szrj     }
446838fd1498Szrj 
446938fd1498Szrj   return true;
447038fd1498Szrj }
447138fd1498Szrj 
447238fd1498Szrj 
447338fd1498Szrj /* Function vect_get_new_vect_var.
447438fd1498Szrj 
447538fd1498Szrj    Returns a name for a new variable.  The current naming scheme prepends
447638fd1498Szrj    a prefix determined by VAR_KIND ("vect", "stmp", "mask" or "vectp") to
447738fd1498Szrj    the name of the new vectorizer-generated variable: it is combined with
447838fd1498Szrj    NAME (separated by an underscore) if provided, and used alone otherwise.  */
447938fd1498Szrj 
448038fd1498Szrj tree
vect_get_new_vect_var(tree type,enum vect_var_kind var_kind,const char * name)448138fd1498Szrj vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
448238fd1498Szrj {
448338fd1498Szrj   const char *prefix;
448438fd1498Szrj   tree new_vect_var;
448538fd1498Szrj 
448638fd1498Szrj   switch (var_kind)
448738fd1498Szrj   {
448838fd1498Szrj   case vect_simple_var:
448938fd1498Szrj     prefix = "vect";
449038fd1498Szrj     break;
449138fd1498Szrj   case vect_scalar_var:
449238fd1498Szrj     prefix = "stmp";
449338fd1498Szrj     break;
449438fd1498Szrj   case vect_mask_var:
449538fd1498Szrj     prefix = "mask";
449638fd1498Szrj     break;
449738fd1498Szrj   case vect_pointer_var:
449838fd1498Szrj     prefix = "vectp";
449938fd1498Szrj     break;
450038fd1498Szrj   default:
450138fd1498Szrj     gcc_unreachable ();
450238fd1498Szrj   }
450338fd1498Szrj 
450438fd1498Szrj   if (name)
450538fd1498Szrj     {
450638fd1498Szrj       char* tmp = concat (prefix, "_", name, NULL);
450738fd1498Szrj       new_vect_var = create_tmp_reg (type, tmp);
450838fd1498Szrj       free (tmp);
450938fd1498Szrj     }
451038fd1498Szrj   else
451138fd1498Szrj     new_vect_var = create_tmp_reg (type, prefix);
451238fd1498Szrj 
451338fd1498Szrj   return new_vect_var;
451438fd1498Szrj }
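
/* For illustration only (not part of the interface; ptr_type below stands
   for any pointer type): a call such as

     tree p = vect_get_new_vect_var (ptr_type, vect_pointer_var, "a");

   yields a temporary whose base name is "vectp_a"; these are the
   "vect"/"vectp"/"stmp"/"mask"-prefixed names that show up in vectorizer
   dump files.  The numeric suffixes seen in dumps are added separately.  */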
451538fd1498Szrj 
451638fd1498Szrj /* Like vect_get_new_vect_var but return an SSA name.  */
451738fd1498Szrj 
451838fd1498Szrj tree
vect_get_new_ssa_name(tree type,enum vect_var_kind var_kind,const char * name)451938fd1498Szrj vect_get_new_ssa_name (tree type, enum vect_var_kind var_kind, const char *name)
452038fd1498Szrj {
452138fd1498Szrj   const char *prefix;
452238fd1498Szrj   tree new_vect_var;
452338fd1498Szrj 
452438fd1498Szrj   switch (var_kind)
452538fd1498Szrj   {
452638fd1498Szrj   case vect_simple_var:
452738fd1498Szrj     prefix = "vect";
452838fd1498Szrj     break;
452938fd1498Szrj   case vect_scalar_var:
453038fd1498Szrj     prefix = "stmp";
453138fd1498Szrj     break;
453238fd1498Szrj   case vect_pointer_var:
453338fd1498Szrj     prefix = "vectp";
453438fd1498Szrj     break;
453538fd1498Szrj   default:
453638fd1498Szrj     gcc_unreachable ();
453738fd1498Szrj   }
453838fd1498Szrj 
453938fd1498Szrj   if (name)
454038fd1498Szrj     {
454138fd1498Szrj       char* tmp = concat (prefix, "_", name, NULL);
454238fd1498Szrj       new_vect_var = make_temp_ssa_name (type, NULL, tmp);
454338fd1498Szrj       free (tmp);
454438fd1498Szrj     }
454538fd1498Szrj   else
454638fd1498Szrj     new_vect_var = make_temp_ssa_name (type, NULL, prefix);
454738fd1498Szrj 
454838fd1498Szrj   return new_vect_var;
454938fd1498Szrj }
455038fd1498Szrj 
455138fd1498Szrj /* Duplicate ptr info and set alignment/misalignment on NAME from DR.  */
455238fd1498Szrj 
455338fd1498Szrj static void
vect_duplicate_ssa_name_ptr_info(tree name,data_reference * dr)455438fd1498Szrj vect_duplicate_ssa_name_ptr_info (tree name, data_reference *dr)
455538fd1498Szrj {
455638fd1498Szrj   duplicate_ssa_name_ptr_info (name, DR_PTR_INFO (dr));
455738fd1498Szrj   int misalign = DR_MISALIGNMENT (dr);
455838fd1498Szrj   if (misalign == DR_MISALIGNMENT_UNKNOWN)
455938fd1498Szrj     mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (name));
456038fd1498Szrj   else
456138fd1498Szrj     set_ptr_info_alignment (SSA_NAME_PTR_INFO (name),
456238fd1498Szrj 			    DR_TARGET_ALIGNMENT (dr), misalign);
456338fd1498Szrj }
456438fd1498Szrj 
456538fd1498Szrj /* Function vect_create_addr_base_for_vector_ref.
456638fd1498Szrj 
456738fd1498Szrj    Create an expression that computes the address of the first memory location
456838fd1498Szrj    that will be accessed for a data reference.
456938fd1498Szrj 
457038fd1498Szrj    Input:
457138fd1498Szrj    STMT: The statement containing the data reference.
457238fd1498Szrj    NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
457338fd1498Szrj    OFFSET: Optional.  If supplied, it is added to the initial address.
457438fd1498Szrj    LOOP:    Specify relative to which loop-nest the address should be computed.
457538fd1498Szrj             For example, when the dataref is in an inner-loop nested in an
457638fd1498Szrj 	    outer-loop that is now being vectorized, LOOP can be either the
457738fd1498Szrj 	    outer-loop, or the inner-loop.  The first memory location accessed
457838fd1498Szrj 	    by the following dataref ('in' points to short):
457938fd1498Szrj 
458038fd1498Szrj 		for (i=0; i<N; i++)
458138fd1498Szrj 		   for (j=0; j<M; j++)
458238fd1498Szrj 		     s += in[i+j]
458338fd1498Szrj 
458438fd1498Szrj 	    is as follows:
458538fd1498Szrj 	    if LOOP=i_loop:	&in		(relative to i_loop)
458638fd1498Szrj 	    if LOOP=j_loop: 	&in+i*2B	(relative to j_loop)
458738fd1498Szrj    BYTE_OFFSET: Optional, defaulted to NULL.  If supplied, it is added to the
458838fd1498Szrj 	    initial address.  Unlike OFFSET, which is the number of elements to
458938fd1498Szrj 	    be added, BYTE_OFFSET is measured in bytes.
459038fd1498Szrj 
459138fd1498Szrj    Output:
459238fd1498Szrj    1. Return an SSA_NAME whose value is the address of the memory location of
459338fd1498Szrj       the first vector of the data reference.
459438fd1498Szrj    2. If new_stmt_list is not NULL_TREE after return, then the caller must insert
459538fd1498Szrj       the statement(s) that define the returned SSA_NAME.
459638fd1498Szrj 
459738fd1498Szrj    FORNOW: We are only handling array accesses with step 1.  */
459838fd1498Szrj 
459938fd1498Szrj tree
vect_create_addr_base_for_vector_ref(gimple * stmt,gimple_seq * new_stmt_list,tree offset,tree byte_offset)460038fd1498Szrj vect_create_addr_base_for_vector_ref (gimple *stmt,
460138fd1498Szrj 				      gimple_seq *new_stmt_list,
460238fd1498Szrj 				      tree offset,
460338fd1498Szrj 				      tree byte_offset)
460438fd1498Szrj {
460538fd1498Szrj   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
460638fd1498Szrj   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
460738fd1498Szrj   const char *base_name;
460838fd1498Szrj   tree addr_base;
460938fd1498Szrj   tree dest;
461038fd1498Szrj   gimple_seq seq = NULL;
461138fd1498Szrj   tree vect_ptr_type;
461238fd1498Szrj   tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
461338fd1498Szrj   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
461438fd1498Szrj   innermost_loop_behavior *drb = vect_dr_behavior (dr);
461538fd1498Szrj 
461638fd1498Szrj   tree data_ref_base = unshare_expr (drb->base_address);
461738fd1498Szrj   tree base_offset = unshare_expr (drb->offset);
461838fd1498Szrj   tree init = unshare_expr (drb->init);
461938fd1498Szrj 
462038fd1498Szrj   if (loop_vinfo)
462138fd1498Szrj     base_name = get_name (data_ref_base);
462238fd1498Szrj   else
462338fd1498Szrj     {
462438fd1498Szrj       base_offset = ssize_int (0);
462538fd1498Szrj       init = ssize_int (0);
462638fd1498Szrj       base_name = get_name (DR_REF (dr));
462738fd1498Szrj     }
462838fd1498Szrj 
462938fd1498Szrj   /* Create base_offset */
463038fd1498Szrj   base_offset = size_binop (PLUS_EXPR,
463138fd1498Szrj 			    fold_convert (sizetype, base_offset),
463238fd1498Szrj 			    fold_convert (sizetype, init));
463338fd1498Szrj 
463438fd1498Szrj   if (offset)
463538fd1498Szrj     {
463638fd1498Szrj       offset = fold_build2 (MULT_EXPR, sizetype,
463738fd1498Szrj 			    fold_convert (sizetype, offset), step);
463838fd1498Szrj       base_offset = fold_build2 (PLUS_EXPR, sizetype,
463938fd1498Szrj 				 base_offset, offset);
464038fd1498Szrj     }
464138fd1498Szrj   if (byte_offset)
464238fd1498Szrj     {
464338fd1498Szrj       byte_offset = fold_convert (sizetype, byte_offset);
464438fd1498Szrj       base_offset = fold_build2 (PLUS_EXPR, sizetype,
464538fd1498Szrj 				 base_offset, byte_offset);
464638fd1498Szrj     }
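
  /* BASE_OFFSET now holds the byte offset of the first accessed element:
     the DR's offset + init, plus OFFSET scaled by the element size and plus
     BYTE_OFFSET when those were supplied.  E.g. for an array of shorts with
     OFFSET == 4 the OFFSET term contributes 4 * 2 == 8 bytes.  */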
464738fd1498Szrj 
464838fd1498Szrj   /* base + base_offset */
464938fd1498Szrj   if (loop_vinfo)
465038fd1498Szrj     addr_base = fold_build_pointer_plus (data_ref_base, base_offset);
465138fd1498Szrj   else
465238fd1498Szrj     {
465338fd1498Szrj       addr_base = build1 (ADDR_EXPR,
465438fd1498Szrj 			  build_pointer_type (TREE_TYPE (DR_REF (dr))),
465538fd1498Szrj 			  unshare_expr (DR_REF (dr)));
465638fd1498Szrj     }
465738fd1498Szrj 
465838fd1498Szrj   vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
465938fd1498Szrj   dest = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var, base_name);
466038fd1498Szrj   addr_base = force_gimple_operand (addr_base, &seq, true, dest);
466138fd1498Szrj   gimple_seq_add_seq (new_stmt_list, seq);
466238fd1498Szrj 
466338fd1498Szrj   if (DR_PTR_INFO (dr)
466438fd1498Szrj       && TREE_CODE (addr_base) == SSA_NAME
466538fd1498Szrj       && !SSA_NAME_PTR_INFO (addr_base))
466638fd1498Szrj     {
466738fd1498Szrj       vect_duplicate_ssa_name_ptr_info (addr_base, dr);
466838fd1498Szrj       if (offset || byte_offset)
466938fd1498Szrj 	mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (addr_base));
467038fd1498Szrj     }
467138fd1498Szrj 
467238fd1498Szrj   if (dump_enabled_p ())
467338fd1498Szrj     {
467438fd1498Szrj       dump_printf_loc (MSG_NOTE, vect_location, "created ");
467538fd1498Szrj       dump_generic_expr (MSG_NOTE, TDF_SLIM, addr_base);
467638fd1498Szrj       dump_printf (MSG_NOTE, "\n");
467738fd1498Szrj     }
467838fd1498Szrj 
467938fd1498Szrj   return addr_base;
468038fd1498Szrj }
468138fd1498Szrj 
468238fd1498Szrj 
468338fd1498Szrj /* Function vect_create_data_ref_ptr.
468438fd1498Szrj 
468538fd1498Szrj    Create a new pointer-to-AGGR_TYPE variable (ap), that points to the first
468638fd1498Szrj    location accessed in the loop by STMT, along with the def-use update
468738fd1498Szrj    chain to appropriately advance the pointer through the loop iterations.
468838fd1498Szrj    Also set aliasing information for the pointer.  This pointer is used by
468938fd1498Szrj    the callers to this function to create a memory reference expression for
469038fd1498Szrj    vector load/store access.
469138fd1498Szrj 
469238fd1498Szrj    Input:
469338fd1498Szrj    1. STMT: a stmt that references memory. Expected to be of the form
469438fd1498Szrj          GIMPLE_ASSIGN <name, data-ref> or
469538fd1498Szrj 	 GIMPLE_ASSIGN <data-ref, name>.
469638fd1498Szrj    2. AGGR_TYPE: the type of the reference, which should be either a vector
469738fd1498Szrj         or an array.
469838fd1498Szrj    3. AT_LOOP: the loop where the vector memref is to be created.
469938fd1498Szrj    4. OFFSET (optional): an offset to be added to the initial address accessed
470038fd1498Szrj         by the data-ref in STMT.
470138fd1498Szrj    5. BSI: location where the new stmts are to be placed if there is no loop.
470238fd1498Szrj    6. ONLY_INIT: indicate whether ap is to be updated in the loop, or remain
470338fd1498Szrj         pointing to the initial address.
470438fd1498Szrj    7. BYTE_OFFSET (optional, defaults to NULL): a byte offset to be added
470538fd1498Szrj 	to the initial address accessed by the data-ref in STMT.  This is
470638fd1498Szrj 	similar to OFFSET, but OFFSET is counted in elements, while BYTE_OFFSET
470738fd1498Szrj 	in bytes.
470838fd1498Szrj    8. IV_STEP (optional, defaults to NULL): the amount that should be added
470938fd1498Szrj 	to the IV during each iteration of the loop.  NULL says to move
471038fd1498Szrj 	by one copy of AGGR_TYPE up or down, depending on the step of the
471138fd1498Szrj 	data reference.
471238fd1498Szrj 
471338fd1498Szrj    Output:
471438fd1498Szrj    1. Declare a new ptr to vector_type, and have it point to the base of the
471538fd1498Szrj       data reference (initial address accessed by the data reference).
471638fd1498Szrj       For example, for vector of type V8HI, the following code is generated:
471738fd1498Szrj 
471838fd1498Szrj       v8hi *ap;
471938fd1498Szrj       ap = (v8hi *)initial_address;
472038fd1498Szrj 
472138fd1498Szrj       if OFFSET is not supplied:
472238fd1498Szrj          initial_address = &a[init];
472338fd1498Szrj       if OFFSET is supplied:
472438fd1498Szrj          initial_address = &a[init + OFFSET];
472538fd1498Szrj       if BYTE_OFFSET is supplied:
472638fd1498Szrj 	 initial_address = &a[init] + BYTE_OFFSET;
472738fd1498Szrj 
472838fd1498Szrj       Return the initial_address in INITIAL_ADDRESS.
472938fd1498Szrj 
473038fd1498Szrj    2. If ONLY_INIT is true, just return the initial pointer.  Otherwise, also
473138fd1498Szrj       update the pointer in each iteration of the loop.
473238fd1498Szrj 
473338fd1498Szrj       Return the increment stmt that updates the pointer in PTR_INCR.
473438fd1498Szrj 
473538fd1498Szrj    3. Set INV_P to true if the access pattern of the data reference in the
473638fd1498Szrj       vectorized loop is invariant.  Set it to false otherwise.
473738fd1498Szrj 
473838fd1498Szrj    4. Return the pointer.  */
473938fd1498Szrj 
474038fd1498Szrj tree
vect_create_data_ref_ptr(gimple * stmt,tree aggr_type,struct loop * at_loop,tree offset,tree * initial_address,gimple_stmt_iterator * gsi,gimple ** ptr_incr,bool only_init,bool * inv_p,tree byte_offset,tree iv_step)474138fd1498Szrj vect_create_data_ref_ptr (gimple *stmt, tree aggr_type, struct loop *at_loop,
474238fd1498Szrj 			  tree offset, tree *initial_address,
474338fd1498Szrj 			  gimple_stmt_iterator *gsi, gimple **ptr_incr,
474438fd1498Szrj 			  bool only_init, bool *inv_p, tree byte_offset,
474538fd1498Szrj 			  tree iv_step)
474638fd1498Szrj {
474738fd1498Szrj   const char *base_name;
474838fd1498Szrj   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
474938fd1498Szrj   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
475038fd1498Szrj   struct loop *loop = NULL;
475138fd1498Szrj   bool nested_in_vect_loop = false;
475238fd1498Szrj   struct loop *containing_loop = NULL;
475338fd1498Szrj   tree aggr_ptr_type;
475438fd1498Szrj   tree aggr_ptr;
475538fd1498Szrj   tree new_temp;
475638fd1498Szrj   gimple_seq new_stmt_list = NULL;
475738fd1498Szrj   edge pe = NULL;
475838fd1498Szrj   basic_block new_bb;
475938fd1498Szrj   tree aggr_ptr_init;
476038fd1498Szrj   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
476138fd1498Szrj   tree aptr;
476238fd1498Szrj   gimple_stmt_iterator incr_gsi;
476338fd1498Szrj   bool insert_after;
476438fd1498Szrj   tree indx_before_incr, indx_after_incr;
476538fd1498Szrj   gimple *incr;
476638fd1498Szrj   tree step;
476738fd1498Szrj   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
476838fd1498Szrj 
476938fd1498Szrj   gcc_assert (iv_step != NULL_TREE
477038fd1498Szrj 	      || TREE_CODE (aggr_type) == ARRAY_TYPE
477138fd1498Szrj 	      || TREE_CODE (aggr_type) == VECTOR_TYPE);
477238fd1498Szrj 
477338fd1498Szrj   if (loop_vinfo)
477438fd1498Szrj     {
477538fd1498Szrj       loop = LOOP_VINFO_LOOP (loop_vinfo);
477638fd1498Szrj       nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
477738fd1498Szrj       containing_loop = (gimple_bb (stmt))->loop_father;
477838fd1498Szrj       pe = loop_preheader_edge (loop);
477938fd1498Szrj     }
478038fd1498Szrj   else
478138fd1498Szrj     {
478238fd1498Szrj       gcc_assert (bb_vinfo);
478338fd1498Szrj       only_init = true;
478438fd1498Szrj       *ptr_incr = NULL;
478538fd1498Szrj     }
478638fd1498Szrj 
478738fd1498Szrj   /* Check the step (evolution) of the load in LOOP, and record
478838fd1498Szrj      whether it's invariant.  */
478938fd1498Szrj   step = vect_dr_behavior (dr)->step;
479038fd1498Szrj   if (integer_zerop (step))
479138fd1498Szrj     *inv_p = true;
479238fd1498Szrj   else
479338fd1498Szrj     *inv_p = false;
479438fd1498Szrj 
479538fd1498Szrj   /* Create an expression for the first address accessed by this load
479638fd1498Szrj      in LOOP.  */
479738fd1498Szrj   base_name = get_name (DR_BASE_ADDRESS (dr));
479838fd1498Szrj 
479938fd1498Szrj   if (dump_enabled_p ())
480038fd1498Szrj     {
480138fd1498Szrj       tree dr_base_type = TREE_TYPE (DR_BASE_OBJECT (dr));
480238fd1498Szrj       dump_printf_loc (MSG_NOTE, vect_location,
480338fd1498Szrj                        "create %s-pointer variable to type: ",
480438fd1498Szrj 		       get_tree_code_name (TREE_CODE (aggr_type)));
480538fd1498Szrj       dump_generic_expr (MSG_NOTE, TDF_SLIM, aggr_type);
480638fd1498Szrj       if (TREE_CODE (dr_base_type) == ARRAY_TYPE)
480738fd1498Szrj         dump_printf (MSG_NOTE, "  vectorizing an array ref: ");
480838fd1498Szrj       else if (TREE_CODE (dr_base_type) == VECTOR_TYPE)
480938fd1498Szrj         dump_printf (MSG_NOTE, "  vectorizing a vector ref: ");
481038fd1498Szrj       else if (TREE_CODE (dr_base_type) == RECORD_TYPE)
481138fd1498Szrj         dump_printf (MSG_NOTE, "  vectorizing a record based array ref: ");
481238fd1498Szrj       else
481338fd1498Szrj         dump_printf (MSG_NOTE, "  vectorizing a pointer ref: ");
481438fd1498Szrj       dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_BASE_OBJECT (dr));
481538fd1498Szrj       dump_printf (MSG_NOTE, "\n");
481638fd1498Szrj     }
481738fd1498Szrj 
481838fd1498Szrj   /* (1) Create the new aggregate-pointer variable.
481938fd1498Szrj      Vector and array types inherit the alias set of their component
482038fd1498Szrj      type by default so we need to use a ref-all pointer if the data
482138fd1498Szrj      reference does not conflict with the created aggregated data
482238fd1498Szrj      reference because it is not addressable.  */
482338fd1498Szrj   bool need_ref_all = false;
482438fd1498Szrj   if (!alias_sets_conflict_p (get_alias_set (aggr_type),
482538fd1498Szrj 			      get_alias_set (DR_REF (dr))))
482638fd1498Szrj     need_ref_all = true;
482738fd1498Szrj   /* Likewise for any of the data references in the stmt group.  */
482838fd1498Szrj   else if (STMT_VINFO_GROUP_SIZE (stmt_info) > 1)
482938fd1498Szrj     {
483038fd1498Szrj       gimple *orig_stmt = STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info);
483138fd1498Szrj       do
483238fd1498Szrj 	{
483338fd1498Szrj 	  stmt_vec_info sinfo = vinfo_for_stmt (orig_stmt);
483438fd1498Szrj 	  struct data_reference *sdr = STMT_VINFO_DATA_REF (sinfo);
483538fd1498Szrj 	  if (!alias_sets_conflict_p (get_alias_set (aggr_type),
483638fd1498Szrj 				      get_alias_set (DR_REF (sdr))))
483738fd1498Szrj 	    {
483838fd1498Szrj 	      need_ref_all = true;
483938fd1498Szrj 	      break;
484038fd1498Szrj 	    }
484138fd1498Szrj 	  orig_stmt = STMT_VINFO_GROUP_NEXT_ELEMENT (sinfo);
484238fd1498Szrj 	}
484338fd1498Szrj       while (orig_stmt);
484438fd1498Szrj     }
484538fd1498Szrj   aggr_ptr_type = build_pointer_type_for_mode (aggr_type, ptr_mode,
484638fd1498Szrj 					       need_ref_all);
484738fd1498Szrj   aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var, base_name);
484838fd1498Szrj 
484938fd1498Szrj 
485038fd1498Szrj   /* Note: If the dataref is in an inner-loop nested in LOOP, and we are
485138fd1498Szrj      vectorizing LOOP (i.e., outer-loop vectorization), we need to create two
485238fd1498Szrj      def-use update cycles for the pointer: one relative to the outer-loop
485338fd1498Szrj      (LOOP), which is what steps (2) and (3) below do.  The other is relative
485438fd1498Szrj      to the inner-loop (which is the inner-most loop containing the dataref),
485538fd1498Szrj      and this is done by step (4) below.
485638fd1498Szrj 
485738fd1498Szrj      When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
485838fd1498Szrj      inner-most loop, and so steps (2),(3) work the same, and step (4) is
485938fd1498Szrj      redundant.  Steps (2),(3) create the following:
486038fd1498Szrj 
486138fd1498Szrj 	vp0 = &base_addr;
486238fd1498Szrj 	LOOP:	vp1 = phi(vp0,vp2)
486338fd1498Szrj 		...
486438fd1498Szrj 		...
486538fd1498Szrj 		vp2 = vp1 + step
486638fd1498Szrj 		goto LOOP
486738fd1498Szrj 
486838fd1498Szrj      If there is an inner-loop nested in loop, then step (4) will also be
486938fd1498Szrj      applied, and an additional update in the inner-loop will be created:
487038fd1498Szrj 
487138fd1498Szrj 	vp0 = &base_addr;
487238fd1498Szrj 	LOOP:   vp1 = phi(vp0,vp2)
487338fd1498Szrj 		...
487438fd1498Szrj         inner:     vp3 = phi(vp1,vp4)
487538fd1498Szrj 	           vp4 = vp3 + inner_step
487638fd1498Szrj 	           if () goto inner
487738fd1498Szrj 		...
487838fd1498Szrj 		vp2 = vp1 + step
487938fd1498Szrj 		if () goto LOOP   */
488038fd1498Szrj 
488138fd1498Szrj   /* (2) Calculate the initial address of the aggregate-pointer, and set
488238fd1498Szrj      the aggregate-pointer to point to it before the loop.  */
488338fd1498Szrj 
488438fd1498Szrj   /* Create: &(base[init_val + offset]) + byte_offset in the loop preheader.  */
488538fd1498Szrj 
488638fd1498Szrj   new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
488738fd1498Szrj 						   offset, byte_offset);
488838fd1498Szrj   if (new_stmt_list)
488938fd1498Szrj     {
489038fd1498Szrj       if (pe)
489138fd1498Szrj         {
489238fd1498Szrj           new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
489338fd1498Szrj           gcc_assert (!new_bb);
489438fd1498Szrj         }
489538fd1498Szrj       else
489638fd1498Szrj         gsi_insert_seq_before (gsi, new_stmt_list, GSI_SAME_STMT);
489738fd1498Szrj     }
489838fd1498Szrj 
489938fd1498Szrj   *initial_address = new_temp;
490038fd1498Szrj   aggr_ptr_init = new_temp;
490138fd1498Szrj 
490238fd1498Szrj   /* (3) Handle the updating of the aggregate-pointer inside the loop.
490338fd1498Szrj      This is needed when ONLY_INIT is false, and also when AT_LOOP is the
490438fd1498Szrj      inner-loop nested in LOOP (during outer-loop vectorization).  */
490538fd1498Szrj 
490638fd1498Szrj   /* No update in loop is required.  */
490738fd1498Szrj   if (only_init && (!loop_vinfo || at_loop == loop))
490838fd1498Szrj     aptr = aggr_ptr_init;
490938fd1498Szrj   else
491038fd1498Szrj     {
491138fd1498Szrj       if (iv_step == NULL_TREE)
491238fd1498Szrj 	{
491338fd1498Szrj 	  /* The step of the aggregate pointer is the type size.  */
491438fd1498Szrj 	  iv_step = TYPE_SIZE_UNIT (aggr_type);
491538fd1498Szrj 	  /* One exception to the above is when the scalar step of the load in
491638fd1498Szrj 	     LOOP is zero. In this case the step here is also zero.  */
491738fd1498Szrj 	  if (*inv_p)
491838fd1498Szrj 	    iv_step = size_zero_node;
491938fd1498Szrj 	  else if (tree_int_cst_sgn (step) == -1)
492038fd1498Szrj 	    iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
492138fd1498Szrj 	}
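      /* When the caller did not supply IV_STEP it is now one AGGR_TYPE per
	 iteration (e.g. 16 bytes for a 16-byte vector type), negated for a
	 backward (negative-step) access, or zero for an invariant access.  */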
492238fd1498Szrj 
492338fd1498Szrj       standard_iv_increment_position (loop, &incr_gsi, &insert_after);
492438fd1498Szrj 
492538fd1498Szrj       create_iv (aggr_ptr_init,
492638fd1498Szrj 		 fold_convert (aggr_ptr_type, iv_step),
492738fd1498Szrj 		 aggr_ptr, loop, &incr_gsi, insert_after,
492838fd1498Szrj 		 &indx_before_incr, &indx_after_incr);
492938fd1498Szrj       incr = gsi_stmt (incr_gsi);
493038fd1498Szrj       set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
493138fd1498Szrj 
493238fd1498Szrj       /* Copy the points-to information if it exists. */
493338fd1498Szrj       if (DR_PTR_INFO (dr))
493438fd1498Szrj 	{
493538fd1498Szrj 	  vect_duplicate_ssa_name_ptr_info (indx_before_incr, dr);
493638fd1498Szrj 	  vect_duplicate_ssa_name_ptr_info (indx_after_incr, dr);
493738fd1498Szrj 	}
493838fd1498Szrj       if (ptr_incr)
493938fd1498Szrj 	*ptr_incr = incr;
494038fd1498Szrj 
494138fd1498Szrj       aptr = indx_before_incr;
494238fd1498Szrj     }
494338fd1498Szrj 
494438fd1498Szrj   if (!nested_in_vect_loop || only_init)
494538fd1498Szrj     return aptr;
494638fd1498Szrj 
494738fd1498Szrj 
494838fd1498Szrj   /* (4) Handle the updating of the aggregate-pointer inside the inner-loop
494938fd1498Szrj      nested in LOOP, if it exists.  */
495038fd1498Szrj 
495138fd1498Szrj   gcc_assert (nested_in_vect_loop);
495238fd1498Szrj   if (!only_init)
495338fd1498Szrj     {
495438fd1498Szrj       standard_iv_increment_position (containing_loop, &incr_gsi,
495538fd1498Szrj 				      &insert_after);
495638fd1498Szrj       create_iv (aptr, fold_convert (aggr_ptr_type, DR_STEP (dr)), aggr_ptr,
495738fd1498Szrj 		 containing_loop, &incr_gsi, insert_after, &indx_before_incr,
495838fd1498Szrj 		 &indx_after_incr);
495938fd1498Szrj       incr = gsi_stmt (incr_gsi);
496038fd1498Szrj       set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
496138fd1498Szrj 
496238fd1498Szrj       /* Copy the points-to information if it exists. */
496338fd1498Szrj       if (DR_PTR_INFO (dr))
496438fd1498Szrj 	{
496538fd1498Szrj 	  vect_duplicate_ssa_name_ptr_info (indx_before_incr, dr);
496638fd1498Szrj 	  vect_duplicate_ssa_name_ptr_info (indx_after_incr, dr);
496738fd1498Szrj 	}
496838fd1498Szrj       if (ptr_incr)
496938fd1498Szrj 	*ptr_incr = incr;
497038fd1498Szrj 
497138fd1498Szrj       return indx_before_incr;
497238fd1498Szrj     }
497338fd1498Szrj   else
497438fd1498Szrj     gcc_unreachable ();
497538fd1498Szrj }
497638fd1498Szrj 
497738fd1498Szrj 
497838fd1498Szrj /* Function bump_vector_ptr
497938fd1498Szrj 
498038fd1498Szrj    Increment a pointer (to a vector type) by vector-size. If requested,
498138fd1498Szrj    i.e. if PTR-INCR is given, then also connect the new increment stmt
498238fd1498Szrj    to the existing def-use update-chain of the pointer, by modifying
498338fd1498Szrj    the PTR_INCR as illustrated below:
498438fd1498Szrj 
498538fd1498Szrj    The pointer def-use update-chain before this function:
498638fd1498Szrj                         DATAREF_PTR = phi (p_0, p_2)
498738fd1498Szrj                         ....
498838fd1498Szrj         PTR_INCR:       p_2 = DATAREF_PTR + step
498938fd1498Szrj 
499038fd1498Szrj    The pointer def-use update-chain after this function:
499138fd1498Szrj                         DATAREF_PTR = phi (p_0, p_2)
499238fd1498Szrj                         ....
499338fd1498Szrj                         NEW_DATAREF_PTR = DATAREF_PTR + BUMP
499438fd1498Szrj                         ....
499538fd1498Szrj         PTR_INCR:       p_2 = NEW_DATAREF_PTR + step
499638fd1498Szrj 
499738fd1498Szrj    Input:
499838fd1498Szrj    DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
499938fd1498Szrj                  in the loop.
500038fd1498Szrj    PTR_INCR - optional. The stmt that updates the pointer in each iteration of
500138fd1498Szrj 	      the loop.  The increment amount across iterations is expected
500238fd1498Szrj 	      to be vector_size.
500338fd1498Szrj    BSI - location where the new update stmt is to be placed.
500438fd1498Szrj    STMT - the original scalar memory-access stmt that is being vectorized.
500538fd1498Szrj    BUMP - optional. The offset by which to bump the pointer. If not given,
500638fd1498Szrj 	  the offset is assumed to be vector_size.
500738fd1498Szrj 
500838fd1498Szrj    Output: Return NEW_DATAREF_PTR as illustrated above.
500938fd1498Szrj 
501038fd1498Szrj */
501138fd1498Szrj 
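/* A typical call site (a sketch, not lifted from any specific caller) when
   emitting successive vector accesses of a group looks like:

     dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
				    NULL_TREE);

   i.e. the returned pointer replaces the old one for the next access, and
   PTR_INCR is rewired so that the cross-iteration increment starts from the
   bumped pointer.  */
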
501238fd1498Szrj tree
bump_vector_ptr(tree dataref_ptr,gimple * ptr_incr,gimple_stmt_iterator * gsi,gimple * stmt,tree bump)501338fd1498Szrj bump_vector_ptr (tree dataref_ptr, gimple *ptr_incr, gimple_stmt_iterator *gsi,
501438fd1498Szrj 		 gimple *stmt, tree bump)
501538fd1498Szrj {
501638fd1498Szrj   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
501738fd1498Szrj   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
501838fd1498Szrj   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
501938fd1498Szrj   tree update = TYPE_SIZE_UNIT (vectype);
502038fd1498Szrj   gassign *incr_stmt;
502138fd1498Szrj   ssa_op_iter iter;
502238fd1498Szrj   use_operand_p use_p;
502338fd1498Szrj   tree new_dataref_ptr;
502438fd1498Szrj 
502538fd1498Szrj   if (bump)
502638fd1498Szrj     update = bump;
502738fd1498Szrj 
502838fd1498Szrj   if (TREE_CODE (dataref_ptr) == SSA_NAME)
502938fd1498Szrj     new_dataref_ptr = copy_ssa_name (dataref_ptr);
503038fd1498Szrj   else
503138fd1498Szrj     new_dataref_ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
503238fd1498Szrj   incr_stmt = gimple_build_assign (new_dataref_ptr, POINTER_PLUS_EXPR,
503338fd1498Szrj 				   dataref_ptr, update);
503438fd1498Szrj   vect_finish_stmt_generation (stmt, incr_stmt, gsi);
503538fd1498Szrj 
503638fd1498Szrj   /* Copy the points-to information if it exists. */
503738fd1498Szrj   if (DR_PTR_INFO (dr))
503838fd1498Szrj     {
503938fd1498Szrj       duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
504038fd1498Szrj       mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (new_dataref_ptr));
504138fd1498Szrj     }
504238fd1498Szrj 
504338fd1498Szrj   if (!ptr_incr)
504438fd1498Szrj     return new_dataref_ptr;
504538fd1498Szrj 
504638fd1498Szrj   /* Update the vector-pointer's cross-iteration increment.  */
504738fd1498Szrj   FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
504838fd1498Szrj     {
504938fd1498Szrj       tree use = USE_FROM_PTR (use_p);
505038fd1498Szrj 
505138fd1498Szrj       if (use == dataref_ptr)
505238fd1498Szrj         SET_USE (use_p, new_dataref_ptr);
505338fd1498Szrj       else
505438fd1498Szrj         gcc_assert (operand_equal_p (use, update, 0));
505538fd1498Szrj     }
505638fd1498Szrj 
505738fd1498Szrj   return new_dataref_ptr;
505838fd1498Szrj }
505938fd1498Szrj 
506038fd1498Szrj 
506138fd1498Szrj /* Copy memory reference info such as base/clique from the SRC reference
506238fd1498Szrj    to the DEST MEM_REF.  */
506338fd1498Szrj 
506438fd1498Szrj void
vect_copy_ref_info(tree dest,tree src)506538fd1498Szrj vect_copy_ref_info (tree dest, tree src)
506638fd1498Szrj {
506738fd1498Szrj   if (TREE_CODE (dest) != MEM_REF)
506838fd1498Szrj     return;
506938fd1498Szrj 
507038fd1498Szrj   tree src_base = src;
507138fd1498Szrj   while (handled_component_p (src_base))
507238fd1498Szrj     src_base = TREE_OPERAND (src_base, 0);
507338fd1498Szrj   if (TREE_CODE (src_base) != MEM_REF
507438fd1498Szrj       && TREE_CODE (src_base) != TARGET_MEM_REF)
507538fd1498Szrj     return;
507638fd1498Szrj 
507738fd1498Szrj   MR_DEPENDENCE_CLIQUE (dest) = MR_DEPENDENCE_CLIQUE (src_base);
507838fd1498Szrj   MR_DEPENDENCE_BASE (dest) = MR_DEPENDENCE_BASE (src_base);
507938fd1498Szrj }
508038fd1498Szrj 
508138fd1498Szrj 
508238fd1498Szrj /* Function vect_create_destination_var.
508338fd1498Szrj 
508438fd1498Szrj    Create a new temporary of type VECTYPE (or of SCALAR_DEST's type if VECTYPE is NULL).  */
508538fd1498Szrj 
508638fd1498Szrj tree
vect_create_destination_var(tree scalar_dest,tree vectype)508738fd1498Szrj vect_create_destination_var (tree scalar_dest, tree vectype)
508838fd1498Szrj {
508938fd1498Szrj   tree vec_dest;
509038fd1498Szrj   const char *name;
509138fd1498Szrj   char *new_name;
509238fd1498Szrj   tree type;
509338fd1498Szrj   enum vect_var_kind kind;
509438fd1498Szrj 
509538fd1498Szrj   kind = vectype
509638fd1498Szrj     ? VECTOR_BOOLEAN_TYPE_P (vectype)
509738fd1498Szrj     ? vect_mask_var
509838fd1498Szrj     : vect_simple_var
509938fd1498Szrj     : vect_scalar_var;
510038fd1498Szrj   type = vectype ? vectype : TREE_TYPE (scalar_dest);
510138fd1498Szrj 
510238fd1498Szrj   gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
510338fd1498Szrj 
510438fd1498Szrj   name = get_name (scalar_dest);
510538fd1498Szrj   if (name)
510638fd1498Szrj     new_name = xasprintf ("%s_%u", name, SSA_NAME_VERSION (scalar_dest));
510738fd1498Szrj   else
510838fd1498Szrj     new_name = xasprintf ("_%u", SSA_NAME_VERSION (scalar_dest));
510938fd1498Szrj   vec_dest = vect_get_new_vect_var (type, kind, new_name);
511038fd1498Szrj   free (new_name);
511138fd1498Szrj 
511238fd1498Szrj   return vec_dest;
511338fd1498Szrj }
511438fd1498Szrj 
511538fd1498Szrj /* Function vect_grouped_store_supported.
511638fd1498Szrj 
511738fd1498Szrj    Returns TRUE if interleave high and interleave low permutations
511838fd1498Szrj    are supported, and FALSE otherwise.  */
511938fd1498Szrj 
512038fd1498Szrj bool
vect_grouped_store_supported(tree vectype,unsigned HOST_WIDE_INT count)512138fd1498Szrj vect_grouped_store_supported (tree vectype, unsigned HOST_WIDE_INT count)
512238fd1498Szrj {
512338fd1498Szrj   machine_mode mode = TYPE_MODE (vectype);
512438fd1498Szrj 
512538fd1498Szrj   /* vect_permute_store_chain requires the group size to be equal to 3 or
512638fd1498Szrj      be a power of two.  */
512738fd1498Szrj   if (count != 3 && exact_log2 (count) == -1)
512838fd1498Szrj     {
512938fd1498Szrj       if (dump_enabled_p ())
513038fd1498Szrj 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
513138fd1498Szrj 			 "the size of the group of accesses"
513238fd1498Szrj 			 " is not a power of 2 or not equal to 3\n");
513338fd1498Szrj       return false;
513438fd1498Szrj     }
513538fd1498Szrj 
513638fd1498Szrj   /* Check that the permutation is supported.  */
513738fd1498Szrj   if (VECTOR_MODE_P (mode))
513838fd1498Szrj     {
513938fd1498Szrj       unsigned int i;
514038fd1498Szrj       if (count == 3)
514138fd1498Szrj 	{
514238fd1498Szrj 	  unsigned int j0 = 0, j1 = 0, j2 = 0;
514338fd1498Szrj 	  unsigned int i, j;
514438fd1498Szrj 
514538fd1498Szrj 	  unsigned int nelt;
514638fd1498Szrj 	  if (!GET_MODE_NUNITS (mode).is_constant (&nelt))
514738fd1498Szrj 	    {
514838fd1498Szrj 	      if (dump_enabled_p ())
514938fd1498Szrj 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
515038fd1498Szrj 				 "cannot handle groups of 3 stores for"
515138fd1498Szrj 				 " variable-length vectors\n");
515238fd1498Szrj 	      return false;
515338fd1498Szrj 	    }
515438fd1498Szrj 
515538fd1498Szrj 	  vec_perm_builder sel (nelt, nelt, 1);
515638fd1498Szrj 	  sel.quick_grow (nelt);
515738fd1498Szrj 	  vec_perm_indices indices;
515838fd1498Szrj 	  for (j = 0; j < 3; j++)
515938fd1498Szrj 	    {
516038fd1498Szrj 	      int nelt0 = ((3 - j) * nelt) % 3;
516138fd1498Szrj 	      int nelt1 = ((3 - j) * nelt + 1) % 3;
516238fd1498Szrj 	      int nelt2 = ((3 - j) * nelt + 2) % 3;
516338fd1498Szrj 	      for (i = 0; i < nelt; i++)
516438fd1498Szrj 		{
516538fd1498Szrj 		  if (3 * i + nelt0 < nelt)
516638fd1498Szrj 		    sel[3 * i + nelt0] = j0++;
516738fd1498Szrj 		  if (3 * i + nelt1 < nelt)
516838fd1498Szrj 		    sel[3 * i + nelt1] = nelt + j1++;
516938fd1498Szrj 		  if (3 * i + nelt2 < nelt)
517038fd1498Szrj 		    sel[3 * i + nelt2] = 0;
517138fd1498Szrj 		}
517238fd1498Szrj 	      indices.new_vector (sel, 2, nelt);
517338fd1498Szrj 	      if (!can_vec_perm_const_p (mode, indices))
517438fd1498Szrj 		{
517538fd1498Szrj 		  if (dump_enabled_p ())
517638fd1498Szrj 		    dump_printf (MSG_MISSED_OPTIMIZATION,
517738fd1498Szrj 				 "permutation op not supported by target.\n");
517838fd1498Szrj 		  return false;
517938fd1498Szrj 		}
518038fd1498Szrj 
518138fd1498Szrj 	      for (i = 0; i < nelt; i++)
518238fd1498Szrj 		{
518338fd1498Szrj 		  if (3 * i + nelt0 < nelt)
518438fd1498Szrj 		    sel[3 * i + nelt0] = 3 * i + nelt0;
518538fd1498Szrj 		  if (3 * i + nelt1 < nelt)
518638fd1498Szrj 		    sel[3 * i + nelt1] = 3 * i + nelt1;
518738fd1498Szrj 		  if (3 * i + nelt2 < nelt)
518838fd1498Szrj 		    sel[3 * i + nelt2] = nelt + j2++;
518938fd1498Szrj 		}
519038fd1498Szrj 	      indices.new_vector (sel, 2, nelt);
519138fd1498Szrj 	      if (!can_vec_perm_const_p (mode, indices))
519238fd1498Szrj 		{
519338fd1498Szrj 		  if (dump_enabled_p ())
519438fd1498Szrj 		    dump_printf (MSG_MISSED_OPTIMIZATION,
519538fd1498Szrj 				 "permutation op not supported by target.\n");
519638fd1498Szrj 		  return false;
519738fd1498Szrj 		}
519838fd1498Szrj 	    }
519938fd1498Szrj 	  return true;
520038fd1498Szrj 	}
520138fd1498Szrj       else
520238fd1498Szrj 	{
520338fd1498Szrj 	  /* If length is not equal to 3 then only power of 2 is supported.  */
520438fd1498Szrj 	  gcc_assert (pow2p_hwi (count));
520538fd1498Szrj 	  poly_uint64 nelt = GET_MODE_NUNITS (mode);
520638fd1498Szrj 
520738fd1498Szrj 	  /* The encoding has 2 interleaved stepped patterns.  */
520838fd1498Szrj 	  vec_perm_builder sel (nelt, 2, 3);
520938fd1498Szrj 	  sel.quick_grow (6);
521038fd1498Szrj 	  for (i = 0; i < 3; i++)
521138fd1498Szrj 	    {
521238fd1498Szrj 	      sel[i * 2] = i;
521338fd1498Szrj 	      sel[i * 2 + 1] = i + nelt;
521438fd1498Szrj 	    }
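	  /* SEL now encodes {0, nelt, 1, nelt + 1, ...}, i.e. the
	     interleave-high selector that vect_permute_store_chain uses;
	     adding nelt/2 to each index below gives the matching
	     interleave-low selector.  Both must be supported.  */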
521538fd1498Szrj 	  vec_perm_indices indices (sel, 2, nelt);
521638fd1498Szrj 	  if (can_vec_perm_const_p (mode, indices))
521738fd1498Szrj 	    {
521838fd1498Szrj 	      for (i = 0; i < 6; i++)
521938fd1498Szrj 		sel[i] += exact_div (nelt, 2);
522038fd1498Szrj 	      indices.new_vector (sel, 2, nelt);
522138fd1498Szrj 	      if (can_vec_perm_const_p (mode, indices))
522238fd1498Szrj 		return true;
522338fd1498Szrj 	    }
522438fd1498Szrj 	}
522538fd1498Szrj     }
522638fd1498Szrj 
522738fd1498Szrj   if (dump_enabled_p ())
522838fd1498Szrj     dump_printf (MSG_MISSED_OPTIMIZATION,
522938fd1498Szrj 		 "permutation op not supported by target.\n");
523038fd1498Szrj   return false;
523138fd1498Szrj }
523238fd1498Szrj 
523338fd1498Szrj 
523438fd1498Szrj /* Return TRUE if vec_{mask_}store_lanes is available for COUNT vectors of
523538fd1498Szrj    type VECTYPE.  MASKED_P says whether the masked form is needed.  */
523638fd1498Szrj 
523738fd1498Szrj bool
vect_store_lanes_supported(tree vectype,unsigned HOST_WIDE_INT count,bool masked_p)523838fd1498Szrj vect_store_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count,
523938fd1498Szrj 			    bool masked_p)
524038fd1498Szrj {
524138fd1498Szrj   if (masked_p)
524238fd1498Szrj     return vect_lanes_optab_supported_p ("vec_mask_store_lanes",
524338fd1498Szrj 					 vec_mask_store_lanes_optab,
524438fd1498Szrj 					 vectype, count);
524538fd1498Szrj   else
524638fd1498Szrj     return vect_lanes_optab_supported_p ("vec_store_lanes",
524738fd1498Szrj 					 vec_store_lanes_optab,
524838fd1498Szrj 					 vectype, count);
524938fd1498Szrj }
525038fd1498Szrj 
525138fd1498Szrj 
525238fd1498Szrj /* Function vect_permute_store_chain.
525338fd1498Szrj 
525438fd1498Szrj    Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
525538fd1498Szrj    a power of 2 or equal to 3, generate interleave_high/low stmts to reorder
525638fd1498Szrj    the data correctly for the stores.  Return the final references for stores
525738fd1498Szrj    in RESULT_CHAIN.
525838fd1498Szrj 
525938fd1498Szrj    E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
526038fd1498Szrj    The input is 4 vectors each containing 8 elements.  We assign a number to
526138fd1498Szrj    each element, the input sequence is:
526238fd1498Szrj 
526338fd1498Szrj    1st vec:   0  1  2  3  4  5  6  7
526438fd1498Szrj    2nd vec:   8  9 10 11 12 13 14 15
526538fd1498Szrj    3rd vec:  16 17 18 19 20 21 22 23
526638fd1498Szrj    4th vec:  24 25 26 27 28 29 30 31
526738fd1498Szrj 
526838fd1498Szrj    The output sequence should be:
526938fd1498Szrj 
527038fd1498Szrj    1st vec:  0  8 16 24  1  9 17 25
527138fd1498Szrj    2nd vec:  2 10 18 26  3 11 19 27
527238fd1498Szrj    3rd vec:  4 12 20 28  5 13 21 29
527338fd1498Szrj    4th vec:  6 14 22 30  7 15 23 31
527438fd1498Szrj 
527538fd1498Szrj    i.e., we interleave the contents of the four vectors in their order.
527638fd1498Szrj 
527738fd1498Szrj    We use interleave_high/low instructions to create such output.  The input of
527838fd1498Szrj    each interleave_high/low operation is two vectors:
527938fd1498Szrj    1st vec    2nd vec
528038fd1498Szrj    0 1 2 3    4 5 6 7
528138fd1498Szrj    the even elements of the result vector are obtained left-to-right from the
528238fd1498Szrj    high/low elements of the first vector.  The odd elements of the result are
528338fd1498Szrj    obtained left-to-right from the high/low elements of the second vector.
528438fd1498Szrj    The output of interleave_high will be:   0 4 1 5
528538fd1498Szrj    and of interleave_low:                   2 6 3 7
528638fd1498Szrj 
528738fd1498Szrj 
528838fd1498Szrj    The permutation is done in log LENGTH stages.  In each stage interleave_high
528938fd1498Szrj    and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
529038fd1498Szrj    where the first argument is taken from the first half of DR_CHAIN and the
529138fd1498Szrj    second argument from its second half.
529238fd1498Szrj    In our example,
529338fd1498Szrj 
529438fd1498Szrj    I1: interleave_high (1st vec, 3rd vec)
529538fd1498Szrj    I2: interleave_low (1st vec, 3rd vec)
529638fd1498Szrj    I3: interleave_high (2nd vec, 4th vec)
529738fd1498Szrj    I4: interleave_low (2nd vec, 4th vec)
529838fd1498Szrj 
529938fd1498Szrj    The output for the first stage is:
530038fd1498Szrj 
530138fd1498Szrj    I1:  0 16  1 17  2 18  3 19
530238fd1498Szrj    I2:  4 20  5 21  6 22  7 23
530338fd1498Szrj    I3:  8 24  9 25 10 26 11 27
530438fd1498Szrj    I4: 12 28 13 29 14 30 15 31
530538fd1498Szrj 
530638fd1498Szrj    The output of the second stage, i.e. the final result is:
530738fd1498Szrj 
530838fd1498Szrj    I1:  0  8 16 24  1  9 17 25
530938fd1498Szrj    I2:  2 10 18 26  3 11 19 27
531038fd1498Szrj    I3:  4 12 20 28  5 13 21 29
531138fd1498Szrj    I4:  6 14 22 30  7 15 23 31.  */
531238fd1498Szrj 
531338fd1498Szrj void
vect_permute_store_chain(vec<tree> dr_chain,unsigned int length,gimple * stmt,gimple_stmt_iterator * gsi,vec<tree> * result_chain)531438fd1498Szrj vect_permute_store_chain (vec<tree> dr_chain,
531538fd1498Szrj 			  unsigned int length,
531638fd1498Szrj 			  gimple *stmt,
531738fd1498Szrj 			  gimple_stmt_iterator *gsi,
531838fd1498Szrj 			  vec<tree> *result_chain)
531938fd1498Szrj {
532038fd1498Szrj   tree vect1, vect2, high, low;
532138fd1498Szrj   gimple *perm_stmt;
532238fd1498Szrj   tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
532338fd1498Szrj   tree perm_mask_low, perm_mask_high;
532438fd1498Szrj   tree data_ref;
532538fd1498Szrj   tree perm3_mask_low, perm3_mask_high;
532638fd1498Szrj   unsigned int i, j, n, log_length = exact_log2 (length);
532738fd1498Szrj 
532838fd1498Szrj   result_chain->quick_grow (length);
532938fd1498Szrj   memcpy (result_chain->address (), dr_chain.address (),
533038fd1498Szrj 	  length * sizeof (tree));
533138fd1498Szrj 
533238fd1498Szrj   if (length == 3)
533338fd1498Szrj     {
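      /* Worked example for nelt == 4 and input vectors A, B and C: the
	 three iterations below produce {A0,B0,C0,A1}, {B1,C1,A2,B2} and
	 {C2,A3,B3,C3}, i.e. the fully interleaved store order
	 A0 B0 C0 A1 B1 C1 A2 B2 C2 A3 B3 C3 spread over three vectors.  */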
533438fd1498Szrj       /* vect_grouped_store_supported ensures that this is constant.  */
533538fd1498Szrj       unsigned int nelt = TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
533638fd1498Szrj       unsigned int j0 = 0, j1 = 0, j2 = 0;
533738fd1498Szrj 
533838fd1498Szrj       vec_perm_builder sel (nelt, nelt, 1);
533938fd1498Szrj       sel.quick_grow (nelt);
534038fd1498Szrj       vec_perm_indices indices;
534138fd1498Szrj       for (j = 0; j < 3; j++)
534238fd1498Szrj         {
534338fd1498Szrj 	  int nelt0 = ((3 - j) * nelt) % 3;
534438fd1498Szrj 	  int nelt1 = ((3 - j) * nelt + 1) % 3;
534538fd1498Szrj 	  int nelt2 = ((3 - j) * nelt + 2) % 3;
534638fd1498Szrj 
534738fd1498Szrj 	  for (i = 0; i < nelt; i++)
534838fd1498Szrj 	    {
534938fd1498Szrj 	      if (3 * i + nelt0 < nelt)
535038fd1498Szrj 		sel[3 * i + nelt0] = j0++;
535138fd1498Szrj 	      if (3 * i + nelt1 < nelt)
535238fd1498Szrj 		sel[3 * i + nelt1] = nelt + j1++;
535338fd1498Szrj 	      if (3 * i + nelt2 < nelt)
535438fd1498Szrj 		sel[3 * i + nelt2] = 0;
535538fd1498Szrj 	    }
535638fd1498Szrj 	  indices.new_vector (sel, 2, nelt);
535738fd1498Szrj 	  perm3_mask_low = vect_gen_perm_mask_checked (vectype, indices);
535838fd1498Szrj 
535938fd1498Szrj 	  for (i = 0; i < nelt; i++)
536038fd1498Szrj 	    {
536138fd1498Szrj 	      if (3 * i + nelt0 < nelt)
536238fd1498Szrj 		sel[3 * i + nelt0] = 3 * i + nelt0;
536338fd1498Szrj 	      if (3 * i + nelt1 < nelt)
536438fd1498Szrj 		sel[3 * i + nelt1] = 3 * i + nelt1;
536538fd1498Szrj 	      if (3 * i + nelt2 < nelt)
536638fd1498Szrj 		sel[3 * i + nelt2] = nelt + j2++;
536738fd1498Szrj 	    }
536838fd1498Szrj 	  indices.new_vector (sel, 2, nelt);
536938fd1498Szrj 	  perm3_mask_high = vect_gen_perm_mask_checked (vectype, indices);
537038fd1498Szrj 
537138fd1498Szrj 	  vect1 = dr_chain[0];
537238fd1498Szrj 	  vect2 = dr_chain[1];
537338fd1498Szrj 
537438fd1498Szrj 	  /* Create interleaving stmt:
537538fd1498Szrj 	     low = VEC_PERM_EXPR <vect1, vect2,
537638fd1498Szrj 				  {j, nelt, *, j + 1, nelt + j + 1, *,
537738fd1498Szrj 				   j + 2, nelt + j + 2, *, ...}>  */
537838fd1498Szrj 	  data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3_low");
537938fd1498Szrj 	  perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, vect1,
538038fd1498Szrj 					   vect2, perm3_mask_low);
538138fd1498Szrj 	  vect_finish_stmt_generation (stmt, perm_stmt, gsi);
538238fd1498Szrj 
538338fd1498Szrj 	  vect1 = data_ref;
538438fd1498Szrj 	  vect2 = dr_chain[2];
538538fd1498Szrj 	  /* Create interleaving stmt:
538638fd1498Szrj 	     low = VEC_PERM_EXPR <vect1, vect2,
538738fd1498Szrj 				  {0, 1, nelt + j, 3, 4, nelt + j + 1,
538838fd1498Szrj 				   6, 7, nelt + j + 2, ...}>  */
538938fd1498Szrj 	  data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3_high");
539038fd1498Szrj 	  perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, vect1,
539138fd1498Szrj 					   vect2, perm3_mask_high);
539238fd1498Szrj 	  vect_finish_stmt_generation (stmt, perm_stmt, gsi);
539338fd1498Szrj 	  (*result_chain)[j] = data_ref;
539438fd1498Szrj 	}
539538fd1498Szrj     }
539638fd1498Szrj   else
539738fd1498Szrj     {
539838fd1498Szrj       /* If length is not equal to 3 then only power of 2 is supported.  */
539938fd1498Szrj       gcc_assert (pow2p_hwi (length));
540038fd1498Szrj 
540138fd1498Szrj       /* The encoding has 2 interleaved stepped patterns.  */
540238fd1498Szrj       poly_uint64 nelt = TYPE_VECTOR_SUBPARTS (vectype);
540338fd1498Szrj       vec_perm_builder sel (nelt, 2, 3);
540438fd1498Szrj       sel.quick_grow (6);
540538fd1498Szrj       for (i = 0; i < 3; i++)
540638fd1498Szrj 	{
540738fd1498Szrj 	  sel[i * 2] = i;
540838fd1498Szrj 	  sel[i * 2 + 1] = i + nelt;
540938fd1498Szrj 	}
541038fd1498Szrj 	vec_perm_indices indices (sel, 2, nelt);
541138fd1498Szrj 	perm_mask_high = vect_gen_perm_mask_checked (vectype, indices);
541238fd1498Szrj 
541338fd1498Szrj 	for (i = 0; i < 6; i++)
541438fd1498Szrj 	  sel[i] += exact_div (nelt, 2);
541538fd1498Szrj 	indices.new_vector (sel, 2, nelt);
541638fd1498Szrj 	perm_mask_low = vect_gen_perm_mask_checked (vectype, indices);
541738fd1498Szrj 
541838fd1498Szrj 	for (i = 0, n = log_length; i < n; i++)
541938fd1498Szrj 	  {
542038fd1498Szrj 	    for (j = 0; j < length/2; j++)
542138fd1498Szrj 	      {
542238fd1498Szrj 		vect1 = dr_chain[j];
542338fd1498Szrj 		vect2 = dr_chain[j+length/2];
542438fd1498Szrj 
542538fd1498Szrj 		/* Create interleaving stmt:
542638fd1498Szrj 		   high = VEC_PERM_EXPR <vect1, vect2, {0, nelt, 1, nelt+1,
542738fd1498Szrj 							...}>  */
542838fd1498Szrj 		high = make_temp_ssa_name (vectype, NULL, "vect_inter_high");
542938fd1498Szrj 		perm_stmt = gimple_build_assign (high, VEC_PERM_EXPR, vect1,
543038fd1498Szrj 						 vect2, perm_mask_high);
543138fd1498Szrj 		vect_finish_stmt_generation (stmt, perm_stmt, gsi);
543238fd1498Szrj 		(*result_chain)[2*j] = high;
543338fd1498Szrj 
543438fd1498Szrj 		/* Create interleaving stmt:
543538fd1498Szrj 		   low = VEC_PERM_EXPR <vect1, vect2,
543638fd1498Szrj 					{nelt/2, nelt*3/2, nelt/2+1, nelt*3/2+1,
543738fd1498Szrj 					 ...}>  */
543838fd1498Szrj 		low = make_temp_ssa_name (vectype, NULL, "vect_inter_low");
543938fd1498Szrj 		perm_stmt = gimple_build_assign (low, VEC_PERM_EXPR, vect1,
544038fd1498Szrj 						 vect2, perm_mask_low);
544138fd1498Szrj 		vect_finish_stmt_generation (stmt, perm_stmt, gsi);
544238fd1498Szrj 		(*result_chain)[2*j+1] = low;
544338fd1498Szrj 	      }
544438fd1498Szrj 	    memcpy (dr_chain.address (), result_chain->address (),
544538fd1498Szrj 		    length * sizeof (tree));
544638fd1498Szrj 	  }
544738fd1498Szrj     }
544838fd1498Szrj }
544938fd1498Szrj 
545038fd1498Szrj /* Function vect_setup_realignment
545138fd1498Szrj 
545238fd1498Szrj    This function is called when vectorizing an unaligned load using
545338fd1498Szrj    the dr_explicit_realign[_optimized] scheme.
545438fd1498Szrj    This function generates the following code at the loop prolog:
545538fd1498Szrj 
545638fd1498Szrj       p = initial_addr;
545738fd1498Szrj    x  msq_init = *(floor(p));   # prolog load
545838fd1498Szrj       realignment_token = call target_builtin;
545938fd1498Szrj     loop:
546038fd1498Szrj    x  msq = phi (msq_init, ---)
546138fd1498Szrj 
546238fd1498Szrj    The stmts marked with x are generated only for the case of
546338fd1498Szrj    dr_explicit_realign_optimized.
546438fd1498Szrj 
546538fd1498Szrj    The code above sets up a new (vector) pointer, pointing to the first
546638fd1498Szrj    location accessed by STMT, and a "floor-aligned" load using that pointer.
546738fd1498Szrj    It also generates code to compute the "realignment-token" (if the relevant
546838fd1498Szrj    target hook was defined), and creates a phi-node at the loop-header bb
546938fd1498Szrj    whose arguments are the result of the prolog-load (created by this
547038fd1498Szrj    function) and the result of a load that takes place in the loop (to be
547138fd1498Szrj    created by the caller to this function).
547238fd1498Szrj 
547338fd1498Szrj    For the case of dr_explicit_realign_optimized:
547438fd1498Szrj    The caller to this function uses the phi-result (msq) to create the
547538fd1498Szrj    realignment code inside the loop, and sets up the missing phi argument,
547638fd1498Szrj    as follows:
547738fd1498Szrj     loop:
547838fd1498Szrj       msq = phi (msq_init, lsq)
547938fd1498Szrj       lsq = *(floor(p'));        # load in loop
548038fd1498Szrj       result = realign_load (msq, lsq, realignment_token);
548138fd1498Szrj 
548238fd1498Szrj    For the case of dr_explicit_realign:
548338fd1498Szrj     loop:
548438fd1498Szrj       msq = *(floor(p)); 	# load in loop
548538fd1498Szrj       p' = p + (VS-1);
548638fd1498Szrj       lsq = *(floor(p'));	# load in loop
548738fd1498Szrj       result = realign_load (msq, lsq, realignment_token);
548838fd1498Szrj 
548938fd1498Szrj    Input:
549038fd1498Szrj    STMT - (scalar) load stmt to be vectorized. This load accesses
549138fd1498Szrj           a memory location that may be unaligned.
549238fd1498Szrj    BSI - place where new code is to be inserted.
549338fd1498Szrj    ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
549438fd1498Szrj 			      is used.
549538fd1498Szrj 
549638fd1498Szrj    Output:
549738fd1498Szrj    REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
549838fd1498Szrj                        target hook, if defined.
549938fd1498Szrj    Return value - the result of the loop-header phi node.  */
550038fd1498Szrj 
550138fd1498Szrj tree
vect_setup_realignment(gimple * stmt,gimple_stmt_iterator * gsi,tree * realignment_token,enum dr_alignment_support alignment_support_scheme,tree init_addr,struct loop ** at_loop)550238fd1498Szrj vect_setup_realignment (gimple *stmt, gimple_stmt_iterator *gsi,
550338fd1498Szrj                         tree *realignment_token,
550438fd1498Szrj 			enum dr_alignment_support alignment_support_scheme,
550538fd1498Szrj 			tree init_addr,
550638fd1498Szrj 			struct loop **at_loop)
550738fd1498Szrj {
550838fd1498Szrj   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
550938fd1498Szrj   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
551038fd1498Szrj   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
551138fd1498Szrj   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
551238fd1498Szrj   struct loop *loop = NULL;
551338fd1498Szrj   edge pe = NULL;
551438fd1498Szrj   tree scalar_dest = gimple_assign_lhs (stmt);
551538fd1498Szrj   tree vec_dest;
551638fd1498Szrj   gimple *inc;
551738fd1498Szrj   tree ptr;
551838fd1498Szrj   tree data_ref;
551938fd1498Szrj   basic_block new_bb;
552038fd1498Szrj   tree msq_init = NULL_TREE;
552138fd1498Szrj   tree new_temp;
552238fd1498Szrj   gphi *phi_stmt;
552338fd1498Szrj   tree msq = NULL_TREE;
552438fd1498Szrj   gimple_seq stmts = NULL;
552538fd1498Szrj   bool inv_p;
552638fd1498Szrj   bool compute_in_loop = false;
552738fd1498Szrj   bool nested_in_vect_loop = false;
552838fd1498Szrj   struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
552938fd1498Szrj   struct loop *loop_for_initial_load = NULL;
553038fd1498Szrj 
553138fd1498Szrj   if (loop_vinfo)
553238fd1498Szrj     {
553338fd1498Szrj       loop = LOOP_VINFO_LOOP (loop_vinfo);
553438fd1498Szrj       nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
553538fd1498Szrj     }
553638fd1498Szrj 
553738fd1498Szrj   gcc_assert (alignment_support_scheme == dr_explicit_realign
553838fd1498Szrj 	      || alignment_support_scheme == dr_explicit_realign_optimized);
553938fd1498Szrj 
554038fd1498Szrj   /* We need to generate three things:
554138fd1498Szrj      1. the misalignment computation
554238fd1498Szrj      2. the extra vector load (for the optimized realignment scheme).
554338fd1498Szrj      3. the phi node for the two vectors from which the realignment is
554438fd1498Szrj       done (for the optimized realignment scheme).  */
554538fd1498Szrj 
554638fd1498Szrj   /* 1. Determine where to generate the misalignment computation.
554738fd1498Szrj 
554838fd1498Szrj      If INIT_ADDR is NULL_TREE, this indicates that the misalignment
554938fd1498Szrj      calculation will be generated by this function, outside the loop (in the
555038fd1498Szrj      preheader).  Otherwise, INIT_ADDR had already been computed for us by the
555138fd1498Szrj      caller, inside the loop.
555238fd1498Szrj 
555338fd1498Szrj      Background: If the misalignment remains fixed throughout the iterations of
555438fd1498Szrj      the loop, then both realignment schemes are applicable, and also the
555538fd1498Szrj      misalignment computation can be done outside LOOP.  This is because we are
555638fd1498Szrj      vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
555738fd1498Szrj      are a multiple of VS (the Vector Size), and therefore the misalignment in
555838fd1498Szrj      different vectorized LOOP iterations is always the same.
555938fd1498Szrj      The problem arises only if the memory access is in an inner-loop nested
556038fd1498Szrj      inside LOOP, which is now being vectorized using outer-loop vectorization.
556138fd1498Szrj      This is the only case when the misalignment of the memory access may not
556238fd1498Szrj      remain fixed throughout the iterations of the inner-loop (as explained in
556338fd1498Szrj      detail in vect_supportable_dr_alignment).  In this case, not only is the
556438fd1498Szrj      optimized realignment scheme not applicable, but also the misalignment
556538fd1498Szrj      computation (and generation of the realignment token that is passed to
556638fd1498Szrj      REALIGN_LOAD) have to be done inside the loop.
556738fd1498Szrj 
556838fd1498Szrj      In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
556938fd1498Szrj      or not, which in turn determines if the misalignment is computed inside
557038fd1498Szrj      the inner-loop, or outside LOOP.  */
557138fd1498Szrj 
557238fd1498Szrj   if (init_addr != NULL_TREE || !loop_vinfo)
557338fd1498Szrj     {
557438fd1498Szrj       compute_in_loop = true;
557538fd1498Szrj       gcc_assert (alignment_support_scheme == dr_explicit_realign);
557638fd1498Szrj     }
557738fd1498Szrj 
557838fd1498Szrj 
557938fd1498Szrj   /* 2. Determine where to generate the extra vector load.
558038fd1498Szrj 
558138fd1498Szrj      For the optimized realignment scheme, instead of generating two vector
558238fd1498Szrj      loads in each iteration, we generate a single extra vector load in the
558338fd1498Szrj      preheader of the loop, and in each iteration reuse the result of the
558438fd1498Szrj      vector load from the previous iteration.  In case the memory access is in
558538fd1498Szrj      an inner-loop nested inside LOOP, which is now being vectorized using
558638fd1498Szrj      outer-loop vectorization, we need to determine whether this initial vector
558738fd1498Szrj      load should be generated at the preheader of the inner-loop, or can be
558838fd1498Szrj      generated at the preheader of LOOP.  If the memory access has no evolution
558938fd1498Szrj      in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
559038fd1498Szrj      to be generated inside LOOP (in the preheader of the inner-loop).  */
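
  /* A rough sketch of the overall optimized realignment scheme (the names
     below are illustrative only; this function emits the preheader load,
     the realignment token and the phi, while the per-iteration aligned
     loads and the REALIGN_LOAD itself are created by the caller):

       preheader:
	 msq_init = *floor (addr);		// the extra vector load (2.)
	 token = mask_for_load (addr);		// from the misalignment (1.)
       loop:
	 msq = PHI <msq_init (preheader), lsq (latch)>;	  // the phi node (3.)
	 lsq = *floor (addr + VS);		// per-iteration aligned load
	 vec = REALIGN_LOAD <msq, lsq, token>;  */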
559138fd1498Szrj 
559238fd1498Szrj   if (nested_in_vect_loop)
559338fd1498Szrj     {
559438fd1498Szrj       tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
559538fd1498Szrj       bool invariant_in_outerloop =
559638fd1498Szrj             (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
559738fd1498Szrj       loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
559838fd1498Szrj     }
559938fd1498Szrj   else
560038fd1498Szrj     loop_for_initial_load = loop;
560138fd1498Szrj   if (at_loop)
560238fd1498Szrj     *at_loop = loop_for_initial_load;
560338fd1498Szrj 
560438fd1498Szrj   if (loop_for_initial_load)
560538fd1498Szrj     pe = loop_preheader_edge (loop_for_initial_load);
560638fd1498Szrj 
560738fd1498Szrj   /* 3. For the case of the optimized realignment, create the first vector
560838fd1498Szrj       load at the loop preheader.  */
560938fd1498Szrj 
561038fd1498Szrj   if (alignment_support_scheme == dr_explicit_realign_optimized)
561138fd1498Szrj     {
561238fd1498Szrj       /* Create msq_init = *(floor(p1)) in the loop preheader  */
561338fd1498Szrj       gassign *new_stmt;
561438fd1498Szrj 
561538fd1498Szrj       gcc_assert (!compute_in_loop);
561638fd1498Szrj       vec_dest = vect_create_destination_var (scalar_dest, vectype);
561738fd1498Szrj       ptr = vect_create_data_ref_ptr (stmt, vectype, loop_for_initial_load,
561838fd1498Szrj 				      NULL_TREE, &init_addr, NULL, &inc,
561938fd1498Szrj 				      true, &inv_p);
562038fd1498Szrj       if (TREE_CODE (ptr) == SSA_NAME)
562138fd1498Szrj 	new_temp = copy_ssa_name (ptr);
562238fd1498Szrj       else
562338fd1498Szrj 	new_temp = make_ssa_name (TREE_TYPE (ptr));
562438fd1498Szrj       unsigned int align = DR_TARGET_ALIGNMENT (dr);
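      /* Round PTR down to the target alignment boundary, i.e. compute
	 ptr & -align (for example ptr & -16 with a 16-byte target
	 alignment), giving the aligned address from which the first
	 vector is loaded.  */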
562538fd1498Szrj       new_stmt = gimple_build_assign
562638fd1498Szrj 		   (new_temp, BIT_AND_EXPR, ptr,
562738fd1498Szrj 		    build_int_cst (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
562838fd1498Szrj       new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
562938fd1498Szrj       gcc_assert (!new_bb);
563038fd1498Szrj       data_ref
563138fd1498Szrj 	= build2 (MEM_REF, TREE_TYPE (vec_dest), new_temp,
563238fd1498Szrj 		  build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0));
563338fd1498Szrj       vect_copy_ref_info (data_ref, DR_REF (dr));
563438fd1498Szrj       new_stmt = gimple_build_assign (vec_dest, data_ref);
563538fd1498Szrj       new_temp = make_ssa_name (vec_dest, new_stmt);
563638fd1498Szrj       gimple_assign_set_lhs (new_stmt, new_temp);
563738fd1498Szrj       if (pe)
563838fd1498Szrj         {
563938fd1498Szrj           new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
564038fd1498Szrj           gcc_assert (!new_bb);
564138fd1498Szrj         }
564238fd1498Szrj       else
564338fd1498Szrj          gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
564438fd1498Szrj 
564538fd1498Szrj       msq_init = gimple_assign_lhs (new_stmt);
564638fd1498Szrj     }
564738fd1498Szrj 
564838fd1498Szrj   /* 4. Create realignment token using a target builtin, if available.
564938fd1498Szrj       It is done either inside the containing loop, or before LOOP (as
565038fd1498Szrj       determined above).  */
565138fd1498Szrj 
565238fd1498Szrj   if (targetm.vectorize.builtin_mask_for_load)
565338fd1498Szrj     {
565438fd1498Szrj       gcall *new_stmt;
565538fd1498Szrj       tree builtin_decl;
565638fd1498Szrj 
565738fd1498Szrj       /* Compute INIT_ADDR - the initial address accessed by this memref.  */
565838fd1498Szrj       if (!init_addr)
565938fd1498Szrj 	{
566038fd1498Szrj 	  /* Generate the INIT_ADDR computation outside LOOP.  */
566138fd1498Szrj 	  init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts,
566238fd1498Szrj 							    NULL_TREE);
566338fd1498Szrj           if (loop)
566438fd1498Szrj             {
566538fd1498Szrj    	      pe = loop_preheader_edge (loop);
566638fd1498Szrj 	      new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
566738fd1498Szrj 	      gcc_assert (!new_bb);
566838fd1498Szrj             }
566938fd1498Szrj           else
567038fd1498Szrj              gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
567138fd1498Szrj 	}
567238fd1498Szrj 
567338fd1498Szrj       builtin_decl = targetm.vectorize.builtin_mask_for_load ();
567438fd1498Szrj       new_stmt = gimple_build_call (builtin_decl, 1, init_addr);
567538fd1498Szrj       vec_dest =
567638fd1498Szrj 	vect_create_destination_var (scalar_dest,
567738fd1498Szrj 				     gimple_call_return_type (new_stmt));
567838fd1498Szrj       new_temp = make_ssa_name (vec_dest, new_stmt);
567938fd1498Szrj       gimple_call_set_lhs (new_stmt, new_temp);
568038fd1498Szrj 
568138fd1498Szrj       if (compute_in_loop)
568238fd1498Szrj 	gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
568338fd1498Szrj       else
568438fd1498Szrj 	{
568538fd1498Szrj 	  /* Generate the misalignment computation outside LOOP.  */
568638fd1498Szrj 	  pe = loop_preheader_edge (loop);
568738fd1498Szrj 	  new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
568838fd1498Szrj 	  gcc_assert (!new_bb);
568938fd1498Szrj 	}
569038fd1498Szrj 
569138fd1498Szrj       *realignment_token = gimple_call_lhs (new_stmt);
569238fd1498Szrj 
569338fd1498Szrj       /* The result of the CALL_EXPR to this builtin is determined from
569438fd1498Szrj          the value of the parameter and no global variables are touched
569538fd1498Szrj          which makes the builtin a "const" function.  Requiring the
569638fd1498Szrj          builtin to have the "const" attribute makes it unnecessary
569738fd1498Szrj          to call mark_call_clobbered.  */
569838fd1498Szrj       gcc_assert (TREE_READONLY (builtin_decl));
569938fd1498Szrj     }
570038fd1498Szrj 
570138fd1498Szrj   if (alignment_support_scheme == dr_explicit_realign)
570238fd1498Szrj     return msq;
570338fd1498Szrj 
570438fd1498Szrj   gcc_assert (!compute_in_loop);
570538fd1498Szrj   gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);
570638fd1498Szrj 
570738fd1498Szrj 
570838fd1498Szrj   /* 5. Create msq = phi <msq_init, lsq> in loop  */
570938fd1498Szrj 
571038fd1498Szrj   pe = loop_preheader_edge (containing_loop);
571138fd1498Szrj   vec_dest = vect_create_destination_var (scalar_dest, vectype);
571238fd1498Szrj   msq = make_ssa_name (vec_dest);
571338fd1498Szrj   phi_stmt = create_phi_node (msq, containing_loop->header);
571438fd1498Szrj   add_phi_arg (phi_stmt, msq_init, pe, UNKNOWN_LOCATION);
571538fd1498Szrj 
571638fd1498Szrj   return msq;
571738fd1498Szrj }
571838fd1498Szrj 
571938fd1498Szrj 
572038fd1498Szrj /* Function vect_grouped_load_supported.
572138fd1498Szrj 
572238fd1498Szrj    COUNT is the size of the load group (the number of statements plus the
572338fd1498Szrj    number of gaps).  SINGLE_ELEMENT_P is true if there is actually
572438fd1498Szrj    only one statement, with a gap of COUNT - 1.
572538fd1498Szrj 
572638fd1498Szrj    Returns true if a suitable permute exists.  */
572738fd1498Szrj 
572838fd1498Szrj bool
572938fd1498Szrj vect_grouped_load_supported (tree vectype, bool single_element_p,
573038fd1498Szrj 			     unsigned HOST_WIDE_INT count)
573138fd1498Szrj {
573238fd1498Szrj   machine_mode mode = TYPE_MODE (vectype);
573338fd1498Szrj 
573438fd1498Szrj   /* If this is single-element interleaving with an element distance
573538fd1498Szrj      that leaves unused vector loads around, punt: we at least create
573638fd1498Szrj      very sub-optimal code in that case (and blow up memory,
573738fd1498Szrj      see PR65518).  */
573838fd1498Szrj   if (single_element_p && maybe_gt (count, TYPE_VECTOR_SUBPARTS (vectype)))
573938fd1498Szrj     {
574038fd1498Szrj       if (dump_enabled_p ())
574138fd1498Szrj 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
574238fd1498Szrj 			 "single-element interleaving not supported "
574338fd1498Szrj 			 "for non-adjacent vector loads\n");
574438fd1498Szrj       return false;
574538fd1498Szrj     }
574638fd1498Szrj 
574738fd1498Szrj   /* vect_permute_load_chain requires the group size to be equal to 3 or
574838fd1498Szrj      be a power of two.  */
574938fd1498Szrj   if (count != 3 && exact_log2 (count) == -1)
575038fd1498Szrj     {
575138fd1498Szrj       if (dump_enabled_p ())
575238fd1498Szrj 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
575338fd1498Szrj 			 "the size of the group of accesses"
575438fd1498Szrj 			 " is not a power of 2 or not equal to 3\n");
575538fd1498Szrj       return false;
575638fd1498Szrj     }
575738fd1498Szrj 
575838fd1498Szrj   /* Check that the permutation is supported.  */
575938fd1498Szrj   if (VECTOR_MODE_P (mode))
576038fd1498Szrj     {
576138fd1498Szrj       unsigned int i, j;
576238fd1498Szrj       if (count == 3)
576338fd1498Szrj 	{
576438fd1498Szrj 	  unsigned int nelt;
576538fd1498Szrj 	  if (!GET_MODE_NUNITS (mode).is_constant (&nelt))
576638fd1498Szrj 	    {
576738fd1498Szrj 	      if (dump_enabled_p ())
576838fd1498Szrj 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
576938fd1498Szrj 				 "cannot handle groups of 3 loads for"
577038fd1498Szrj 				 " variable-length vectors\n");
577138fd1498Szrj 	      return false;
577238fd1498Szrj 	    }
577338fd1498Szrj 
577438fd1498Szrj 	  vec_perm_builder sel (nelt, nelt, 1);
577538fd1498Szrj 	  sel.quick_grow (nelt);
577638fd1498Szrj 	  vec_perm_indices indices;
577738fd1498Szrj 	  unsigned int k;
577838fd1498Szrj 	  for (k = 0; k < 3; k++)
577938fd1498Szrj 	    {
578038fd1498Szrj 	      for (i = 0; i < nelt; i++)
578138fd1498Szrj 		if (3 * i + k < 2 * nelt)
578238fd1498Szrj 		  sel[i] = 3 * i + k;
578338fd1498Szrj 		else
578438fd1498Szrj 		  sel[i] = 0;
578538fd1498Szrj 	      indices.new_vector (sel, 2, nelt);
578638fd1498Szrj 	      if (!can_vec_perm_const_p (mode, indices))
578738fd1498Szrj 		{
578838fd1498Szrj 		  if (dump_enabled_p ())
578938fd1498Szrj 		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
579038fd1498Szrj 				     "shuffle of 3 loads is not supported by"
579138fd1498Szrj 				     " target\n");
579238fd1498Szrj 		  return false;
579338fd1498Szrj 		}
579438fd1498Szrj 	      for (i = 0, j = 0; i < nelt; i++)
579538fd1498Szrj 		if (3 * i + k < 2 * nelt)
579638fd1498Szrj 		  sel[i] = i;
579738fd1498Szrj 		else
579838fd1498Szrj 		  sel[i] = nelt + ((nelt + k) % 3) + 3 * (j++);
579938fd1498Szrj 	      indices.new_vector (sel, 2, nelt);
580038fd1498Szrj 	      if (!can_vec_perm_const_p (mode, indices))
580138fd1498Szrj 		{
580238fd1498Szrj 		  if (dump_enabled_p ())
580338fd1498Szrj 		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
580438fd1498Szrj 				     "shuffle of 3 loads is not supported by"
580538fd1498Szrj 				     " target\n");
580638fd1498Szrj 		  return false;
580738fd1498Szrj 		}
580838fd1498Szrj 	    }
580938fd1498Szrj 	  return true;
581038fd1498Szrj 	}
581138fd1498Szrj       else
581238fd1498Szrj 	{
581338fd1498Szrj 	  /* If length is not equal to 3 then only power of 2 is supported.  */
581438fd1498Szrj 	  gcc_assert (pow2p_hwi (count));
581538fd1498Szrj 	  poly_uint64 nelt = GET_MODE_NUNITS (mode);
581638fd1498Szrj 
581738fd1498Szrj 	  /* The encoding has a single stepped pattern.  */
581838fd1498Szrj 	  vec_perm_builder sel (nelt, 1, 3);
581938fd1498Szrj 	  sel.quick_grow (3);
582038fd1498Szrj 	  for (i = 0; i < 3; i++)
582138fd1498Szrj 	    sel[i] = i * 2;
582238fd1498Szrj 	  vec_perm_indices indices (sel, 2, nelt);
582338fd1498Szrj 	  if (can_vec_perm_const_p (mode, indices))
582438fd1498Szrj 	    {
582538fd1498Szrj 	      for (i = 0; i < 3; i++)
582638fd1498Szrj 		sel[i] = i * 2 + 1;
582738fd1498Szrj 	      indices.new_vector (sel, 2, nelt);
582838fd1498Szrj 	      if (can_vec_perm_const_p (mode, indices))
582938fd1498Szrj 		return true;
583038fd1498Szrj 	    }
583138fd1498Szrj         }
583238fd1498Szrj     }
583338fd1498Szrj 
583438fd1498Szrj   if (dump_enabled_p ())
583538fd1498Szrj     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
583638fd1498Szrj 		     "extract even/odd not supported by target\n");
583738fd1498Szrj   return false;
583838fd1498Szrj }
583938fd1498Szrj 
584038fd1498Szrj /* Return TRUE if vec_{masked_}load_lanes is available for COUNT vectors of
584138fd1498Szrj    type VECTYPE.  MASKED_P says whether the masked form is needed.  */
584238fd1498Szrj 
584338fd1498Szrj bool
584438fd1498Szrj vect_load_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count,
584538fd1498Szrj 			   bool masked_p)
584638fd1498Szrj {
584738fd1498Szrj   if (masked_p)
584838fd1498Szrj     return vect_lanes_optab_supported_p ("vec_mask_load_lanes",
584938fd1498Szrj 					 vec_mask_load_lanes_optab,
585038fd1498Szrj 					 vectype, count);
585138fd1498Szrj   else
585238fd1498Szrj     return vect_lanes_optab_supported_p ("vec_load_lanes",
585338fd1498Szrj 					 vec_load_lanes_optab,
585438fd1498Szrj 					 vectype, count);
585538fd1498Szrj }
585638fd1498Szrj 
585738fd1498Szrj /* Function vect_permute_load_chain.
585838fd1498Szrj 
585938fd1498Szrj    Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
586038fd1498Szrj    a power of 2 or equal to 3, generate extract_even/odd stmts to reorder
586138fd1498Szrj    the input data correctly.  Return the final references for loads in
586238fd1498Szrj    RESULT_CHAIN.
586338fd1498Szrj 
586438fd1498Szrj    E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
586538fd1498Szrj    The input is 4 vectors each containing 8 elements. We assign a number to each
586638fd1498Szrj    element; the input sequence is:
586738fd1498Szrj 
586838fd1498Szrj    1st vec:   0  1  2  3  4  5  6  7
586938fd1498Szrj    2nd vec:   8  9 10 11 12 13 14 15
587038fd1498Szrj    3rd vec:  16 17 18 19 20 21 22 23
587138fd1498Szrj    4th vec:  24 25 26 27 28 29 30 31
587238fd1498Szrj 
587338fd1498Szrj    The output sequence should be:
587438fd1498Szrj 
587538fd1498Szrj    1st vec:  0 4  8 12 16 20 24 28
587638fd1498Szrj    2nd vec:  1 5  9 13 17 21 25 29
587738fd1498Szrj    3rd vec:  2 6 10 14 18 22 26 30
587838fd1498Szrj    4th vec:  3 7 11 15 19 23 27 31
587938fd1498Szrj 
588038fd1498Szrj    i.e., the first output vector should contain the first elements of each
588138fd1498Szrj    interleaving group, etc.
588238fd1498Szrj 
588338fd1498Szrj    We use extract_even/odd instructions to create such output.  The input of
588438fd1498Szrj    each extract_even/odd operation is two vectors
588538fd1498Szrj    1st vec    2nd vec
588638fd1498Szrj    0 1 2 3    4 5 6 7
588738fd1498Szrj 
588838fd1498Szrj    and the output is the vector of extracted even/odd elements.  The output of
588938fd1498Szrj    extract_even will be:   0 2 4 6
589038fd1498Szrj    and of extract_odd:     1 3 5 7
589138fd1498Szrj 
589238fd1498Szrj 
589338fd1498Szrj    The permutation is done in log LENGTH stages.  In each stage extract_even
589438fd1498Szrj    and extract_odd stmts are created for each pair of vectors in DR_CHAIN in
589538fd1498Szrj    their order.  In our example,
589638fd1498Szrj 
589738fd1498Szrj    E1: extract_even (1st vec, 2nd vec)
589838fd1498Szrj    E2: extract_odd (1st vec, 2nd vec)
589938fd1498Szrj    E3: extract_even (3rd vec, 4th vec)
590038fd1498Szrj    E4: extract_odd (3rd vec, 4th vec)
590138fd1498Szrj 
590238fd1498Szrj    The output for the first stage will be:
590338fd1498Szrj 
590438fd1498Szrj    E1:  0  2  4  6  8 10 12 14
590538fd1498Szrj    E2:  1  3  5  7  9 11 13 15
590638fd1498Szrj    E3: 16 18 20 22 24 26 28 30
590738fd1498Szrj    E4: 17 19 21 23 25 27 29 31
590838fd1498Szrj 
590938fd1498Szrj    In order to proceed and create the correct sequence for the next stage (or
591038fd1498Szrj    for the correct output, if the second stage is the last one, as in our
591138fd1498Szrj    example), we first put the output of extract_even operation and then the
591238fd1498Szrj    output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
591338fd1498Szrj    The input for the second stage is:
591438fd1498Szrj 
591538fd1498Szrj    1st vec (E1):  0  2  4  6  8 10 12 14
591638fd1498Szrj    2nd vec (E3): 16 18 20 22 24 26 28 30
591738fd1498Szrj    3rd vec (E2):  1  3  5  7  9 11 13 15
591838fd1498Szrj    4th vec (E4): 17 19 21 23 25 27 29 31
591938fd1498Szrj 
592038fd1498Szrj    The output of the second stage:
592138fd1498Szrj 
592238fd1498Szrj    E1: 0 4  8 12 16 20 24 28
592338fd1498Szrj    E2: 2 6 10 14 18 22 26 30
592438fd1498Szrj    E3: 1 5  9 13 17 21 25 29
592538fd1498Szrj    E4: 3 7 11 15 19 23 27 31
592638fd1498Szrj 
592738fd1498Szrj    And RESULT_CHAIN after reordering:
592838fd1498Szrj 
592938fd1498Szrj    1st vec (E1):  0 4  8 12 16 20 24 28
593038fd1498Szrj    2nd vec (E3):  1 5  9 13 17 21 25 29
593138fd1498Szrj    3rd vec (E2):  2 6 10 14 18 22 26 30
593238fd1498Szrj    4th vec (E4):  3 7 11 15 19 23 27 31.  */
593338fd1498Szrj 
593438fd1498Szrj static void
593538fd1498Szrj vect_permute_load_chain (vec<tree> dr_chain,
593638fd1498Szrj 			 unsigned int length,
593738fd1498Szrj 			 gimple *stmt,
593838fd1498Szrj 			 gimple_stmt_iterator *gsi,
593938fd1498Szrj 			 vec<tree> *result_chain)
594038fd1498Szrj {
594138fd1498Szrj   tree data_ref, first_vect, second_vect;
594238fd1498Szrj   tree perm_mask_even, perm_mask_odd;
594338fd1498Szrj   tree perm3_mask_low, perm3_mask_high;
594438fd1498Szrj   gimple *perm_stmt;
594538fd1498Szrj   tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
594638fd1498Szrj   unsigned int i, j, log_length = exact_log2 (length);
594738fd1498Szrj 
594838fd1498Szrj   result_chain->quick_grow (length);
594938fd1498Szrj   memcpy (result_chain->address (), dr_chain.address (),
595038fd1498Szrj 	  length * sizeof (tree));
595138fd1498Szrj 
595238fd1498Szrj   if (length == 3)
595338fd1498Szrj     {
595438fd1498Szrj       /* vect_grouped_load_supported ensures that this is constant.  */
595538fd1498Szrj       unsigned nelt = TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
595638fd1498Szrj       unsigned int k;
595738fd1498Szrj 
595838fd1498Szrj       vec_perm_builder sel (nelt, nelt, 1);
595938fd1498Szrj       sel.quick_grow (nelt);
596038fd1498Szrj       vec_perm_indices indices;
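      /* Worked example (illustrative only), assuming nelt == 8: for k == 0
	 the low mask built below is { 0, 3, 6, 9, 12, 15, 0, 0 } and the
	 high mask is { 0, 1, 2, 3, 4, 5, 10, 13 }, so the two permutes
	 together pick elements 0,3,6 of the first vector, 1,4,7 of the
	 second and 2,5 of the third, i.e. every third scalar element of
	 the group, starting at element k.  */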
596138fd1498Szrj       for (k = 0; k < 3; k++)
596238fd1498Szrj 	{
596338fd1498Szrj 	  for (i = 0; i < nelt; i++)
596438fd1498Szrj 	    if (3 * i + k < 2 * nelt)
596538fd1498Szrj 	      sel[i] = 3 * i + k;
596638fd1498Szrj 	    else
596738fd1498Szrj 	      sel[i] = 0;
596838fd1498Szrj 	  indices.new_vector (sel, 2, nelt);
596938fd1498Szrj 	  perm3_mask_low = vect_gen_perm_mask_checked (vectype, indices);
597038fd1498Szrj 
597138fd1498Szrj 	  for (i = 0, j = 0; i < nelt; i++)
597238fd1498Szrj 	    if (3 * i + k < 2 * nelt)
597338fd1498Szrj 	      sel[i] = i;
597438fd1498Szrj 	    else
597538fd1498Szrj 	      sel[i] = nelt + ((nelt + k) % 3) + 3 * (j++);
597638fd1498Szrj 	  indices.new_vector (sel, 2, nelt);
597738fd1498Szrj 	  perm3_mask_high = vect_gen_perm_mask_checked (vectype, indices);
597838fd1498Szrj 
597938fd1498Szrj 	  first_vect = dr_chain[0];
598038fd1498Szrj 	  second_vect = dr_chain[1];
598138fd1498Szrj 
598238fd1498Szrj 	  /* Create interleaving stmt (low part of):
598338fd1498Szrj 	     low = VEC_PERM_EXPR <first_vect, second_vect, {k, 3 + k, 6 + k,
598438fd1498Szrj 							     ...}>  */
598538fd1498Szrj 	  data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3_low");
598638fd1498Szrj 	  perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, first_vect,
598738fd1498Szrj 					   second_vect, perm3_mask_low);
598838fd1498Szrj 	  vect_finish_stmt_generation (stmt, perm_stmt, gsi);
598938fd1498Szrj 
599038fd1498Szrj 	  /* Create interleaving stmt (high part of):
599138fd1498Szrj 	     high = VEC_PERM_EXPR <first_vect, second_vect, {k, 3 + k, 6 + k,
599238fd1498Szrj 							      ...}>  */
599338fd1498Szrj 	  first_vect = data_ref;
599438fd1498Szrj 	  second_vect = dr_chain[2];
599538fd1498Szrj 	  data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3_high");
599638fd1498Szrj 	  perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, first_vect,
599738fd1498Szrj 					   second_vect, perm3_mask_high);
599838fd1498Szrj 	  vect_finish_stmt_generation (stmt, perm_stmt, gsi);
599938fd1498Szrj 	  (*result_chain)[k] = data_ref;
600038fd1498Szrj 	}
600138fd1498Szrj     }
600238fd1498Szrj   else
600338fd1498Szrj     {
600438fd1498Szrj       /* If length is not equal to 3 then only power of 2 is supported.  */
600538fd1498Szrj       gcc_assert (pow2p_hwi (length));
600638fd1498Szrj 
600738fd1498Szrj       /* The encoding has a single stepped pattern.  */
600838fd1498Szrj       poly_uint64 nelt = TYPE_VECTOR_SUBPARTS (vectype);
600938fd1498Szrj       vec_perm_builder sel (nelt, 1, 3);
601038fd1498Szrj       sel.quick_grow (3);
601138fd1498Szrj       for (i = 0; i < 3; ++i)
601238fd1498Szrj 	sel[i] = i * 2;
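      /* I.e. sel holds just { 0, 2, 4 }; the stepped encoding stands for the
	 full even-elements mask { 0, 2, 4, 6, ... }, which also works when
	 the number of vector elements is not a compile-time constant.  */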
601338fd1498Szrj       vec_perm_indices indices (sel, 2, nelt);
601438fd1498Szrj       perm_mask_even = vect_gen_perm_mask_checked (vectype, indices);
601538fd1498Szrj 
601638fd1498Szrj       for (i = 0; i < 3; ++i)
601738fd1498Szrj 	sel[i] = i * 2 + 1;
601838fd1498Szrj       indices.new_vector (sel, 2, nelt);
601938fd1498Szrj       perm_mask_odd = vect_gen_perm_mask_checked (vectype, indices);
602038fd1498Szrj 
602138fd1498Szrj       for (i = 0; i < log_length; i++)
602238fd1498Szrj 	{
602338fd1498Szrj 	  for (j = 0; j < length; j += 2)
602438fd1498Szrj 	    {
602538fd1498Szrj 	      first_vect = dr_chain[j];
602638fd1498Szrj 	      second_vect = dr_chain[j+1];
602738fd1498Szrj 
602838fd1498Szrj 	      /* data_ref = permute_even (first_data_ref, second_data_ref);  */
602938fd1498Szrj 	      data_ref = make_temp_ssa_name (vectype, NULL, "vect_perm_even");
603038fd1498Szrj 	      perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
603138fd1498Szrj 					       first_vect, second_vect,
603238fd1498Szrj 					       perm_mask_even);
603338fd1498Szrj 	      vect_finish_stmt_generation (stmt, perm_stmt, gsi);
603438fd1498Szrj 	      (*result_chain)[j/2] = data_ref;
603538fd1498Szrj 
603638fd1498Szrj 	      /* data_ref = permute_odd (first_data_ref, second_data_ref);  */
603738fd1498Szrj 	      data_ref = make_temp_ssa_name (vectype, NULL, "vect_perm_odd");
603838fd1498Szrj 	      perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
603938fd1498Szrj 					       first_vect, second_vect,
604038fd1498Szrj 					       perm_mask_odd);
604138fd1498Szrj 	      vect_finish_stmt_generation (stmt, perm_stmt, gsi);
604238fd1498Szrj 	      (*result_chain)[j/2+length/2] = data_ref;
604338fd1498Szrj 	    }
604438fd1498Szrj 	  memcpy (dr_chain.address (), result_chain->address (),
604538fd1498Szrj 		  length * sizeof (tree));
604638fd1498Szrj 	}
604738fd1498Szrj     }
604838fd1498Szrj }
604938fd1498Szrj 
605038fd1498Szrj /* Function vect_shift_permute_load_chain.
605138fd1498Szrj 
605238fd1498Szrj    Given a chain of loads in DR_CHAIN of LENGTH 2 or 3, generate
605338fd1498Szrj    sequence of stmts to reorder the input data accordingly.
605438fd1498Szrj    Return the final references for loads in RESULT_CHAIN.
605538fd1498Szrj    Return true if successful, false otherwise.
605638fd1498Szrj 
605738fd1498Szrj    E.g., LENGTH is 3 and the scalar type is short, i.e., VF is 8.
605838fd1498Szrj    The input is 3 vectors each containing 8 elements.  We assign a
605938fd1498Szrj    number to each element, the input sequence is:
606038fd1498Szrj 
606138fd1498Szrj    1st vec:   0  1  2  3  4  5  6  7
606238fd1498Szrj    2nd vec:   8  9 10 11 12 13 14 15
606338fd1498Szrj    3rd vec:  16 17 18 19 20 21 22 23
606438fd1498Szrj 
606538fd1498Szrj    The output sequence should be:
606638fd1498Szrj 
606738fd1498Szrj    1st vec:  0 3 6  9 12 15 18 21
606838fd1498Szrj    2nd vec:  1 4 7 10 13 16 19 22
606938fd1498Szrj    3rd vec:  2 5 8 11 14 17 20 23
607038fd1498Szrj 
607138fd1498Szrj    We use 3 shuffle instructions and 3 * 3 - 1 shifts to create such output.
607238fd1498Szrj 
607338fd1498Szrj    First we shuffle all 3 vectors to get correct elements order:
607438fd1498Szrj 
607538fd1498Szrj    1st vec:  ( 0  3  6) ( 1  4  7) ( 2  5)
607638fd1498Szrj    2nd vec:  ( 8 11 14) ( 9 12 15) (10 13)
607738fd1498Szrj    3rd vec:  (16 19 22) (17 20 23) (18 21)
607838fd1498Szrj 
607938fd1498Szrj    Next we unite and shift the vectors 3 times:
608038fd1498Szrj 
608138fd1498Szrj    1st step:
608238fd1498Szrj      shift right by 6 the concatenation of:
608338fd1498Szrj      "1st vec" and  "2nd vec"
608438fd1498Szrj        ( 0  3  6) ( 1  4  7) |( 2  5) _ ( 8 11 14) ( 9 12 15)| (10 13)
608538fd1498Szrj      "2nd vec" and  "3rd vec"
608638fd1498Szrj        ( 8 11 14) ( 9 12 15) |(10 13) _ (16 19 22) (17 20 23)| (18 21)
608738fd1498Szrj      "3rd vec" and  "1st vec"
608838fd1498Szrj        (16 19 22) (17 20 23) |(18 21) _ ( 0  3  6) ( 1  4  7)| ( 2  5)
608938fd1498Szrj 			     | New vectors                   |
609038fd1498Szrj 
609138fd1498Szrj      So that now new vectors are:
609238fd1498Szrj 
609338fd1498Szrj      1st vec:  ( 2  5) ( 8 11 14) ( 9 12 15)
609438fd1498Szrj      2nd vec:  (10 13) (16 19 22) (17 20 23)
609538fd1498Szrj      3rd vec:  (18 21) ( 0  3  6) ( 1  4  7)
609638fd1498Szrj 
609738fd1498Szrj    2nd step:
609838fd1498Szrj      shift right by 5 the concatenation of:
609938fd1498Szrj      "1st vec" and  "3rd vec"
610038fd1498Szrj        ( 2  5) ( 8 11 14) |( 9 12 15) _ (18 21) ( 0  3  6)| ( 1  4  7)
610138fd1498Szrj      "2nd vec" and  "1st vec"
610238fd1498Szrj        (10 13) (16 19 22) |(17 20 23) _ ( 2  5) ( 8 11 14)| ( 9 12 15)
610338fd1498Szrj      "3rd vec" and  "2nd vec"
610438fd1498Szrj        (18 21) ( 0  3  6) |( 1  4  7) _ (10 13) (16 19 22)| (17 20 23)
610538fd1498Szrj 			  | New vectors                   |
610638fd1498Szrj 
610738fd1498Szrj      So that now new vectors are:
610838fd1498Szrj 
610938fd1498Szrj      1st vec:  ( 9 12 15) (18 21) ( 0  3  6)
611038fd1498Szrj      2nd vec:  (17 20 23) ( 2  5) ( 8 11 14)
611138fd1498Szrj      3rd vec:  ( 1  4  7) (10 13) (16 19 22) READY
611238fd1498Szrj 
611338fd1498Szrj    3rd step:
611438fd1498Szrj      shift right by 5 the concatenation of:
611538fd1498Szrj      "1st vec" and  "1st vec"
611638fd1498Szrj        ( 9 12 15) (18 21) |( 0  3  6) _ ( 9 12 15) (18 21)| ( 0  3  6)
611738fd1498Szrj      shift right by 3 the concatenation of:
611838fd1498Szrj      "2nd vec" and  "2nd vec"
611938fd1498Szrj                (17 20 23) |( 2  5) ( 8 11 14) _ (17 20 23)| ( 2  5) ( 8 11 14)
612038fd1498Szrj 			  | New vectors                   |
612138fd1498Szrj 
612238fd1498Szrj      So that now all vectors are READY:
612338fd1498Szrj      1st vec:  ( 0  3  6) ( 9 12 15) (18 21)
612438fd1498Szrj      2nd vec:  ( 2  5) ( 8 11 14) (17 20 23)
612538fd1498Szrj      3rd vec:  ( 1  4  7) (10 13) (16 19 22)
612638fd1498Szrj 
612738fd1498Szrj    This algorithm is faster than the one in vect_permute_load_chain if:
612838fd1498Szrj      1.  "shift of a concatenation" is faster than general permutation.
612938fd1498Szrj 	 This is usually so.
613038fd1498Szrj      2.  The TARGET machine can't execute vector instructions in parallel.
613138fd1498Szrj 	 This is because each step of the algorithm depends on the previous
613238fd1498Szrj 	 one.  The algorithm in vect_permute_load_chain is much more parallel.
613338fd1498Szrj 
613438fd1498Szrj    The algorithm is applicable only for LOAD CHAIN LENGTH less than VF.
613538fd1498Szrj */
613638fd1498Szrj 
613738fd1498Szrj static bool
613838fd1498Szrj vect_shift_permute_load_chain (vec<tree> dr_chain,
613938fd1498Szrj 			       unsigned int length,
614038fd1498Szrj 			       gimple *stmt,
614138fd1498Szrj 			       gimple_stmt_iterator *gsi,
614238fd1498Szrj 			       vec<tree> *result_chain)
614338fd1498Szrj {
614438fd1498Szrj   tree vect[3], vect_shift[3], data_ref, first_vect, second_vect;
614538fd1498Szrj   tree perm2_mask1, perm2_mask2, perm3_mask;
614638fd1498Szrj   tree select_mask, shift1_mask, shift2_mask, shift3_mask, shift4_mask;
614738fd1498Szrj   gimple *perm_stmt;
614838fd1498Szrj 
614938fd1498Szrj   tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
615038fd1498Szrj   unsigned int i;
615138fd1498Szrj   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
615238fd1498Szrj   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
615338fd1498Szrj 
615438fd1498Szrj   unsigned HOST_WIDE_INT nelt, vf;
615538fd1498Szrj   if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nelt)
615638fd1498Szrj       || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
615738fd1498Szrj     /* Not supported for variable-length vectors.  */
615838fd1498Szrj     return false;
615938fd1498Szrj 
616038fd1498Szrj   vec_perm_builder sel (nelt, nelt, 1);
616138fd1498Szrj   sel.quick_grow (nelt);
616238fd1498Szrj 
616338fd1498Szrj   result_chain->quick_grow (length);
616438fd1498Szrj   memcpy (result_chain->address (), dr_chain.address (),
616538fd1498Szrj 	  length * sizeof (tree));
616638fd1498Szrj 
616738fd1498Szrj   if (pow2p_hwi (length) && vf > 4)
616838fd1498Szrj     {
616938fd1498Szrj       unsigned int j, log_length = exact_log2 (length);
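      /* Illustrative mask values, assuming nelt == 8: the two shuffles built
	 below use
	   perm2_mask1 = { 0, 2, 4, 6, 1, 3, 5, 7 }   (evens then odds)
	   perm2_mask2 = { 1, 3, 5, 7, 0, 2, 4, 6 }   (odds then evens).  */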
617038fd1498Szrj       for (i = 0; i < nelt / 2; ++i)
617138fd1498Szrj 	sel[i] = i * 2;
617238fd1498Szrj       for (i = 0; i < nelt / 2; ++i)
617338fd1498Szrj 	sel[nelt / 2 + i] = i * 2 + 1;
617438fd1498Szrj       vec_perm_indices indices (sel, 2, nelt);
617538fd1498Szrj       if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
617638fd1498Szrj 	{
617738fd1498Szrj 	  if (dump_enabled_p ())
617838fd1498Szrj 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
617938fd1498Szrj 			     "shuffle of 2 fields structure is not "
618038fd1498Szrj 			     "supported by target\n");
618138fd1498Szrj 	  return false;
618238fd1498Szrj 	}
618338fd1498Szrj       perm2_mask1 = vect_gen_perm_mask_checked (vectype, indices);
618438fd1498Szrj 
618538fd1498Szrj       for (i = 0; i < nelt / 2; ++i)
618638fd1498Szrj 	sel[i] = i * 2 + 1;
618738fd1498Szrj       for (i = 0; i < nelt / 2; ++i)
618838fd1498Szrj 	sel[nelt / 2 + i] = i * 2;
618938fd1498Szrj       indices.new_vector (sel, 2, nelt);
619038fd1498Szrj       if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
619138fd1498Szrj 	{
619238fd1498Szrj 	  if (dump_enabled_p ())
619338fd1498Szrj 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
619438fd1498Szrj 			     "shuffle of 2 fields structure is not "
619538fd1498Szrj 			     "supported by target\n");
619638fd1498Szrj 	  return false;
619738fd1498Szrj 	}
619838fd1498Szrj       perm2_mask2 = vect_gen_perm_mask_checked (vectype, indices);
619938fd1498Szrj 
620038fd1498Szrj       /* Generating permutation constant to shift all elements.
620138fd1498Szrj 	 For vector length 8 it is {4 5 6 7 8 9 10 11}.  */
620238fd1498Szrj       for (i = 0; i < nelt; i++)
620338fd1498Szrj 	sel[i] = nelt / 2 + i;
620438fd1498Szrj       indices.new_vector (sel, 2, nelt);
620538fd1498Szrj       if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
620638fd1498Szrj 	{
620738fd1498Szrj 	  if (dump_enabled_p ())
620838fd1498Szrj 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
620938fd1498Szrj 			     "shift permutation is not supported by target\n");
621038fd1498Szrj 	  return false;
621138fd1498Szrj 	}
621238fd1498Szrj       shift1_mask = vect_gen_perm_mask_checked (vectype, indices);
621338fd1498Szrj 
621438fd1498Szrj       /* Generating permutation constant to select vector from 2.
621538fd1498Szrj 	 For vector length 8 it is {0 1 2 3 12 13 14 15}.  */
621638fd1498Szrj       for (i = 0; i < nelt / 2; i++)
621738fd1498Szrj 	sel[i] = i;
621838fd1498Szrj       for (i = nelt / 2; i < nelt; i++)
621938fd1498Szrj 	sel[i] = nelt + i;
622038fd1498Szrj       indices.new_vector (sel, 2, nelt);
622138fd1498Szrj       if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
622238fd1498Szrj 	{
622338fd1498Szrj 	  if (dump_enabled_p ())
622438fd1498Szrj 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
622538fd1498Szrj 			     "select is not supported by target\n");
622638fd1498Szrj 	  return false;
622738fd1498Szrj 	}
622838fd1498Szrj       select_mask = vect_gen_perm_mask_checked (vectype, indices);
622938fd1498Szrj 
623038fd1498Szrj       for (i = 0; i < log_length; i++)
623138fd1498Szrj 	{
623238fd1498Szrj 	  for (j = 0; j < length; j += 2)
623338fd1498Szrj 	    {
623438fd1498Szrj 	      first_vect = dr_chain[j];
623538fd1498Szrj 	      second_vect = dr_chain[j + 1];
623638fd1498Szrj 
623738fd1498Szrj 	      data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle2");
623838fd1498Szrj 	      perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
623938fd1498Szrj 					       first_vect, first_vect,
624038fd1498Szrj 					       perm2_mask1);
624138fd1498Szrj 	      vect_finish_stmt_generation (stmt, perm_stmt, gsi);
624238fd1498Szrj 	      vect[0] = data_ref;
624338fd1498Szrj 
624438fd1498Szrj 	      data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle2");
624538fd1498Szrj 	      perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
624638fd1498Szrj 					       second_vect, second_vect,
624738fd1498Szrj 					       perm2_mask2);
624838fd1498Szrj 	      vect_finish_stmt_generation (stmt, perm_stmt, gsi);
624938fd1498Szrj 	      vect[1] = data_ref;
625038fd1498Szrj 
625138fd1498Szrj 	      data_ref = make_temp_ssa_name (vectype, NULL, "vect_shift");
625238fd1498Szrj 	      perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
625338fd1498Szrj 					       vect[0], vect[1], shift1_mask);
625438fd1498Szrj 	      vect_finish_stmt_generation (stmt, perm_stmt, gsi);
625538fd1498Szrj 	      (*result_chain)[j/2 + length/2] = data_ref;
625638fd1498Szrj 
625738fd1498Szrj 	      data_ref = make_temp_ssa_name (vectype, NULL, "vect_select");
625838fd1498Szrj 	      perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
625938fd1498Szrj 					       vect[0], vect[1], select_mask);
626038fd1498Szrj 	      vect_finish_stmt_generation (stmt, perm_stmt, gsi);
626138fd1498Szrj 	      (*result_chain)[j/2] = data_ref;
626238fd1498Szrj 	    }
626338fd1498Szrj 	  memcpy (dr_chain.address (), result_chain->address (),
626438fd1498Szrj 		  length * sizeof (tree));
626538fd1498Szrj 	}
626638fd1498Szrj       return true;
626738fd1498Szrj     }
626838fd1498Szrj   if (length == 3 && vf > 2)
626938fd1498Szrj     {
627038fd1498Szrj       unsigned int k = 0, l = 0;
627138fd1498Szrj 
627238fd1498Szrj       /* Generating permutation constant to get all elements in right order.
627338fd1498Szrj 	 For vector length 8 it is {0 3 6 1 4 7 2 5}.  */
627438fd1498Szrj       for (i = 0; i < nelt; i++)
627538fd1498Szrj 	{
627638fd1498Szrj 	  if (3 * k + (l % 3) >= nelt)
627738fd1498Szrj 	    {
627838fd1498Szrj 	      k = 0;
627938fd1498Szrj 	      l += (3 - (nelt % 3));
628038fd1498Szrj 	    }
628138fd1498Szrj 	  sel[i] = 3 * k + (l % 3);
628238fd1498Szrj 	  k++;
628338fd1498Szrj 	}
628438fd1498Szrj       vec_perm_indices indices (sel, 2, nelt);
628538fd1498Szrj       if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
628638fd1498Szrj 	{
628738fd1498Szrj 	  if (dump_enabled_p ())
628838fd1498Szrj 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
628938fd1498Szrj 			     "shuffle of 3 fields structure is not "
629038fd1498Szrj 			     "supported by target\n");
629138fd1498Szrj 	  return false;
629238fd1498Szrj 	}
629338fd1498Szrj       perm3_mask = vect_gen_perm_mask_checked (vectype, indices);
629438fd1498Szrj 
629538fd1498Szrj       /* Generating permutation constant to shift all elements.
629638fd1498Szrj 	 For vector length 8 it is {6 7 8 9 10 11 12 13}.  */
629738fd1498Szrj       for (i = 0; i < nelt; i++)
629838fd1498Szrj 	sel[i] = 2 * (nelt / 3) + (nelt % 3) + i;
629938fd1498Szrj       indices.new_vector (sel, 2, nelt);
630038fd1498Szrj       if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
630138fd1498Szrj 	{
630238fd1498Szrj 	  if (dump_enabled_p ())
630338fd1498Szrj 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
630438fd1498Szrj 			     "shift permutation is not supported by target\n");
630538fd1498Szrj 	  return false;
630638fd1498Szrj 	}
630738fd1498Szrj       shift1_mask = vect_gen_perm_mask_checked (vectype, indices);
630838fd1498Szrj 
630938fd1498Szrj       /* Generating permutation constant to shift all elements.
631038fd1498Szrj 	 For vector length 8 it is {5 6 7 8 9 10 11 12}.  */
631138fd1498Szrj       for (i = 0; i < nelt; i++)
631238fd1498Szrj 	sel[i] = 2 * (nelt / 3) + 1 + i;
631338fd1498Szrj       indices.new_vector (sel, 2, nelt);
631438fd1498Szrj       if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
631538fd1498Szrj 	{
631638fd1498Szrj 	  if (dump_enabled_p ())
631738fd1498Szrj 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
631838fd1498Szrj 			     "shift permutation is not supported by target\n");
631938fd1498Szrj 	  return false;
632038fd1498Szrj 	}
632138fd1498Szrj       shift2_mask = vect_gen_perm_mask_checked (vectype, indices);
632238fd1498Szrj 
632338fd1498Szrj       /* Generating permutation constant to shift all elements.
632438fd1498Szrj 	 For vector length 8 it is {3 4 5 6 7 8 9 10}.  */
632538fd1498Szrj       for (i = 0; i < nelt; i++)
632638fd1498Szrj 	sel[i] = (nelt / 3) + (nelt % 3) / 2 + i;
632738fd1498Szrj       indices.new_vector (sel, 2, nelt);
632838fd1498Szrj       if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
632938fd1498Szrj 	{
633038fd1498Szrj 	  if (dump_enabled_p ())
633138fd1498Szrj 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
633238fd1498Szrj 			     "shift permutation is not supported by target\n");
633338fd1498Szrj 	  return false;
633438fd1498Szrj 	}
633538fd1498Szrj       shift3_mask = vect_gen_perm_mask_checked (vectype, indices);
633638fd1498Szrj 
633738fd1498Szrj       /* Generating permutation constant to shift all elements.
633838fd1498Szrj 	 For vector length 8 it is {5 6 7 8 9 10 11 12}.  */
633938fd1498Szrj       for (i = 0; i < nelt; i++)
634038fd1498Szrj 	sel[i] = 2 * (nelt / 3) + (nelt % 3) / 2 + i;
634138fd1498Szrj       indices.new_vector (sel, 2, nelt);
634238fd1498Szrj       if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
634338fd1498Szrj 	{
634438fd1498Szrj 	  if (dump_enabled_p ())
634538fd1498Szrj 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
634638fd1498Szrj 			     "shift permutation is not supported by target\n");
634738fd1498Szrj 	  return false;
634838fd1498Szrj 	}
634938fd1498Szrj       shift4_mask = vect_gen_perm_mask_checked (vectype, indices);
635038fd1498Szrj 
635138fd1498Szrj       for (k = 0; k < 3; k++)
635238fd1498Szrj 	{
635338fd1498Szrj 	  data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3");
635438fd1498Szrj 	  perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
635538fd1498Szrj 					   dr_chain[k], dr_chain[k],
635638fd1498Szrj 					   perm3_mask);
635738fd1498Szrj 	  vect_finish_stmt_generation (stmt, perm_stmt, gsi);
635838fd1498Szrj 	  vect[k] = data_ref;
635938fd1498Szrj 	}
636038fd1498Szrj 
636138fd1498Szrj       for (k = 0; k < 3; k++)
636238fd1498Szrj 	{
636338fd1498Szrj 	  data_ref = make_temp_ssa_name (vectype, NULL, "vect_shift1");
636438fd1498Szrj 	  perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
636538fd1498Szrj 					   vect[k % 3], vect[(k + 1) % 3],
636638fd1498Szrj 					   shift1_mask);
636738fd1498Szrj 	  vect_finish_stmt_generation (stmt, perm_stmt, gsi);
636838fd1498Szrj 	  vect_shift[k] = data_ref;
636938fd1498Szrj 	}
637038fd1498Szrj 
637138fd1498Szrj       for (k = 0; k < 3; k++)
637238fd1498Szrj 	{
637338fd1498Szrj 	  data_ref = make_temp_ssa_name (vectype, NULL, "vect_shift2");
637438fd1498Szrj 	  perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
637538fd1498Szrj 					   vect_shift[(4 - k) % 3],
637638fd1498Szrj 					   vect_shift[(3 - k) % 3],
637738fd1498Szrj 					   shift2_mask);
637838fd1498Szrj 	  vect_finish_stmt_generation (stmt, perm_stmt, gsi);
637938fd1498Szrj 	  vect[k] = data_ref;
638038fd1498Szrj 	}
638138fd1498Szrj 
638238fd1498Szrj       (*result_chain)[3 - (nelt % 3)] = vect[2];
638338fd1498Szrj 
638438fd1498Szrj       data_ref = make_temp_ssa_name (vectype, NULL, "vect_shift3");
638538fd1498Szrj       perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, vect[0],
638638fd1498Szrj 				       vect[0], shift3_mask);
638738fd1498Szrj       vect_finish_stmt_generation (stmt, perm_stmt, gsi);
638838fd1498Szrj       (*result_chain)[nelt % 3] = data_ref;
638938fd1498Szrj 
639038fd1498Szrj       data_ref = make_temp_ssa_name (vectype, NULL, "vect_shift4");
639138fd1498Szrj       perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, vect[1],
639238fd1498Szrj 				       vect[1], shift4_mask);
639338fd1498Szrj       vect_finish_stmt_generation (stmt, perm_stmt, gsi);
639438fd1498Szrj       (*result_chain)[0] = data_ref;
639538fd1498Szrj       return true;
639638fd1498Szrj     }
639738fd1498Szrj   return false;
639838fd1498Szrj }
639938fd1498Szrj 
640038fd1498Szrj /* Function vect_transform_grouped_load.
640138fd1498Szrj 
640238fd1498Szrj    Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
640338fd1498Szrj    to perform their permutation and assign the resulting vectorized statements
640438fd1498Szrj    to the scalar statements.
640538fd1498Szrj */
640638fd1498Szrj 
640738fd1498Szrj void
640838fd1498Szrj vect_transform_grouped_load (gimple *stmt, vec<tree> dr_chain, int size,
640938fd1498Szrj 			     gimple_stmt_iterator *gsi)
641038fd1498Szrj {
641138fd1498Szrj   machine_mode mode;
641238fd1498Szrj   vec<tree> result_chain = vNULL;
641338fd1498Szrj 
641438fd1498Szrj   /* DR_CHAIN contains input data-refs that are a part of the interleaving.
641538fd1498Szrj      RESULT_CHAIN is the output of vect_permute_load_chain; it contains permuted
641638fd1498Szrj      vectors that are ready for vector computation.  */
641738fd1498Szrj   result_chain.create (size);
641838fd1498Szrj 
641938fd1498Szrj   /* If the reassociation width for the vector type is 2 or greater, the
642038fd1498Szrj      target machine can execute 2 or more vector instructions in parallel.
642138fd1498Szrj      Otherwise try to get the load chain via vect_shift_permute_load_chain.  */
642238fd1498Szrj   mode = TYPE_MODE (STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt)));
642338fd1498Szrj   if (targetm.sched.reassociation_width (VEC_PERM_EXPR, mode) > 1
642438fd1498Szrj       || pow2p_hwi (size)
642538fd1498Szrj       || !vect_shift_permute_load_chain (dr_chain, size, stmt,
642638fd1498Szrj 					 gsi, &result_chain))
642738fd1498Szrj     vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain);
642838fd1498Szrj   vect_record_grouped_load_vectors (stmt, result_chain);
642938fd1498Szrj   result_chain.release ();
643038fd1498Szrj }
643138fd1498Szrj 
643238fd1498Szrj /* RESULT_CHAIN contains the output of a group of grouped loads that were
643338fd1498Szrj    generated as part of the vectorization of STMT.  Assign the statement
643438fd1498Szrj    for each vector to the associated scalar statement.  */
643538fd1498Szrj 
643638fd1498Szrj void
643738fd1498Szrj vect_record_grouped_load_vectors (gimple *stmt, vec<tree> result_chain)
643838fd1498Szrj {
643938fd1498Szrj   gimple *first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
644038fd1498Szrj   gimple *next_stmt, *new_stmt;
644138fd1498Szrj   unsigned int i, gap_count;
644238fd1498Szrj   tree tmp_data_ref;
644338fd1498Szrj 
644438fd1498Szrj   /* Put a permuted data-ref in the VECTORIZED_STMT field.
644538fd1498Szrj      Since we scan the chain starting from its first node, their order
644638fd1498Szrj      corresponds to the order of data-refs in RESULT_CHAIN.  */
644738fd1498Szrj   next_stmt = first_stmt;
644838fd1498Szrj   gap_count = 1;
644938fd1498Szrj   FOR_EACH_VEC_ELT (result_chain, i, tmp_data_ref)
645038fd1498Szrj     {
645138fd1498Szrj       if (!next_stmt)
645238fd1498Szrj 	break;
645338fd1498Szrj 
645438fd1498Szrj       /* Skip the gaps.  Loads created for the gaps will be removed by the dead
645538fd1498Szrj        code elimination pass later.  No need to check for the first stmt in
645638fd1498Szrj        the group, since it always exists.
645738fd1498Szrj        GROUP_GAP is the number of steps in elements from the previous
645838fd1498Szrj        access (if there is no gap GROUP_GAP is 1).  We skip loads that
645938fd1498Szrj        correspond to the gaps.  */
646038fd1498Szrj       if (next_stmt != first_stmt
646138fd1498Szrj           && gap_count < GROUP_GAP (vinfo_for_stmt (next_stmt)))
646238fd1498Szrj       {
646338fd1498Szrj         gap_count++;
646438fd1498Szrj         continue;
646538fd1498Szrj       }
646638fd1498Szrj 
646738fd1498Szrj       while (next_stmt)
646838fd1498Szrj         {
646938fd1498Szrj 	  new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
647038fd1498Szrj 	  /* We assume that if VEC_STMT is not NULL, this is a case of multiple
647138fd1498Szrj 	     copies, and we put the new vector statement in the first available
647238fd1498Szrj 	     RELATED_STMT.  */
647338fd1498Szrj 	  if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
647438fd1498Szrj 	    STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
647538fd1498Szrj 	  else
647638fd1498Szrj             {
647738fd1498Szrj               if (!GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
647838fd1498Szrj                 {
647938fd1498Szrj 		  gimple *prev_stmt =
648038fd1498Szrj 		    STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
648138fd1498Szrj 		  gimple *rel_stmt =
648238fd1498Szrj 		    STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt));
648338fd1498Szrj 	          while (rel_stmt)
648438fd1498Szrj 		    {
648538fd1498Szrj 		      prev_stmt = rel_stmt;
648638fd1498Szrj 		      rel_stmt =
648738fd1498Szrj                         STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
648838fd1498Szrj 		    }
648938fd1498Szrj 
649038fd1498Szrj   	          STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) =
649138fd1498Szrj                     new_stmt;
649238fd1498Szrj                 }
649338fd1498Szrj             }
649438fd1498Szrj 
649538fd1498Szrj 	  next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
649638fd1498Szrj 	  gap_count = 1;
649738fd1498Szrj 	  /* If NEXT_STMT accesses the same DR as the previous statement,
649838fd1498Szrj 	     put the same TMP_DATA_REF as its vectorized statement; otherwise
649938fd1498Szrj 	     get the next data-ref from RESULT_CHAIN.  */
650038fd1498Szrj 	  if (!next_stmt || !GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
650138fd1498Szrj 	    break;
650238fd1498Szrj         }
650338fd1498Szrj     }
650438fd1498Szrj }
650538fd1498Szrj 
650638fd1498Szrj /* Function vect_can_force_dr_alignment_p.
650738fd1498Szrj 
650838fd1498Szrj    Returns whether the alignment of DECL can be forced so that it is
650938fd1498Szrj    aligned on an ALIGNMENT-bit boundary.  */
651038fd1498Szrj 
651138fd1498Szrj bool
651238fd1498Szrj vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment)
651338fd1498Szrj {
651438fd1498Szrj   if (!VAR_P (decl))
651538fd1498Szrj     return false;
651638fd1498Szrj 
651738fd1498Szrj   if (decl_in_symtab_p (decl)
651838fd1498Szrj       && !symtab_node::get (decl)->can_increase_alignment_p ())
651938fd1498Szrj     return false;
652038fd1498Szrj 
652138fd1498Szrj   if (TREE_STATIC (decl))
652238fd1498Szrj     return (alignment <= MAX_OFILE_ALIGNMENT);
652338fd1498Szrj   else
652438fd1498Szrj     return (alignment <= MAX_STACK_ALIGNMENT);
652538fd1498Szrj }
652638fd1498Szrj 
652738fd1498Szrj 
652838fd1498Szrj /* Return whether the data reference DR is supported with respect to its
652938fd1498Szrj    alignment.
653038fd1498Szrj    If CHECK_ALIGNED_ACCESSES is TRUE, check if the access is supported even
653138fd1498Szrj    if it is aligned, i.e., check if it is possible to vectorize it with
653238fd1498Szrj    different alignment.  */
653338fd1498Szrj 
653438fd1498Szrj enum dr_alignment_support
653538fd1498Szrj vect_supportable_dr_alignment (struct data_reference *dr,
653638fd1498Szrj                                bool check_aligned_accesses)
653738fd1498Szrj {
653838fd1498Szrj   gimple *stmt = DR_STMT (dr);
653938fd1498Szrj   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
654038fd1498Szrj   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
654138fd1498Szrj   machine_mode mode = TYPE_MODE (vectype);
654238fd1498Szrj   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
654338fd1498Szrj   struct loop *vect_loop = NULL;
654438fd1498Szrj   bool nested_in_vect_loop = false;
654538fd1498Szrj 
654638fd1498Szrj   if (aligned_access_p (dr) && !check_aligned_accesses)
654738fd1498Szrj     return dr_aligned;
654838fd1498Szrj 
654938fd1498Szrj   /* For now assume all conditional loads/stores support unaligned
655038fd1498Szrj      access without any special code.  */
655138fd1498Szrj   if (is_gimple_call (stmt)
655238fd1498Szrj       && gimple_call_internal_p (stmt)
655338fd1498Szrj       && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
655438fd1498Szrj 	  || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
655538fd1498Szrj     return dr_unaligned_supported;
655638fd1498Szrj 
655738fd1498Szrj   if (loop_vinfo)
655838fd1498Szrj     {
655938fd1498Szrj       vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
656038fd1498Szrj       nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt);
656138fd1498Szrj     }
656238fd1498Szrj 
656338fd1498Szrj   /* Possibly unaligned access.  */
656438fd1498Szrj 
656538fd1498Szrj   /* We can choose between using the implicit realignment scheme (generating
656638fd1498Szrj      a misaligned_move stmt) and the explicit realignment scheme (generating
656738fd1498Szrj      aligned loads with a REALIGN_LOAD).  There are two variants to the
656838fd1498Szrj      explicit realignment scheme: optimized, and unoptimized.
656938fd1498Szrj      We can optimize the realignment only if the step between consecutive
657038fd1498Szrj      vector loads is equal to the vector size.  Since the vector memory
657138fd1498Szrj      accesses advance in steps of VS (Vector Size) in the vectorized loop, it
657238fd1498Szrj      is guaranteed that the misalignment amount remains the same throughout the
657338fd1498Szrj      execution of the vectorized loop.  Therefore, we can create the
657438fd1498Szrj      "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
657538fd1498Szrj      at the loop preheader.
657638fd1498Szrj 
657738fd1498Szrj      However, in the case of outer-loop vectorization, when vectorizing a
657838fd1498Szrj      memory access in the inner-loop nested within the LOOP that is now being
657938fd1498Szrj      vectorized, while it is guaranteed that the misalignment of the
658038fd1498Szrj      vectorized memory access will remain the same in different outer-loop
658138fd1498Szrj      iterations, it is *not* guaranteed that it will remain the same throughout
658238fd1498Szrj      the execution of the inner-loop.  This is because the inner-loop advances
658338fd1498Szrj      with the original scalar step (and not in steps of VS).  If the inner-loop
658438fd1498Szrj      step happens to be a multiple of VS, then the misalignment remains fixed
658538fd1498Szrj      and we can use the optimized realignment scheme.  For example:
658638fd1498Szrj 
658738fd1498Szrj       for (i=0; i<N; i++)
658838fd1498Szrj         for (j=0; j<M; j++)
658938fd1498Szrj           s += a[i+j];
659038fd1498Szrj 
659138fd1498Szrj      When vectorizing the i-loop in the above example, the step between
659238fd1498Szrj      consecutive vector loads is 1, and so the misalignment does not remain
659338fd1498Szrj      fixed across the execution of the inner-loop, and the realignment cannot
659438fd1498Szrj      be optimized (as illustrated in the following pseudo vectorized loop):
659538fd1498Szrj 
659638fd1498Szrj       for (i=0; i<N; i+=4)
659738fd1498Szrj         for (j=0; j<M; j++){
659838fd1498Szrj           vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
659938fd1498Szrj                          // when j is {0,1,2,3,4,5,6,7,...} respectively.
660038fd1498Szrj                          // (assuming that we start from an aligned address).
660138fd1498Szrj           }
660238fd1498Szrj 
660338fd1498Szrj      We therefore have to use the unoptimized realignment scheme:
660438fd1498Szrj 
660538fd1498Szrj       for (i=0; i<N; i+=4)
660638fd1498Szrj           for (j=k; j<M; j+=4)
660738fd1498Szrj           vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
660838fd1498Szrj                            // that the misalignment of the initial address is
660938fd1498Szrj                            // 0).
661038fd1498Szrj 
661138fd1498Szrj      The loop can then be vectorized as follows:
661238fd1498Szrj 
661338fd1498Szrj       for (k=0; k<4; k++){
661438fd1498Szrj         rt = get_realignment_token (&vp[k]);
661538fd1498Szrj         for (i=0; i<N; i+=4){
661638fd1498Szrj           v1 = vp[i+k];
661738fd1498Szrj           for (j=k; j<M; j+=4){
661838fd1498Szrj             v2 = vp[i+j+VS-1];
661938fd1498Szrj             va = REALIGN_LOAD <v1,v2,rt>;
662038fd1498Szrj             vs += va;
662138fd1498Szrj             v1 = v2;
662238fd1498Szrj           }
662338fd1498Szrj         }
662438fd1498Szrj     } */
662538fd1498Szrj 
662638fd1498Szrj   if (DR_IS_READ (dr))
662738fd1498Szrj     {
662838fd1498Szrj       bool is_packed = false;
662938fd1498Szrj       tree type = (TREE_TYPE (DR_REF (dr)));
663038fd1498Szrj 
663138fd1498Szrj       if (optab_handler (vec_realign_load_optab, mode) != CODE_FOR_nothing
663238fd1498Szrj 	  && (!targetm.vectorize.builtin_mask_for_load
663338fd1498Szrj 	      || targetm.vectorize.builtin_mask_for_load ()))
663438fd1498Szrj 	{
663538fd1498Szrj 	  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
663638fd1498Szrj 
663738fd1498Szrj 	  /* If we are doing SLP then the accesses need not have the
663838fd1498Szrj 	     same alignment; instead it depends on the SLP group size.  */
663938fd1498Szrj 	  if (loop_vinfo
664038fd1498Szrj 	      && STMT_SLP_TYPE (stmt_info)
664138fd1498Szrj 	      && !multiple_p (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
664238fd1498Szrj 			      * GROUP_SIZE (vinfo_for_stmt
664338fd1498Szrj 					    (GROUP_FIRST_ELEMENT (stmt_info))),
664438fd1498Szrj 			      TYPE_VECTOR_SUBPARTS (vectype)))
664538fd1498Szrj 	    ;
664638fd1498Szrj 	  else if (!loop_vinfo
664738fd1498Szrj 		   || (nested_in_vect_loop
664838fd1498Szrj 		       && maybe_ne (TREE_INT_CST_LOW (DR_STEP (dr)),
664938fd1498Szrj 				    GET_MODE_SIZE (TYPE_MODE (vectype)))))
665038fd1498Szrj 	    return dr_explicit_realign;
665138fd1498Szrj 	  else
665238fd1498Szrj 	    return dr_explicit_realign_optimized;
665338fd1498Szrj 	}
665438fd1498Szrj       if (!known_alignment_for_access_p (dr))
665538fd1498Szrj 	is_packed = not_size_aligned (DR_REF (dr));
665638fd1498Szrj 
665738fd1498Szrj       if (targetm.vectorize.support_vector_misalignment
665838fd1498Szrj 	    (mode, type, DR_MISALIGNMENT (dr), is_packed))
665938fd1498Szrj 	/* Can't software pipeline the loads, but can at least do them.  */
666038fd1498Szrj 	return dr_unaligned_supported;
666138fd1498Szrj     }
666238fd1498Szrj   else
666338fd1498Szrj     {
666438fd1498Szrj       bool is_packed = false;
666538fd1498Szrj       tree type = (TREE_TYPE (DR_REF (dr)));
666638fd1498Szrj 
666738fd1498Szrj       if (!known_alignment_for_access_p (dr))
666838fd1498Szrj 	is_packed = not_size_aligned (DR_REF (dr));
666938fd1498Szrj 
667038fd1498Szrj      if (targetm.vectorize.support_vector_misalignment
667138fd1498Szrj 	   (mode, type, DR_MISALIGNMENT (dr), is_packed))
667238fd1498Szrj        return dr_unaligned_supported;
667338fd1498Szrj     }
667438fd1498Szrj 
667538fd1498Szrj   /* Unsupported.  */
667638fd1498Szrj   return dr_unaligned_unsupported;
667738fd1498Szrj }
6678