1 /* Data References Analysis and Manipulation Utilities for Vectorization.
2    Copyright (C) 2003-2018 Free Software Foundation, Inc.
3    Contributed by Dorit Naishlos <dorit@il.ibm.com>
4    and Ira Rosen <irar@il.ibm.com>
5 
6 This file is part of GCC.
7 
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12 
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
16 for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3.  If not see
20 <http://www.gnu.org/licenses/>.  */
21 
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "predict.h"
31 #include "memmodel.h"
32 #include "tm_p.h"
33 #include "ssa.h"
34 #include "optabs-tree.h"
35 #include "cgraph.h"
36 #include "dumpfile.h"
37 #include "alias.h"
38 #include "fold-const.h"
39 #include "stor-layout.h"
40 #include "tree-eh.h"
41 #include "gimplify.h"
42 #include "gimple-iterator.h"
43 #include "gimplify-me.h"
44 #include "tree-ssa-loop-ivopts.h"
45 #include "tree-ssa-loop-manip.h"
46 #include "tree-ssa-loop.h"
47 #include "cfgloop.h"
48 #include "tree-scalar-evolution.h"
49 #include "tree-vectorizer.h"
50 #include "expr.h"
51 #include "builtins.h"
52 #include "params.h"
53 #include "tree-cfg.h"
54 #include "tree-hash-traits.h"
55 #include "vec-perm-indices.h"
56 #include "internal-fn.h"
57 
58 /* Return true if load- or store-lanes optab OPTAB is implemented for
59    COUNT vectors of type VECTYPE.  NAME is the name of OPTAB.  */
60 
61 static bool
62 vect_lanes_optab_supported_p (const char *name, convert_optab optab,
63 			      tree vectype, unsigned HOST_WIDE_INT count)
64 {
65   machine_mode mode, array_mode;
66   bool limit_p;
67 
68   mode = TYPE_MODE (vectype);
69   if (!targetm.array_mode (mode, count).exists (&array_mode))
70     {
71       poly_uint64 bits = count * GET_MODE_BITSIZE (mode);
72       limit_p = !targetm.array_mode_supported_p (mode, count);
73       if (!int_mode_for_size (bits, limit_p).exists (&array_mode))
74 	{
75 	  if (dump_enabled_p ())
76 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
77 			     "no array mode for %s["
78 			     HOST_WIDE_INT_PRINT_DEC "]\n",
79 			     GET_MODE_NAME (mode), count);
80 	  return false;
81 	}
82     }
83 
84   if (convert_optab_handler (optab, array_mode, mode) == CODE_FOR_nothing)
85     {
86       if (dump_enabled_p ())
87 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
88                          "cannot use %s<%s><%s>\n", name,
89                          GET_MODE_NAME (array_mode), GET_MODE_NAME (mode));
90       return false;
91     }
92 
93   if (dump_enabled_p ())
94     dump_printf_loc (MSG_NOTE, vect_location,
95                      "can use %s<%s><%s>\n", name, GET_MODE_NAME (array_mode),
96                      GET_MODE_NAME (mode));
97 
98   return true;
99 }
100 
101 
/* Return the smallest scalar type used in STMT.
   This is used to determine the vectype of the stmt.  We generally set the
   vectype according to the type of the result (lhs).  For stmts whose
   result-type is different than the type of the arguments (e.g., demotion,
   promotion), vectype will be reset appropriately (later).  Note that we have
   to visit the smallest datatype in this function, because that determines
   the VF.  If the smallest datatype in the loop is present only as the rhs
   of a promotion operation, we would miss it here.
   Such a case, where a variable of this datatype does not appear in the lhs
   anywhere in the loop, can only occur if it is an invariant, e.g.
   'int_x = (int) short_inv', which we would expect to have been optimized
   away by invariant motion.  However, we cannot rely on invariant motion to
   always take invariants out of the loop, and so in the case of promotion we
   also have to check the rhs.
   On return, *LHS_SIZE_UNIT and *RHS_SIZE_UNIT contain the sizes of the lhs
   and rhs types respectively.  */
118 
119 tree
120 vect_get_smallest_scalar_type (gimple *stmt, HOST_WIDE_INT *lhs_size_unit,
121                                HOST_WIDE_INT *rhs_size_unit)
122 {
123   tree scalar_type = gimple_expr_type (stmt);
124   HOST_WIDE_INT lhs, rhs;
125 
126   /* During the analysis phase, this function is called on arbitrary
127      statements that might not have scalar results.  */
128   if (!tree_fits_uhwi_p (TYPE_SIZE_UNIT (scalar_type)))
129     return scalar_type;
130 
131   lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
132 
133   if (is_gimple_assign (stmt)
134       && (gimple_assign_cast_p (stmt)
135           || gimple_assign_rhs_code (stmt) == DOT_PROD_EXPR
136           || gimple_assign_rhs_code (stmt) == WIDEN_SUM_EXPR
137           || gimple_assign_rhs_code (stmt) == WIDEN_MULT_EXPR
138           || gimple_assign_rhs_code (stmt) == WIDEN_LSHIFT_EXPR
139           || gimple_assign_rhs_code (stmt) == FLOAT_EXPR))
140     {
141       tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
142 
143       rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
144       if (rhs < lhs)
145         scalar_type = rhs_type;
146     }
147 
148   *lhs_size_unit = lhs;
149   *rhs_size_unit = rhs;
150   return scalar_type;
151 }
152 
153 
/* Insert DDR into LOOP_VINFO's list of ddrs that may alias and need to be
   tested at run-time.  Return TRUE if DDR was successfully inserted.
   Return FALSE if versioning is not supported.  */
157 
158 static bool
159 vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
160 {
161   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
162 
163   if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
164     return false;
165 
166   if (!runtime_alias_check_p (ddr, loop,
167 			      optimize_loop_nest_for_speed_p (loop)))
168     return false;
169 
170   LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo).safe_push (ddr);
171   return true;
172 }
173 
174 /* Record that loop LOOP_VINFO needs to check that VALUE is nonzero.  */
175 
176 static void
177 vect_check_nonzero_value (loop_vec_info loop_vinfo, tree value)
178 {
179   vec<tree> checks = LOOP_VINFO_CHECK_NONZERO (loop_vinfo);
  for (unsigned int i = 0; i < checks.length (); ++i)
181     if (checks[i] == value)
182       return;
183 
184   if (dump_enabled_p ())
185     {
186       dump_printf_loc (MSG_NOTE, vect_location, "need run-time check that ");
187       dump_generic_expr (MSG_NOTE, TDF_SLIM, value);
188       dump_printf (MSG_NOTE, " is nonzero\n");
189     }
190   LOOP_VINFO_CHECK_NONZERO (loop_vinfo).safe_push (value);
191 }
192 
193 /* Return true if we know that the order of vectorized STMT_A and
194    vectorized STMT_B will be the same as the order of STMT_A and STMT_B.
195    At least one of the statements is a write.  */
196 
197 static bool
198 vect_preserves_scalar_order_p (gimple *stmt_a, gimple *stmt_b)
199 {
200   stmt_vec_info stmtinfo_a = vinfo_for_stmt (stmt_a);
201   stmt_vec_info stmtinfo_b = vinfo_for_stmt (stmt_b);
202 
203   /* Single statements are always kept in their original order.  */
204   if (!STMT_VINFO_GROUPED_ACCESS (stmtinfo_a)
205       && !STMT_VINFO_GROUPED_ACCESS (stmtinfo_b))
206     return true;
207 
208   /* STMT_A and STMT_B belong to overlapping groups.  All loads in a
209      SLP group are emitted at the position of the last scalar load and
210      all loads in an interleaving group are emitted at the position
211      of the first scalar load.
212      Stores in a group are emitted at the position of the last scalar store.
213      Compute that position and check whether the resulting order matches
214      the current one.
215      We have not yet decided between SLP and interleaving so we have
216      to conservatively assume both.  */
217   gimple *il_a;
218   gimple *last_a = il_a = GROUP_FIRST_ELEMENT (stmtinfo_a);
219   if (last_a)
220     {
221       for (gimple *s = GROUP_NEXT_ELEMENT (vinfo_for_stmt (last_a)); s;
222 	   s = GROUP_NEXT_ELEMENT (vinfo_for_stmt (s)))
223 	last_a = get_later_stmt (last_a, s);
224       if (!DR_IS_WRITE (STMT_VINFO_DATA_REF (stmtinfo_a)))
225 	{
226 	  for (gimple *s = GROUP_NEXT_ELEMENT (vinfo_for_stmt (il_a)); s;
227 	       s = GROUP_NEXT_ELEMENT (vinfo_for_stmt (s)))
228 	    if (get_later_stmt (il_a, s) == il_a)
229 	      il_a = s;
230 	}
231       else
232 	il_a = last_a;
233     }
234   else
235     last_a = il_a = stmt_a;
236   gimple *il_b;
237   gimple *last_b = il_b = GROUP_FIRST_ELEMENT (stmtinfo_b);
238   if (last_b)
239     {
240       for (gimple *s = GROUP_NEXT_ELEMENT (vinfo_for_stmt (last_b)); s;
241 	   s = GROUP_NEXT_ELEMENT (vinfo_for_stmt (s)))
242 	last_b = get_later_stmt (last_b, s);
243       if (!DR_IS_WRITE (STMT_VINFO_DATA_REF (stmtinfo_b)))
244 	{
245 	  for (gimple *s = GROUP_NEXT_ELEMENT (vinfo_for_stmt (il_b)); s;
246 	       s = GROUP_NEXT_ELEMENT (vinfo_for_stmt (s)))
247 	    if (get_later_stmt (il_b, s) == il_b)
248 	      il_b = s;
249 	}
250       else
251 	il_b = last_b;
252     }
253   else
254     last_b = il_b = stmt_b;
255   bool a_after_b = (get_later_stmt (stmt_a, stmt_b) == stmt_a);
256   return (/* SLP */
257 	  (get_later_stmt (last_a, last_b) == last_a) == a_after_b
258 	  /* Interleaving */
259 	  && (get_later_stmt (il_a, il_b) == il_a) == a_after_b
260 	  /* Mixed */
261 	  && (get_later_stmt (il_a, last_b) == il_a) == a_after_b
262 	  && (get_later_stmt (last_a, il_b) == last_a) == a_after_b);
263 }
264 
/* A subroutine of vect_analyze_data_ref_dependence.  Handle
   DDR_COULD_BE_INDEPENDENT_P relation DDR, which has a known set of
   dependence distances.  These distances are conservatively correct but
   they don't reflect a guaranteed dependence.
269 
270    Return true if this function does all the work necessary to avoid
271    an alias or false if the caller should use the dependence distances
272    to limit the vectorization factor in the usual way.  LOOP_DEPTH is
273    the depth of the loop described by LOOP_VINFO and the other arguments
274    are as for vect_analyze_data_ref_dependence.  */
275 
276 static bool
277 vect_analyze_possibly_independent_ddr (data_dependence_relation *ddr,
278 				       loop_vec_info loop_vinfo,
279 				       int loop_depth, unsigned int *max_vf)
280 {
281   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
282   lambda_vector dist_v;
283   unsigned int i;
284   FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
285     {
286       int dist = dist_v[loop_depth];
287       if (dist != 0 && !(dist > 0 && DDR_REVERSED_P (ddr)))
288 	{
289 	  /* If the user asserted safelen >= DIST consecutive iterations
290 	     can be executed concurrently, assume independence.
291 
292 	     ??? An alternative would be to add the alias check even
293 	     in this case, and vectorize the fallback loop with the
294 	     maximum VF set to safelen.  However, if the user has
295 	     explicitly given a length, it's less likely that that
296 	     would be a win.  */
297 	  if (loop->safelen >= 2 && abs_hwi (dist) <= loop->safelen)
298 	    {
299 	      if ((unsigned int) loop->safelen < *max_vf)
300 		*max_vf = loop->safelen;
301 	      LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = false;
302 	      continue;
303 	    }
304 
305 	  /* For dependence distances of 2 or more, we have the option
306 	     of limiting VF or checking for an alias at runtime.
307 	     Prefer to check at runtime if we can, to avoid limiting
308 	     the VF unnecessarily when the bases are in fact independent.
309 
310 	     Note that the alias checks will be removed if the VF ends up
311 	     being small enough.  */
312 	  return vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
313 	}
314     }
315   return true;
316 }
317 
318 
319 /* Function vect_analyze_data_ref_dependence.
320 
   Return TRUE if there (might) exist a dependence between a memory-reference
   DRA and a memory-reference DRB.  Return FALSE if any such dependence will
   instead be checked at run time by versioning for alias.  Adjust *MAX_VF
   according to the data dependence.  */
325 
326 static bool
327 vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
328 				  loop_vec_info loop_vinfo,
329 				  unsigned int *max_vf)
330 {
331   unsigned int i;
332   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
333   struct data_reference *dra = DDR_A (ddr);
334   struct data_reference *drb = DDR_B (ddr);
335   stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
336   stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
337   lambda_vector dist_v;
338   unsigned int loop_depth;
339 
340   /* In loop analysis all data references should be vectorizable.  */
341   if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a)
342       || !STMT_VINFO_VECTORIZABLE (stmtinfo_b))
343     gcc_unreachable ();
344 
345   /* Independent data accesses.  */
346   if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
347     return false;
348 
349   if (dra == drb
350       || (DR_IS_READ (dra) && DR_IS_READ (drb)))
351     return false;
352 
353   /* We do not have to consider dependences between accesses that belong
354      to the same group, unless the stride could be smaller than the
355      group size.  */
356   if (GROUP_FIRST_ELEMENT (stmtinfo_a)
357       && GROUP_FIRST_ELEMENT (stmtinfo_a) == GROUP_FIRST_ELEMENT (stmtinfo_b)
358       && !STMT_VINFO_STRIDED_P (stmtinfo_a))
359     return false;
360 
361   /* Even if we have an anti-dependence then, as the vectorized loop covers at
362      least two scalar iterations, there is always also a true dependence.
363      As the vectorizer does not re-order loads and stores we can ignore
364      the anti-dependence if TBAA can disambiguate both DRs similar to the
365      case with known negative distance anti-dependences (positive
366      distance anti-dependences would violate TBAA constraints).  */
367   if (((DR_IS_READ (dra) && DR_IS_WRITE (drb))
368        || (DR_IS_WRITE (dra) && DR_IS_READ (drb)))
369       && !alias_sets_conflict_p (get_alias_set (DR_REF (dra)),
370 				 get_alias_set (DR_REF (drb))))
371     return false;
372 
373   /* Unknown data dependence.  */
374   if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
375     {
376       /* If user asserted safelen consecutive iterations can be
377 	 executed concurrently, assume independence.  */
378       if (loop->safelen >= 2)
379 	{
380 	  if ((unsigned int) loop->safelen < *max_vf)
381 	    *max_vf = loop->safelen;
382 	  LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = false;
383 	  return false;
384 	}
385 
386       if (STMT_VINFO_GATHER_SCATTER_P (stmtinfo_a)
387 	  || STMT_VINFO_GATHER_SCATTER_P (stmtinfo_b))
388 	{
389 	  if (dump_enabled_p ())
390 	    {
391 	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
392 			       "versioning for alias not supported for: "
393 			       "can't determine dependence between ");
394 	      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
395 				 DR_REF (dra));
396 	      dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
397 	      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
398 				 DR_REF (drb));
399 	      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
400 	    }
401 	  return true;
402 	}
403 
404       if (dump_enabled_p ())
405 	{
406 	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
407 			   "versioning for alias required: "
408 			   "can't determine dependence between ");
409 	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
410 			     DR_REF (dra));
411 	  dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
412 	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
413 			     DR_REF (drb));
414 	  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
415 	}
416 
417       /* Add to list of ddrs that need to be tested at run-time.  */
418       return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
419     }
420 
421   /* Known data dependence.  */
422   if (DDR_NUM_DIST_VECTS (ddr) == 0)
423     {
424       /* If user asserted safelen consecutive iterations can be
425 	 executed concurrently, assume independence.  */
426       if (loop->safelen >= 2)
427 	{
428 	  if ((unsigned int) loop->safelen < *max_vf)
429 	    *max_vf = loop->safelen;
430 	  LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = false;
431 	  return false;
432 	}
433 
434       if (STMT_VINFO_GATHER_SCATTER_P (stmtinfo_a)
435 	  || STMT_VINFO_GATHER_SCATTER_P (stmtinfo_b))
436 	{
437 	  if (dump_enabled_p ())
438 	    {
439 	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
440 			       "versioning for alias not supported for: "
441 			       "bad dist vector for ");
442 	      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
443 				 DR_REF (dra));
444 	      dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
445 	      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
446 				 DR_REF (drb));
447 	      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
448 	    }
449 	  return true;
450 	}
451 
452       if (dump_enabled_p ())
453         {
454           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
455                            "versioning for alias required: "
456                            "bad dist vector for ");
457           dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (dra));
458           dump_printf (MSG_MISSED_OPTIMIZATION,  " and ");
459           dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (drb));
460           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
461         }
462       /* Add to list of ddrs that need to be tested at run-time.  */
463       return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
464     }
465 
466   loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
467 
468   if (DDR_COULD_BE_INDEPENDENT_P (ddr)
469       && vect_analyze_possibly_independent_ddr (ddr, loop_vinfo,
470 						loop_depth, max_vf))
471     return false;
472 
473   FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
474     {
475       int dist = dist_v[loop_depth];
476 
477       if (dump_enabled_p ())
478 	dump_printf_loc (MSG_NOTE, vect_location,
479                          "dependence distance  = %d.\n", dist);
480 
481       if (dist == 0)
482 	{
483 	  if (dump_enabled_p ())
484 	    {
485 	      dump_printf_loc (MSG_NOTE, vect_location,
486 	                       "dependence distance == 0 between ");
487 	      dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
488 	      dump_printf (MSG_NOTE, " and ");
489 	      dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
	      dump_printf (MSG_NOTE, "\n");
491 	    }
492 
493 	  /* When we perform grouped accesses and perform implicit CSE
494 	     by detecting equal accesses and doing disambiguation with
495 	     runtime alias tests like for
496 	        .. = a[i];
497 		.. = a[i+1];
498 		a[i] = ..;
499 		a[i+1] = ..;
500 		*p = ..;
501 		.. = a[i];
502 		.. = a[i+1];
503 	     where we will end up loading { a[i], a[i+1] } once, make
504 	     sure that inserting group loads before the first load and
505 	     stores after the last store will do the right thing.
506 	     Similar for groups like
507 	        a[i] = ...;
508 		... = a[i];
509 		a[i+1] = ...;
510 	     where loads from the group interleave with the store.  */
511 	  if (!vect_preserves_scalar_order_p (DR_STMT (dra), DR_STMT (drb)))
512 	    {
513 	      if (dump_enabled_p ())
514 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
515 				 "READ_WRITE dependence in interleaving.\n");
516 	      return true;
517 	    }
518 
519 	  if (loop->safelen < 2)
520 	    {
521 	      tree indicator = dr_zero_step_indicator (dra);
522 	      if (TREE_CODE (indicator) != INTEGER_CST)
523 		vect_check_nonzero_value (loop_vinfo, indicator);
524 	      else if (integer_zerop (indicator))
525 		{
526 		  if (dump_enabled_p ())
527 		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
528 				 "access also has a zero step\n");
529 		  return true;
530 		}
531 	    }
532 	  continue;
533 	}
534 
535       if (dist > 0 && DDR_REVERSED_P (ddr))
536 	{
537 	  /* If DDR_REVERSED_P the order of the data-refs in DDR was
538 	     reversed (to make distance vector positive), and the actual
539 	     distance is negative.  */
540 	  if (dump_enabled_p ())
541 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
542 	                     "dependence distance negative.\n");
543 	  /* Record a negative dependence distance to later limit the
544 	     amount of stmt copying / unrolling we can perform.
545 	     Only need to handle read-after-write dependence.  */
546 	  if (DR_IS_READ (drb)
547 	      && (STMT_VINFO_MIN_NEG_DIST (stmtinfo_b) == 0
548 		  || STMT_VINFO_MIN_NEG_DIST (stmtinfo_b) > (unsigned)dist))
549 	    STMT_VINFO_MIN_NEG_DIST (stmtinfo_b) = dist;
550 	  continue;
551 	}
552 
553       unsigned int abs_dist = abs (dist);
554       if (abs_dist >= 2 && abs_dist < *max_vf)
555 	{
556 	  /* The dependence distance requires reduction of the maximal
557 	     vectorization factor.  */
	  *max_vf = abs_dist;
559 	  if (dump_enabled_p ())
560 	    dump_printf_loc (MSG_NOTE, vect_location,
561 	                     "adjusting maximal vectorization factor to %i\n",
562 	                     *max_vf);
563 	}
564 
565       if (abs_dist >= *max_vf)
566 	{
567 	  /* Dependence distance does not create dependence, as far as
568 	     vectorization is concerned, in this case.  */
569 	  if (dump_enabled_p ())
570 	    dump_printf_loc (MSG_NOTE, vect_location,
571 	                     "dependence distance >= VF.\n");
572 	  continue;
573 	}
574 
575       if (dump_enabled_p ())
576 	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "not vectorized, possible dependence "
			   "between data-refs ");
	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (dra));
	  dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (drb));
	  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
584 	}
585 
586       return true;
587     }
588 
589   return false;
590 }
591 
592 /* Function vect_analyze_data_ref_dependences.
593 
594    Examine all the data references in the loop, and make sure there do not
595    exist any data dependences between them.  Set *MAX_VF according to
596    the maximum vectorization factor the data dependences allow.  */
597 
598 bool
599 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo,
600 				   unsigned int *max_vf)
601 {
602   unsigned int i;
603   struct data_dependence_relation *ddr;
604 
605   if (dump_enabled_p ())
606     dump_printf_loc (MSG_NOTE, vect_location,
607                      "=== vect_analyze_data_ref_dependences ===\n");
608 
609   LOOP_VINFO_DDRS (loop_vinfo)
610     .create (LOOP_VINFO_DATAREFS (loop_vinfo).length ()
611 	     * LOOP_VINFO_DATAREFS (loop_vinfo).length ());
612   LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = true;
613   /* We need read-read dependences to compute STMT_VINFO_SAME_ALIGN_REFS.  */
614   if (!compute_all_dependences (LOOP_VINFO_DATAREFS (loop_vinfo),
615 				&LOOP_VINFO_DDRS (loop_vinfo),
616 				LOOP_VINFO_LOOP_NEST (loop_vinfo), true))
617     return false;
618 
619   /* For epilogues we either have no aliases or alias versioning
620      was applied to original loop.  Therefore we may just get max_vf
621      using VF of original loop.  */
622   if (LOOP_VINFO_EPILOGUE_P (loop_vinfo))
623     *max_vf = LOOP_VINFO_ORIG_MAX_VECT_FACTOR (loop_vinfo);
624   else
625     FOR_EACH_VEC_ELT (LOOP_VINFO_DDRS (loop_vinfo), i, ddr)
626       if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf))
627 	return false;
628 
629   return true;
630 }
631 
632 
/* Function vect_slp_analyze_data_ref_dependence.

   Return TRUE if there (might) exist a dependence between a memory-reference
   DRA and a memory-reference DRB.  In the basic-block SLP case there is no
   versioning for alias and no vectorization factor to adjust; a possible
   dependence simply prevents the accesses from being reordered and thus
   vectorized.  */
639 
640 static bool
641 vect_slp_analyze_data_ref_dependence (struct data_dependence_relation *ddr)
642 {
643   struct data_reference *dra = DDR_A (ddr);
644   struct data_reference *drb = DDR_B (ddr);
645 
646   /* We need to check dependences of statements marked as unvectorizable
647      as well, they still can prohibit vectorization.  */
648 
649   /* Independent data accesses.  */
650   if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
651     return false;
652 
653   if (dra == drb)
654     return false;
655 
656   /* Read-read is OK.  */
657   if (DR_IS_READ (dra) && DR_IS_READ (drb))
658     return false;
659 
660   /* If dra and drb are part of the same interleaving chain consider
661      them independent.  */
662   if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (DR_STMT (dra)))
663       && (GROUP_FIRST_ELEMENT (vinfo_for_stmt (DR_STMT (dra)))
664 	  == GROUP_FIRST_ELEMENT (vinfo_for_stmt (DR_STMT (drb)))))
665     return false;
666 
667   /* Unknown data dependence.  */
668   if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
669     {
      if (dump_enabled_p ())
671 	{
672 	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
673 			   "can't determine dependence between ");
674 	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (dra));
675 	  dump_printf (MSG_MISSED_OPTIMIZATION,  " and ");
676 	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (drb));
677 	  dump_printf (MSG_MISSED_OPTIMIZATION,  "\n");
678 	}
679     }
680   else if (dump_enabled_p ())
681     {
682       dump_printf_loc (MSG_NOTE, vect_location,
683 		       "determined dependence between ");
684       dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
685       dump_printf (MSG_NOTE, " and ");
686       dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
687       dump_printf (MSG_NOTE,  "\n");
688     }
689 
690   return true;
691 }
692 
693 
/* Analyze dependences involved in the transform of SLP NODE.  STORES
   contains the vector of scalar stores of this instance if we are
   disambiguating the loads, and LAST_STORE is the last of those stores.  */
697 
698 static bool
699 vect_slp_analyze_node_dependences (slp_instance instance, slp_tree node,
700 				   vec<gimple *> stores, gimple *last_store)
701 {
702   /* This walks over all stmts involved in the SLP load/store done
703      in NODE verifying we can sink them up to the last stmt in the
704      group.  */
705   gimple *last_access = vect_find_last_scalar_stmt_in_slp (node);
706   for (unsigned k = 0; k < SLP_INSTANCE_GROUP_SIZE (instance); ++k)
707     {
708       gimple *access = SLP_TREE_SCALAR_STMTS (node)[k];
709       if (access == last_access)
710 	continue;
711       data_reference *dr_a = STMT_VINFO_DATA_REF (vinfo_for_stmt (access));
712       for (gimple_stmt_iterator gsi = gsi_for_stmt (access);
713 	   gsi_stmt (gsi) != last_access; gsi_next (&gsi))
714 	{
715 	  gimple *stmt = gsi_stmt (gsi);
716 	  if (! gimple_vuse (stmt)
717 	      || (DR_IS_READ (dr_a) && ! gimple_vdef (stmt)))
718 	    continue;
719 
720 	  /* If we couldn't record a (single) data reference for this
721 	     stmt we have to give up.  */
722 	  /* ???  Here and below if dependence analysis fails we can resort
723 	     to the alias oracle which can handle more kinds of stmts.  */
724 	  data_reference *dr_b = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
725 	  if (!dr_b)
726 	    return false;
727 
728 	  bool dependent = false;
729 	  /* If we run into a store of this same instance (we've just
730 	     marked those) then delay dependence checking until we run
731 	     into the last store because this is where it will have
732 	     been sunk to (and we verify if we can do that as well).  */
733 	  if (gimple_visited_p (stmt))
734 	    {
735 	      if (stmt != last_store)
736 		continue;
737 	      unsigned i;
738 	      gimple *store;
739 	      FOR_EACH_VEC_ELT (stores, i, store)
740 		{
741 		  data_reference *store_dr
742 		    = STMT_VINFO_DATA_REF (vinfo_for_stmt (store));
743 		  ddr_p ddr = initialize_data_dependence_relation
744 				(dr_a, store_dr, vNULL);
745 		  dependent = vect_slp_analyze_data_ref_dependence (ddr);
746 		  free_dependence_relation (ddr);
747 		  if (dependent)
748 		    break;
749 		}
750 	    }
751 	  else
752 	    {
753 	      ddr_p ddr = initialize_data_dependence_relation (dr_a,
754 							       dr_b, vNULL);
755 	      dependent = vect_slp_analyze_data_ref_dependence (ddr);
756 	      free_dependence_relation (ddr);
757 	    }
758 	  if (dependent)
759 	    return false;
760 	}
761     }
762   return true;
763 }
764 
765 
/* Function vect_slp_analyze_instance_dependence.

   Examine the data references involved in SLP instance INSTANCE and make
   sure there do not exist any data dependences that would prevent it from
   being vectorized; in particular, verify that its loads and stores can be
   sunk to the position where the vectorized statements will be emitted.  */
771 
772 bool
773 vect_slp_analyze_instance_dependence (slp_instance instance)
774 {
775   if (dump_enabled_p ())
776     dump_printf_loc (MSG_NOTE, vect_location,
777                      "=== vect_slp_analyze_instance_dependence ===\n");
778 
779   /* The stores of this instance are at the root of the SLP tree.  */
780   slp_tree store = SLP_INSTANCE_TREE (instance);
781   if (! STMT_VINFO_DATA_REF (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (store)[0])))
782     store = NULL;
783 
784   /* Verify we can sink stores to the vectorized stmt insert location.  */
785   gimple *last_store = NULL;
786   if (store)
787     {
788       if (! vect_slp_analyze_node_dependences (instance, store, vNULL, NULL))
789 	return false;
790 
791       /* Mark stores in this instance and remember the last one.  */
792       last_store = vect_find_last_scalar_stmt_in_slp (store);
793       for (unsigned k = 0; k < SLP_INSTANCE_GROUP_SIZE (instance); ++k)
794 	gimple_set_visited (SLP_TREE_SCALAR_STMTS (store)[k], true);
795     }
796 
797   bool res = true;
798 
799   /* Verify we can sink loads to the vectorized stmt insert location,
800      special-casing stores of this instance.  */
801   slp_tree load;
802   unsigned int i;
803   FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, load)
804     if (! vect_slp_analyze_node_dependences (instance, load,
805 					     store
806 					     ? SLP_TREE_SCALAR_STMTS (store)
807 					     : vNULL, last_store))
808       {
809 	res = false;
810 	break;
811       }
812 
813   /* Unset the visited flag.  */
814   if (store)
815     for (unsigned k = 0; k < SLP_INSTANCE_GROUP_SIZE (instance); ++k)
816       gimple_set_visited (SLP_TREE_SCALAR_STMTS (store)[k], false);
817 
818   return res;
819 }
820 
821 /* Record in VINFO the base alignment guarantee given by DRB.  STMT is
822    the statement that contains DRB, which is useful for recording in the
823    dump file.  */
824 
825 static void
826 vect_record_base_alignment (vec_info *vinfo, gimple *stmt,
827 			    innermost_loop_behavior *drb)
828 {
829   bool existed;
830   innermost_loop_behavior *&entry
831     = vinfo->base_alignments.get_or_insert (drb->base_address, &existed);
832   if (!existed || entry->base_alignment < drb->base_alignment)
833     {
834       entry = drb;
835       if (dump_enabled_p ())
836 	{
837 	  dump_printf_loc (MSG_NOTE, vect_location,
838 			   "recording new base alignment for ");
839 	  dump_generic_expr (MSG_NOTE, TDF_SLIM, drb->base_address);
840 	  dump_printf (MSG_NOTE, "\n");
841 	  dump_printf_loc (MSG_NOTE, vect_location,
842 			   "  alignment:    %d\n", drb->base_alignment);
843 	  dump_printf_loc (MSG_NOTE, vect_location,
844 			   "  misalignment: %d\n", drb->base_misalignment);
845 	  dump_printf_loc (MSG_NOTE, vect_location,
846 			   "  based on:     ");
847 	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
848 	}
849     }
850 }
851 
852 /* If the region we're going to vectorize is reached, all unconditional
853    data references occur at least once.  We can therefore pool the base
854    alignment guarantees from each unconditional reference.  Do this by
855    going through all the data references in VINFO and checking whether
856    the containing statement makes the reference unconditionally.  If so,
857    record the alignment of the base address in VINFO so that it can be
858    used for all other references with the same base.  */
859 
860 void
861 vect_record_base_alignments (vec_info *vinfo)
862 {
863   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
864   struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
865   data_reference *dr;
866   unsigned int i;
867   FOR_EACH_VEC_ELT (vinfo->datarefs, i, dr)
868     if (!DR_IS_CONDITIONAL_IN_STMT (dr))
869       {
870 	gimple *stmt = DR_STMT (dr);
871 	vect_record_base_alignment (vinfo, stmt, &DR_INNERMOST (dr));
872 
873 	/* If DR is nested in the loop that is being vectorized, we can also
874 	   record the alignment of the base wrt the outer loop.  */
875 	if (loop && nested_in_vect_loop_p (loop, stmt))
876 	  {
877 	    stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
878 	    vect_record_base_alignment
879 	      (vinfo, stmt, &STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info));
880 	  }
881       }
882 }
883 
884 /* Return the target alignment for the vectorized form of DR.  */
885 
886 static unsigned int
887 vect_calculate_target_alignment (struct data_reference *dr)
888 {
889   gimple *stmt = DR_STMT (dr);
890   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
891   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
892   return targetm.vectorize.preferred_vector_alignment (vectype);
893 }
894 
895 /* Function vect_compute_data_ref_alignment
896 
897    Compute the misalignment of the data reference DR.
898 
899    Output:
900    1. If during the misalignment computation it is found that the data reference
901       cannot be vectorized then false is returned.
902    2. DR_MISALIGNMENT (DR) is defined.
903 
904    FOR NOW: No analysis is actually performed. Misalignment is calculated
905    only for trivial cases. TODO.  */
906 
907 bool
908 vect_compute_data_ref_alignment (struct data_reference *dr)
909 {
910   gimple *stmt = DR_STMT (dr);
911   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
912   vec_base_alignments *base_alignments = &stmt_info->vinfo->base_alignments;
913   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
914   struct loop *loop = NULL;
915   tree ref = DR_REF (dr);
916   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
917 
918   if (dump_enabled_p ())
919     dump_printf_loc (MSG_NOTE, vect_location,
920                      "vect_compute_data_ref_alignment:\n");
921 
922   if (loop_vinfo)
923     loop = LOOP_VINFO_LOOP (loop_vinfo);
924 
925   /* Initialize misalignment to unknown.  */
926   SET_DR_MISALIGNMENT (dr, DR_MISALIGNMENT_UNKNOWN);
927 
928   innermost_loop_behavior *drb = vect_dr_behavior (dr);
929   bool step_preserves_misalignment_p;
930 
931   unsigned HOST_WIDE_INT vector_alignment
932     = vect_calculate_target_alignment (dr) / BITS_PER_UNIT;
933   DR_TARGET_ALIGNMENT (dr) = vector_alignment;
934 
935   /* No step for BB vectorization.  */
936   if (!loop)
937     {
938       gcc_assert (integer_zerop (drb->step));
939       step_preserves_misalignment_p = true;
940     }
941 
  /* In case the dataref is in an inner-loop of the loop that is being
     vectorized (LOOP), we use the base and misalignment information
     relative to the outer-loop (LOOP).  This is ok only if the misalignment
     stays the same throughout the execution of the inner-loop, which is why
     we have to check that the stride of the dataref in the inner-loop is a
     multiple of the vector alignment.  */
948   else if (nested_in_vect_loop_p (loop, stmt))
949     {
950       step_preserves_misalignment_p
951 	= (DR_STEP_ALIGNMENT (dr) % vector_alignment) == 0;
952 
953       if (dump_enabled_p ())
954 	{
955 	  if (step_preserves_misalignment_p)
956 	    dump_printf_loc (MSG_NOTE, vect_location,
957 			     "inner step divides the vector alignment.\n");
958 	  else
959 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
960 			     "inner step doesn't divide the vector"
961 			     " alignment.\n");
962 	}
963     }
964 
  /* Similarly we can only use base and misalignment information relative to
     an innermost loop if the misalignment stays the same throughout the
     execution of the loop.  As above, this is the case if the stride of
     the dataref, multiplied by the vectorization factor, is a multiple of
     the vector alignment.  */
969   else
970     {
971       poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
972       step_preserves_misalignment_p
973 	= multiple_p (DR_STEP_ALIGNMENT (dr) * vf, vector_alignment);
974 
975       if (!step_preserves_misalignment_p && dump_enabled_p ())
976 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
977 			 "step doesn't divide the vector alignment.\n");
978     }
979 
980   unsigned int base_alignment = drb->base_alignment;
981   unsigned int base_misalignment = drb->base_misalignment;
982 
983   /* Calculate the maximum of the pooled base address alignment and the
984      alignment that we can compute for DR itself.  */
985   innermost_loop_behavior **entry = base_alignments->get (drb->base_address);
986   if (entry && base_alignment < (*entry)->base_alignment)
987     {
988       base_alignment = (*entry)->base_alignment;
989       base_misalignment = (*entry)->base_misalignment;
990     }
991 
992   if (drb->offset_alignment < vector_alignment
993       || !step_preserves_misalignment_p
994       /* We need to know whether the step wrt the vectorized loop is
995 	 negative when computing the starting misalignment below.  */
996       || TREE_CODE (drb->step) != INTEGER_CST)
997     {
998       if (dump_enabled_p ())
999 	{
1000 	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1001 	                   "Unknown alignment for access: ");
1002 	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, ref);
1003 	  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
1004 	}
1005       return true;
1006     }
1007 
1008   if (base_alignment < vector_alignment)
1009     {
1010       unsigned int max_alignment;
1011       tree base = get_base_for_alignment (drb->base_address, &max_alignment);
1012       if (max_alignment < vector_alignment
1013 	  || !vect_can_force_dr_alignment_p (base,
1014 					     vector_alignment * BITS_PER_UNIT))
1015 	{
1016 	  if (dump_enabled_p ())
1017 	    {
1018 	      dump_printf_loc (MSG_NOTE, vect_location,
1019 	                       "can't force alignment of ref: ");
1020 	      dump_generic_expr (MSG_NOTE, TDF_SLIM, ref);
1021 	      dump_printf (MSG_NOTE, "\n");
1022 	    }
1023 	  return true;
1024 	}
1025 
1026       /* Force the alignment of the decl.
1027 	 NOTE: This is the only change to the code we make during
1028 	 the analysis phase, before deciding to vectorize the loop.  */
1029       if (dump_enabled_p ())
1030         {
1031           dump_printf_loc (MSG_NOTE, vect_location, "force alignment of ");
1032           dump_generic_expr (MSG_NOTE, TDF_SLIM, ref);
1033           dump_printf (MSG_NOTE, "\n");
1034         }
1035 
1036       DR_VECT_AUX (dr)->base_decl = base;
1037       DR_VECT_AUX (dr)->base_misaligned = true;
1038       base_misalignment = 0;
1039     }
1040   poly_int64 misalignment
1041     = base_misalignment + wi::to_poly_offset (drb->init).force_shwi ();
1042 
  /* If this is a backward running DR then the first access in the larger
     vectype actually is N-1 elements before the address given in the DR.
     Adjust misalign accordingly.  */
1046   if (tree_int_cst_sgn (drb->step) < 0)
1047     /* PLUS because STEP is negative.  */
1048     misalignment += ((TYPE_VECTOR_SUBPARTS (vectype) - 1)
1049 		     * TREE_INT_CST_LOW (drb->step));
1050 
1051   unsigned int const_misalignment;
1052   if (!known_misalignment (misalignment, vector_alignment,
1053 			   &const_misalignment))
1054     {
1055       if (dump_enabled_p ())
1056 	{
1057 	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1058 			   "Non-constant misalignment for access: ");
1059 	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, ref);
1060 	  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
1061 	}
1062       return true;
1063     }
1064 
1065   SET_DR_MISALIGNMENT (dr, const_misalignment);
1066 
1067   if (dump_enabled_p ())
1068     {
1069       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1070                        "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
1071       dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, ref);
1072       dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
1073     }
1074 
1075   return true;
1076 }
1077 
1078 /* Function vect_update_misalignment_for_peel.
1079    Sets DR's misalignment
1080    - to 0 if it has the same alignment as DR_PEEL,
   - to the misalignment computed using NPEEL if DR's alignment is known,
1082    - to -1 (unknown) otherwise.
1083 
1084    DR - the data reference whose misalignment is to be adjusted.
1085    DR_PEEL - the data reference whose misalignment is being made
1086              zero in the vector loop by the peel.
1087    NPEEL - the number of iterations in the peel loop if the misalignment
1088            of DR_PEEL is known at compile time.  */
1089 
1090 static void
1091 vect_update_misalignment_for_peel (struct data_reference *dr,
1092                                    struct data_reference *dr_peel, int npeel)
1093 {
1094   unsigned int i;
1095   vec<dr_p> same_aligned_drs;
1096   struct data_reference *current_dr;
1097   int dr_size = vect_get_scalar_dr_size (dr);
1098   int dr_peel_size = vect_get_scalar_dr_size (dr_peel);
1099   stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
1100   stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));
1101 
  /* For interleaved data accesses the step in the loop must be multiplied by
     the size of the interleaving group.  */
1104   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1105     dr_size *= GROUP_SIZE (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info)));
1106   if (STMT_VINFO_GROUPED_ACCESS (peel_stmt_info))
1107     dr_peel_size *= GROUP_SIZE (peel_stmt_info);
1108 
1109   /* It can be assumed that the data refs with the same alignment as dr_peel
1110      are aligned in the vector loop.  */
1111   same_aligned_drs
1112     = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
1113   FOR_EACH_VEC_ELT (same_aligned_drs, i, current_dr)
1114     {
1115       if (current_dr != dr)
1116         continue;
1117       gcc_assert (!known_alignment_for_access_p (dr)
1118 		  || !known_alignment_for_access_p (dr_peel)
1119 		  || (DR_MISALIGNMENT (dr) / dr_size
1120 		      == DR_MISALIGNMENT (dr_peel) / dr_peel_size));
1121       SET_DR_MISALIGNMENT (dr, 0);
1122       return;
1123     }
1124 
1125   if (known_alignment_for_access_p (dr)
1126       && known_alignment_for_access_p (dr_peel))
1127     {
1128       bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
1129       int misal = DR_MISALIGNMENT (dr);
1130       misal += negative ? -npeel * dr_size : npeel * dr_size;
1131       misal &= DR_TARGET_ALIGNMENT (dr) - 1;
1132       SET_DR_MISALIGNMENT (dr, misal);
1133       return;
1134     }
1135 
1136   if (dump_enabled_p ())
1137     dump_printf_loc (MSG_NOTE, vect_location, "Setting misalignment " \
1138 		     "to unknown (-1).\n");
1139   SET_DR_MISALIGNMENT (dr, DR_MISALIGNMENT_UNKNOWN);
1140 }
1141 
1142 
1143 /* Function verify_data_ref_alignment
1144 
1145    Return TRUE if DR can be handled with respect to alignment.  */
1146 
1147 static bool
1148 verify_data_ref_alignment (data_reference_p dr)
1149 {
1150   enum dr_alignment_support supportable_dr_alignment
1151     = vect_supportable_dr_alignment (dr, false);
1152   if (!supportable_dr_alignment)
1153     {
1154       if (dump_enabled_p ())
1155 	{
1156 	  if (DR_IS_READ (dr))
1157 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1158 			     "not vectorized: unsupported unaligned load.");
1159 	  else
1160 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1161 			     "not vectorized: unsupported unaligned "
1162 			     "store.");
1163 
1164 	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
1165 			     DR_REF (dr));
1166 	  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
1167 	}
1168       return false;
1169     }
1170 
1171   if (supportable_dr_alignment != dr_aligned && dump_enabled_p ())
1172     dump_printf_loc (MSG_NOTE, vect_location,
1173 		     "Vectorizing an unaligned access.\n");
1174 
1175   return true;
1176 }
1177 
1178 /* Function vect_verify_datarefs_alignment
1179 
1180    Return TRUE if all data references in the loop can be
1181    handled with respect to alignment.  */
1182 
1183 bool
1184 vect_verify_datarefs_alignment (loop_vec_info vinfo)
1185 {
1186   vec<data_reference_p> datarefs = vinfo->datarefs;
1187   struct data_reference *dr;
1188   unsigned int i;
1189 
1190   FOR_EACH_VEC_ELT (datarefs, i, dr)
1191     {
1192       gimple *stmt = DR_STMT (dr);
1193       stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1194 
1195       if (!STMT_VINFO_RELEVANT_P (stmt_info))
1196 	continue;
1197 
1198       /* For interleaving, only the alignment of the first access matters.   */
1199       if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
1200 	  && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1201 	continue;
1202 
1203       /* Strided accesses perform only component accesses, alignment is
1204 	 irrelevant for them.  */
1205       if (STMT_VINFO_STRIDED_P (stmt_info)
1206 	  && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
1207 	continue;
1208 
1209       if (! verify_data_ref_alignment (dr))
1210 	return false;
1211     }
1212 
1213   return true;
1214 }
1215 
/* Given a memory reference EXP, return whether its alignment is less
   than its size.  */
1218 
1219 static bool
1220 not_size_aligned (tree exp)
1221 {
1222   if (!tree_fits_uhwi_p (TYPE_SIZE (TREE_TYPE (exp))))
1223     return true;
1224 
1225   return (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (exp)))
1226 	  > get_object_alignment (exp));
1227 }
1228 
1229 /* Function vector_alignment_reachable_p
1230 
1231    Return true if vector alignment for DR is reachable by peeling
1232    a few loop iterations.  Return false otherwise.  */
1233 
1234 static bool
1235 vector_alignment_reachable_p (struct data_reference *dr)
1236 {
1237   gimple *stmt = DR_STMT (dr);
1238   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1239   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1240 
1241   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1242     {
      /* For interleaved accesses we peel only if the number of iterations
	 in the prolog loop (VF - misalignment) is a multiple of the number
	 of interleaved accesses.  */
1246       int elem_size, mis_in_elements;
1247 
1248       /* FORNOW: handle only known alignment.  */
1249       if (!known_alignment_for_access_p (dr))
1250 	return false;
1251 
1252       poly_uint64 nelements = TYPE_VECTOR_SUBPARTS (vectype);
1253       poly_uint64 vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
1254       elem_size = vector_element_size (vector_size, nelements);
1255       mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
1256 
1257       if (!multiple_p (nelements - mis_in_elements, GROUP_SIZE (stmt_info)))
1258 	return false;
1259     }
1260 
1261   /* If misalignment is known at the compile time then allow peeling
1262      only if natural alignment is reachable through peeling.  */
1263   if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
1264     {
1265       HOST_WIDE_INT elmsize =
1266 		int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
1267       if (dump_enabled_p ())
1268 	{
1269 	  dump_printf_loc (MSG_NOTE, vect_location,
1270 	                   "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
1271 	  dump_printf (MSG_NOTE,
1272 	               ". misalignment = %d.\n", DR_MISALIGNMENT (dr));
1273 	}
1274       if (DR_MISALIGNMENT (dr) % elmsize)
1275 	{
1276 	  if (dump_enabled_p ())
1277 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1278 	                     "data size does not divide the misalignment.\n");
1279 	  return false;
1280 	}
1281     }
1282 
1283   if (!known_alignment_for_access_p (dr))
1284     {
1285       tree type = TREE_TYPE (DR_REF (dr));
1286       bool is_packed = not_size_aligned (DR_REF (dr));
1287       if (dump_enabled_p ())
1288 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1289 	                 "Unknown misalignment, %snaturally aligned\n",
1290 			 is_packed ? "not " : "");
1291       return targetm.vectorize.vector_alignment_reachable (type, is_packed);
1292     }
1293 
1294   return true;
1295 }
1296 
1297 
1298 /* Calculate the cost of the memory access represented by DR.  */
1299 
1300 static void
1301 vect_get_data_access_cost (struct data_reference *dr,
1302                            unsigned int *inside_cost,
1303                            unsigned int *outside_cost,
1304 			   stmt_vector_for_cost *body_cost_vec)
1305 {
1306   gimple *stmt = DR_STMT (dr);
1307   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1308   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1309   int ncopies;
1310 
1311   if (PURE_SLP_STMT (stmt_info))
1312     ncopies = 1;
1313   else
1314     ncopies = vect_get_num_copies (loop_vinfo, STMT_VINFO_VECTYPE (stmt_info));
1315 
1316   if (DR_IS_READ (dr))
1317     vect_get_load_cost (dr, ncopies, true, inside_cost, outside_cost,
1318 			NULL, body_cost_vec, false);
1319   else
1320     vect_get_store_cost (dr, ncopies, inside_cost, body_cost_vec);
1321 
1322   if (dump_enabled_p ())
1323     dump_printf_loc (MSG_NOTE, vect_location,
1324                      "vect_get_data_access_cost: inside_cost = %d, "
1325                      "outside_cost = %d.\n", *inside_cost, *outside_cost);
1326 }
1327 
1328 
1329 typedef struct _vect_peel_info
1330 {
1331   struct data_reference *dr;
1332   int npeel;
1333   unsigned int count;
1334 } *vect_peel_info;
1335 
1336 typedef struct _vect_peel_extended_info
1337 {
1338   struct _vect_peel_info peel_info;
1339   unsigned int inside_cost;
1340   unsigned int outside_cost;
1341 } *vect_peel_extended_info;
1342 
1343 
1344 /* Peeling hashtable helpers.  */
1345 
1346 struct peel_info_hasher : free_ptr_hash <_vect_peel_info>
1347 {
1348   static inline hashval_t hash (const _vect_peel_info *);
1349   static inline bool equal (const _vect_peel_info *, const _vect_peel_info *);
1350 };
1351 
1352 inline hashval_t
1353 peel_info_hasher::hash (const _vect_peel_info *peel_info)
1354 {
1355   return (hashval_t) peel_info->npeel;
1356 }
1357 
1358 inline bool
1359 peel_info_hasher::equal (const _vect_peel_info *a, const _vect_peel_info *b)
1360 {
1361   return (a->npeel == b->npeel);
1362 }
1363 
1364 
1365 /* Insert DR into peeling hash table with NPEEL as key.  */
1366 
1367 static void
1368 vect_peeling_hash_insert (hash_table<peel_info_hasher> *peeling_htab,
1369 			  loop_vec_info loop_vinfo, struct data_reference *dr,
1370                           int npeel)
1371 {
1372   struct _vect_peel_info elem, *slot;
1373   _vect_peel_info **new_slot;
1374   bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1375 
1376   elem.npeel = npeel;
1377   slot = peeling_htab->find (&elem);
1378   if (slot)
1379     slot->count++;
1380   else
1381     {
1382       slot = XNEW (struct _vect_peel_info);
1383       slot->npeel = npeel;
1384       slot->dr = dr;
1385       slot->count = 1;
1386       new_slot = peeling_htab->find_slot (slot, INSERT);
1387       *new_slot = slot;
1388     }
1389 
1390   if (!supportable_dr_alignment
1391       && unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
1392     slot->count += VECT_MAX_COST;
1393 }
1394 
1395 
/* Traverse the peeling hash table to find the peeling option that aligns
   the maximum number of data accesses.  */
1398 
1399 int
1400 vect_peeling_hash_get_most_frequent (_vect_peel_info **slot,
1401 				     _vect_peel_extended_info *max)
1402 {
1403   vect_peel_info elem = *slot;
1404 
1405   if (elem->count > max->peel_info.count
1406       || (elem->count == max->peel_info.count
1407           && max->peel_info.npeel > elem->npeel))
1408     {
1409       max->peel_info.npeel = elem->npeel;
1410       max->peel_info.count = elem->count;
1411       max->peel_info.dr = elem->dr;
1412     }
1413 
1414   return 1;
1415 }
1416 
1417 /* Get the costs of peeling NPEEL iterations checking data access costs
1418    for all data refs.  If UNKNOWN_MISALIGNMENT is true, we assume DR0's
1419    misalignment will be zero after peeling.  */
1420 
1421 static void
1422 vect_get_peeling_costs_all_drs (vec<data_reference_p> datarefs,
1423 				struct data_reference *dr0,
1424 				unsigned int *inside_cost,
1425 				unsigned int *outside_cost,
1426 				stmt_vector_for_cost *body_cost_vec,
1427 				unsigned int npeel,
1428 				bool unknown_misalignment)
1429 {
1430   unsigned i;
1431   data_reference *dr;
1432 
1433   FOR_EACH_VEC_ELT (datarefs, i, dr)
1434     {
1435       gimple *stmt = DR_STMT (dr);
1436       stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1437       if (!STMT_VINFO_RELEVANT_P (stmt_info))
1438 	continue;
1439 
1440       /* For interleaving, only the alignment of the first access
1441          matters.  */
1442       if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
1443           && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1444         continue;
1445 
1446       /* Strided accesses perform only component accesses, alignment is
1447          irrelevant for them.  */
1448       if (STMT_VINFO_STRIDED_P (stmt_info)
1449 	  && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
1450 	continue;
1451 
1452       int save_misalignment;
1453       save_misalignment = DR_MISALIGNMENT (dr);
1454       if (npeel == 0)
1455 	;
1456       else if (unknown_misalignment && dr == dr0)
1457 	SET_DR_MISALIGNMENT (dr, 0);
1458       else
1459 	vect_update_misalignment_for_peel (dr, dr0, npeel);
1460       vect_get_data_access_cost (dr, inside_cost, outside_cost,
1461 				 body_cost_vec);
1462       SET_DR_MISALIGNMENT (dr, save_misalignment);
1463     }
1464 }
1465 
/* Traverse the peeling hash table and calculate the cost for each peeling
   option.  Find the one with the lowest cost.  */
1468 
1469 int
1470 vect_peeling_hash_get_lowest_cost (_vect_peel_info **slot,
1471 				   _vect_peel_extended_info *min)
1472 {
1473   vect_peel_info elem = *slot;
1474   int dummy;
1475   unsigned int inside_cost = 0, outside_cost = 0;
1476   gimple *stmt = DR_STMT (elem->dr);
1477   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1478   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1479   stmt_vector_for_cost prologue_cost_vec, body_cost_vec,
1480 		       epilogue_cost_vec;
1481 
1482   prologue_cost_vec.create (2);
1483   body_cost_vec.create (2);
1484   epilogue_cost_vec.create (2);
1485 
1486   vect_get_peeling_costs_all_drs (LOOP_VINFO_DATAREFS (loop_vinfo),
1487 				  elem->dr, &inside_cost, &outside_cost,
1488 				  &body_cost_vec, elem->npeel, false);
1489 
1490   body_cost_vec.release ();
1491 
1492   outside_cost += vect_get_known_peeling_cost
1493     (loop_vinfo, elem->npeel, &dummy,
1494      &LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
1495      &prologue_cost_vec, &epilogue_cost_vec);
1496 
1497   /* Prologue and epilogue costs are added to the target model later.
1498      These costs depend only on the scalar iteration cost, the
1499      number of peeling iterations finally chosen, and the number of
1500      misaligned statements.  So discard the information found here.  */
1501   prologue_cost_vec.release ();
1502   epilogue_cost_vec.release ();
1503 
1504   if (inside_cost < min->inside_cost
1505       || (inside_cost == min->inside_cost
1506 	  && outside_cost < min->outside_cost))
1507     {
1508       min->inside_cost = inside_cost;
1509       min->outside_cost = outside_cost;
1510       min->peel_info.dr = elem->dr;
1511       min->peel_info.npeel = elem->npeel;
1512       min->peel_info.count = elem->count;
1513     }
1514 
1515   return 1;
1516 }
1517 
1518 
1519 /* Choose best peeling option by traversing peeling hash table and either
1520    choosing an option with the lowest cost (if cost model is enabled) or the
1521    option that aligns as many accesses as possible.  */
1522 
1523 static struct _vect_peel_extended_info
1524 vect_peeling_hash_choose_best_peeling (hash_table<peel_info_hasher> *peeling_htab,
1525 				       loop_vec_info loop_vinfo)
1526 {
1527    struct _vect_peel_extended_info res;
1528 
1529    res.peel_info.dr = NULL;
1530 
1531    if (!unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
1532      {
1533        res.inside_cost = INT_MAX;
1534        res.outside_cost = INT_MAX;
1535        peeling_htab->traverse <_vect_peel_extended_info *,
1536 	   		       vect_peeling_hash_get_lowest_cost> (&res);
1537      }
1538    else
1539      {
1540        res.peel_info.count = 0;
1541        peeling_htab->traverse <_vect_peel_extended_info *,
1542 	   		       vect_peeling_hash_get_most_frequent> (&res);
1543        res.inside_cost = 0;
1544        res.outside_cost = 0;
1545      }
1546 
1547    return res;
1548 }
1549 
/* Return true if all data references can still be handled with a supportable
   alignment after peeling NPEEL iterations to align DR0.  */
1551 
1552 static bool
1553 vect_peeling_supportable (loop_vec_info loop_vinfo, struct data_reference *dr0,
1554 			  unsigned npeel)
1555 {
1556   unsigned i;
1557   struct data_reference *dr = NULL;
1558   vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1559   gimple *stmt;
1560   stmt_vec_info stmt_info;
1561   enum dr_alignment_support supportable_dr_alignment;
1562 
1563   /* Ensure that all data refs can be vectorized after the peel.  */
1564   FOR_EACH_VEC_ELT (datarefs, i, dr)
1565     {
1566       int save_misalignment;
1567 
1568       if (dr == dr0)
1569 	continue;
1570 
1571       stmt = DR_STMT (dr);
1572       stmt_info = vinfo_for_stmt (stmt);
1573       /* For interleaving, only the alignment of the first access
1574 	 matters.  */
1575       if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
1576 	  && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1577 	continue;
1578 
1579       /* Strided accesses perform only component accesses, alignment is
1580 	 irrelevant for them.  */
1581       if (STMT_VINFO_STRIDED_P (stmt_info)
1582 	  && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
1583 	continue;
1584 
1585       save_misalignment = DR_MISALIGNMENT (dr);
1586       vect_update_misalignment_for_peel (dr, dr0, npeel);
1587       supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1588       SET_DR_MISALIGNMENT (dr, save_misalignment);
1589 
1590       if (!supportable_dr_alignment)
1591 	return false;
1592     }
1593 
1594   return true;
1595 }
1596 
1597 /* Function vect_enhance_data_refs_alignment
1598 
1599    This pass will use loop versioning and loop peeling in order to enhance
1600    the alignment of data references in the loop.
1601 
1602    FOR NOW: we assume that whatever versioning/peeling takes place, only the
1603    original loop is to be vectorized.  Any other loops that are created by
   the transformations performed in this pass are not supposed to be
1605    vectorized.  This restriction will be relaxed.
1606 
   This pass will require a cost model to guide it in deciding whether to
   apply peeling, versioning, or a combination of the two.  For example, the
   scheme that Intel uses when given a loop with several memory accesses is
   as follows: choose one memory access ('p') whose alignment you want to
   force by doing peeling.  Then, either (1) generate a loop in which 'p' is
   aligned and all other accesses are not necessarily aligned, or (2) use
   loop versioning to generate one loop in which all accesses are aligned,
   and another loop in which only 'p' is necessarily aligned.
1615 
1616    ("Automatic Intra-Register Vectorization for the Intel Architecture",
   Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
1618    Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1619 
1620    Devising a cost model is the most critical aspect of this work.  It will
1621    guide us on which access to peel for, whether to use loop versioning, how
1622    many versions to create, etc.  The cost model will probably consist of
1623    generic considerations as well as target specific considerations (on
1624    powerpc for example, misaligned stores are more painful than misaligned
1625    loads).
1626 
1627    Here are the general steps involved in alignment enhancements:
1628 
1629      -- original loop, before alignment analysis:
1630 	for (i=0; i<N; i++){
1631 	  x = q[i];			# DR_MISALIGNMENT(q) = unknown
1632 	  p[i] = y;			# DR_MISALIGNMENT(p) = unknown
1633 	}
1634 
1635      -- After vect_compute_data_refs_alignment:
1636 	for (i=0; i<N; i++){
1637 	  x = q[i];			# DR_MISALIGNMENT(q) = 3
1638 	  p[i] = y;			# DR_MISALIGNMENT(p) = unknown
1639 	}
1640 
1641      -- Possibility 1: we do loop versioning:
1642      if (p is aligned) {
1643 	for (i=0; i<N; i++){	# loop 1A
1644 	  x = q[i];			# DR_MISALIGNMENT(q) = 3
1645 	  p[i] = y;			# DR_MISALIGNMENT(p) = 0
1646 	}
1647      }
1648      else {
1649 	for (i=0; i<N; i++){	# loop 1B
1650 	  x = q[i];			# DR_MISALIGNMENT(q) = 3
1651 	  p[i] = y;			# DR_MISALIGNMENT(p) = unaligned
1652 	}
1653      }
1654 
1655      -- Possibility 2: we do loop peeling:
1656      for (i = 0; i < 3; i++){	# (scalar loop, not to be vectorized).
1657 	x = q[i];
1658 	p[i] = y;
1659      }
1660      for (i = 3; i < N; i++){	# loop 2A
1661 	x = q[i];			# DR_MISALIGNMENT(q) = 0
1662 	p[i] = y;			# DR_MISALIGNMENT(p) = unknown
1663      }
1664 
1665      -- Possibility 3: combination of loop peeling and versioning:
1666      for (i = 0; i < 3; i++){	# (scalar loop, not to be vectorized).
1667 	x = q[i];
1668 	p[i] = y;
1669      }
1670      if (p is aligned) {
1671 	for (i = 3; i<N; i++){	# loop 3A
1672 	  x = q[i];			# DR_MISALIGNMENT(q) = 0
1673 	  p[i] = y;			# DR_MISALIGNMENT(p) = 0
1674 	}
1675      }
1676      else {
1677 	for (i = 3; i<N; i++){	# loop 3B
1678 	  x = q[i];			# DR_MISALIGNMENT(q) = 0
1679 	  p[i] = y;			# DR_MISALIGNMENT(p) = unaligned
1680 	}
1681      }
1682 
1683      These loops are later passed to loop_transform to be vectorized.  The
1684      vectorizer will use the alignment information to guide the transformation
1685      (whether to generate regular loads/stores, or with special handling for
1686      misalignment).  */
1687 
1688 bool
1689 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1690 {
1691   vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1692   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1693   enum dr_alignment_support supportable_dr_alignment;
1694   struct data_reference *dr0 = NULL, *first_store = NULL;
1695   struct data_reference *dr;
1696   unsigned int i, j;
1697   bool do_peeling = false;
1698   bool do_versioning = false;
1699   bool stat;
1700   gimple *stmt;
1701   stmt_vec_info stmt_info;
1702   unsigned int npeel = 0;
1703   bool one_misalignment_known = false;
1704   bool one_misalignment_unknown = false;
1705   bool one_dr_unsupportable = false;
1706   struct data_reference *unsupportable_dr = NULL;
1707   poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1708   unsigned possible_npeel_number = 1;
1709   tree vectype;
1710   unsigned int mis, same_align_drs_max = 0;
1711   hash_table<peel_info_hasher> peeling_htab (1);
1712 
1713   if (dump_enabled_p ())
1714     dump_printf_loc (MSG_NOTE, vect_location,
1715                      "=== vect_enhance_data_refs_alignment ===\n");
1716 
1717   /* Reset data so we can safely be called multiple times.  */
1718   LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).truncate (0);
1719   LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) = 0;
1720 
1721   /* While cost model enhancements are expected in the future, the high level
1722      view of the code at this time is as follows:
1723 
1724      A) If there is a misaligned access then see if peeling to align
1725         this access can make all data references satisfy
1726         vect_supportable_dr_alignment.  If so, update data structures
1727         as needed and return true.
1728 
1729      B) If peeling wasn't possible and there is a data reference with an
1730         unknown misalignment that does not satisfy vect_supportable_dr_alignment
1731         then see if loop versioning checks can be used to make all data
1732         references satisfy vect_supportable_dr_alignment.  If so, update
1733         data structures as needed and return true.
1734 
1735      C) If neither peeling nor versioning were successful then return false if
1736         any data reference does not satisfy vect_supportable_dr_alignment.
1737 
1738      D) Return true (all data references satisfy vect_supportable_dr_alignment).
1739 
1740      Note, Possibility 3 above (which is peeling and versioning together) is not
1741      being done at this time.  */
1742 
1743   /* (1) Peeling to force alignment.  */
1744 
1745   /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1746      Considerations:
1747      + How many accesses will become aligned due to the peeling
1748      - How many accesses will become unaligned due to the peeling,
1749        and the cost of misaligned accesses.
1750      - The cost of peeling (the extra runtime checks, the increase
1751        in code size).  */
1752 
1753   FOR_EACH_VEC_ELT (datarefs, i, dr)
1754     {
1755       stmt = DR_STMT (dr);
1756       stmt_info = vinfo_for_stmt (stmt);
1757 
1758       if (!STMT_VINFO_RELEVANT_P (stmt_info))
1759 	continue;
1760 
1761       /* For interleaving, only the alignment of the first access
1762          matters.  */
1763       if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
1764           && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1765         continue;
1766 
1767       /* For invariant accesses there is nothing to enhance.  */
1768       if (integer_zerop (DR_STEP (dr)))
1769 	continue;
1770 
1771       /* Strided accesses perform only component accesses, alignment is
1772 	 irrelevant for them.  */
1773       if (STMT_VINFO_STRIDED_P (stmt_info)
1774 	  && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
1775 	continue;
1776 
1777       supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1778       do_peeling = vector_alignment_reachable_p (dr);
1779       if (do_peeling)
1780         {
1781           if (known_alignment_for_access_p (dr))
1782             {
1783 	      unsigned int npeel_tmp = 0;
1784 	      bool negative = tree_int_cst_compare (DR_STEP (dr),
1785 						    size_zero_node) < 0;
1786 
1787 	      vectype = STMT_VINFO_VECTYPE (stmt_info);
1788 	      unsigned int target_align = DR_TARGET_ALIGNMENT (dr);
1789 	      unsigned int dr_size = vect_get_scalar_dr_size (dr);
1790 	      mis = (negative ? DR_MISALIGNMENT (dr) : -DR_MISALIGNMENT (dr));
1791 	      if (DR_MISALIGNMENT (dr) != 0)
1792 		npeel_tmp = (mis & (target_align - 1)) / dr_size;
1793 
              /* For multiple types, it is possible that the bigger type access
                 will have more than one peeling option.  E.g., a loop with two
                 types: one of size (vector size / 4), and the other one of
                 size (vector size / 8).  The vectorization factor will be 8.
                 If both accesses are misaligned by 3, the first one needs one
                 scalar iteration to be aligned, and the second one needs 5.
                 But peeling 5 scalar iterations also aligns the first access,
                 and in that case both accesses will be aligned.
                 Hence, in addition to the immediate peeling amount, we also
                 want to try adding whole vector sizes, as long as we do not
                 exceed the vectorization factor.
                 We do this automatically for the cost model, since we
                 calculate the cost for every peeling option.  */
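              /* As a purely illustrative example of the above (hypothetical
                 numbers): with a 16-byte target alignment, 4-byte elements
                 and DR_MISALIGNMENT of 4 for a positive-step access, MIS is
                 -4 and NPEEL_TMP is ((-4 & 15) / 4) = 3; with the unlimited
                 cost model the loop below then records the peeling options
                 3, 7, 11, ... (stepping by TARGET_ALIGN / DR_SIZE = 4) in
                 the hash table.  */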
1807               if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
1808 		{
1809 		  poly_uint64 nscalars = (STMT_SLP_TYPE (stmt_info)
1810 					  ? vf * GROUP_SIZE (stmt_info) : vf);
1811 		  possible_npeel_number
1812 		    = vect_get_num_vectors (nscalars, vectype);
1813 
		  /* NPEEL_TMP is 0 when there is no misalignment; in that
		     case also allow peeling a full NELEMENTS as an extra
		     option.  */
1816 		  if (DR_MISALIGNMENT (dr) == 0)
1817 		    possible_npeel_number++;
1818 		}
1819 
1820 	      /* Save info about DR in the hash table.  Also include peeling
1821 	         amounts according to the explanation above.  */
1822               for (j = 0; j < possible_npeel_number; j++)
1823                 {
1824                   vect_peeling_hash_insert (&peeling_htab, loop_vinfo,
1825 					    dr, npeel_tmp);
1826 		  npeel_tmp += target_align / dr_size;
1827                 }
1828 
1829 	      one_misalignment_known = true;
1830             }
1831           else
1832             {
              /* If we don't know any misalignment values, we prefer
                 peeling for the data-ref that has the maximum number of
                 data-refs with the same alignment, unless the target prefers
                 to align stores over loads.  */
1837 	      unsigned same_align_drs
1838 		= STMT_VINFO_SAME_ALIGN_REFS (stmt_info).length ();
1839 	      if (!dr0
1840 		  || same_align_drs_max < same_align_drs)
1841 		{
1842 		  same_align_drs_max = same_align_drs;
1843 		  dr0 = dr;
1844 		}
1845 	      /* For data-refs with the same number of related
1846 		 accesses prefer the one where the misalign
1847 		 computation will be invariant in the outermost loop.  */
1848 	      else if (same_align_drs_max == same_align_drs)
1849 		{
1850 		  struct loop *ivloop0, *ivloop;
1851 		  ivloop0 = outermost_invariant_loop_for_expr
1852 		    (loop, DR_BASE_ADDRESS (dr0));
1853 		  ivloop = outermost_invariant_loop_for_expr
1854 		    (loop, DR_BASE_ADDRESS (dr));
1855 		  if ((ivloop && !ivloop0)
1856 		      || (ivloop && ivloop0
1857 			  && flow_loop_nested_p (ivloop, ivloop0)))
1858 		    dr0 = dr;
1859 		}
1860 
1861 	      one_misalignment_unknown = true;
1862 
1863 	      /* Check for data refs with unsupportable alignment that
1864 	         can be peeled.  */
1865 	      if (!supportable_dr_alignment)
1866 	      {
1867 		one_dr_unsupportable = true;
1868 		unsupportable_dr = dr;
1869 	      }
1870 
1871 	      if (!first_store && DR_IS_WRITE (dr))
1872 		first_store = dr;
1873             }
1874         }
1875       else
1876         {
1877           if (!aligned_access_p (dr))
1878             {
1879               if (dump_enabled_p ())
1880                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1881                                  "vector alignment may not be reachable\n");
1882               break;
1883             }
1884         }
1885     }
1886 
1887   /* Check if we can possibly peel the loop.  */
1888   if (!vect_can_advance_ivs_p (loop_vinfo)
1889       || !slpeel_can_duplicate_loop_p (loop, single_exit (loop))
1890       || loop->inner)
1891     do_peeling = false;
1892 
1893   struct _vect_peel_extended_info peel_for_known_alignment;
1894   struct _vect_peel_extended_info peel_for_unknown_alignment;
1895   struct _vect_peel_extended_info best_peel;
1896 
1897   peel_for_unknown_alignment.inside_cost = INT_MAX;
1898   peel_for_unknown_alignment.outside_cost = INT_MAX;
1899   peel_for_unknown_alignment.peel_info.count = 0;
1900 
1901   if (do_peeling
1902       && one_misalignment_unknown)
1903     {
      /* Check whether the target prefers aligning stores over loads, i.e.,
	 whether misaligned stores are more expensive than misaligned loads
	 (taking DRs with the same alignment into account).  */
1907       unsigned int load_inside_cost = 0;
1908       unsigned int load_outside_cost = 0;
1909       unsigned int store_inside_cost = 0;
1910       unsigned int store_outside_cost = 0;
1911       unsigned int estimated_npeels = vect_vf_for_cost (loop_vinfo) / 2;
1912 
1913       stmt_vector_for_cost dummy;
1914       dummy.create (2);
1915       vect_get_peeling_costs_all_drs (datarefs, dr0,
1916 				      &load_inside_cost,
1917 				      &load_outside_cost,
1918 				      &dummy, estimated_npeels, true);
1919       dummy.release ();
1920 
1921       if (first_store)
1922 	{
1923 	  dummy.create (2);
1924 	  vect_get_peeling_costs_all_drs (datarefs, first_store,
1925 					  &store_inside_cost,
1926 					  &store_outside_cost,
1927 					  &dummy, estimated_npeels, true);
1928 	  dummy.release ();
1929 	}
1930       else
1931 	{
1932 	  store_inside_cost = INT_MAX;
1933 	  store_outside_cost = INT_MAX;
1934 	}
1935 
1936       if (load_inside_cost > store_inside_cost
1937 	  || (load_inside_cost == store_inside_cost
1938 	      && load_outside_cost > store_outside_cost))
1939 	{
1940 	  dr0 = first_store;
1941 	  peel_for_unknown_alignment.inside_cost = store_inside_cost;
1942 	  peel_for_unknown_alignment.outside_cost = store_outside_cost;
1943 	}
1944       else
1945 	{
1946 	  peel_for_unknown_alignment.inside_cost = load_inside_cost;
1947 	  peel_for_unknown_alignment.outside_cost = load_outside_cost;
1948 	}
1949 
1950       stmt_vector_for_cost prologue_cost_vec, epilogue_cost_vec;
1951       prologue_cost_vec.create (2);
1952       epilogue_cost_vec.create (2);
1953 
1954       int dummy2;
1955       peel_for_unknown_alignment.outside_cost += vect_get_known_peeling_cost
1956 	(loop_vinfo, estimated_npeels, &dummy2,
1957 	 &LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
1958 	 &prologue_cost_vec, &epilogue_cost_vec);
1959 
1960       prologue_cost_vec.release ();
1961       epilogue_cost_vec.release ();
1962 
1963       peel_for_unknown_alignment.peel_info.count = 1
1964 	+ STMT_VINFO_SAME_ALIGN_REFS
1965 	(vinfo_for_stmt (DR_STMT (dr0))).length ();
1966     }
1967 
1968   peel_for_unknown_alignment.peel_info.npeel = 0;
1969   peel_for_unknown_alignment.peel_info.dr = dr0;
1970 
1971   best_peel = peel_for_unknown_alignment;
1972 
1973   peel_for_known_alignment.inside_cost = INT_MAX;
1974   peel_for_known_alignment.outside_cost = INT_MAX;
1975   peel_for_known_alignment.peel_info.count = 0;
1976   peel_for_known_alignment.peel_info.dr = NULL;
1977 
1978   if (do_peeling && one_misalignment_known)
1979     {
      /* Peeling is possible, but no data access requires alignment in order
	 to be supported.  So we try to choose the best possible peeling from
	 the hash table.  */
1983       peel_for_known_alignment = vect_peeling_hash_choose_best_peeling
1984 	(&peeling_htab, loop_vinfo);
1985     }
1986 
1987   /* Compare costs of peeling for known and unknown alignment. */
1988   if (peel_for_known_alignment.peel_info.dr != NULL
1989       && peel_for_unknown_alignment.inside_cost
1990       >= peel_for_known_alignment.inside_cost)
1991     {
1992       best_peel = peel_for_known_alignment;
1993 
1994       /* If the best peeling for known alignment has NPEEL == 0, perform no
1995          peeling at all except if there is an unsupportable dr that we can
1996          align.  */
1997       if (best_peel.peel_info.npeel == 0 && !one_dr_unsupportable)
1998 	do_peeling = false;
1999     }
2000 
  /* If there is an unsupportable data ref, prefer it over all choices so far,
     since we would have to discard any chosen peeling unless it happened to
     align the unsupportable data ref as well.  */
2004   if (one_dr_unsupportable)
2005     dr0 = unsupportable_dr;
2006   else if (do_peeling)
2007     {
2008       /* Calculate the penalty for no peeling, i.e. leaving everything as-is.
2009 	 TODO: Use nopeel_outside_cost or get rid of it?  */
2010       unsigned nopeel_inside_cost = 0;
2011       unsigned nopeel_outside_cost = 0;
2012 
2013       stmt_vector_for_cost dummy;
2014       dummy.create (2);
2015       vect_get_peeling_costs_all_drs (datarefs, NULL, &nopeel_inside_cost,
2016 				      &nopeel_outside_cost, &dummy, 0, false);
2017       dummy.release ();
2018 
2019       /* Add epilogue costs.  As we do not peel for alignment here, no prologue
2020 	 costs will be recorded.  */
2021       stmt_vector_for_cost prologue_cost_vec, epilogue_cost_vec;
2022       prologue_cost_vec.create (2);
2023       epilogue_cost_vec.create (2);
2024 
2025       int dummy2;
2026       nopeel_outside_cost += vect_get_known_peeling_cost
2027 	(loop_vinfo, 0, &dummy2,
2028 	 &LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
2029 	 &prologue_cost_vec, &epilogue_cost_vec);
2030 
2031       prologue_cost_vec.release ();
2032       epilogue_cost_vec.release ();
2033 
2034       npeel = best_peel.peel_info.npeel;
2035       dr0 = best_peel.peel_info.dr;
2036 
      /* If doing no peeling is no more expensive than the best peeling we
	 have found so far, don't perform any peeling.  */
2039       if (nopeel_inside_cost <= best_peel.inside_cost)
2040 	do_peeling = false;
2041     }
2042 
2043   if (do_peeling)
2044     {
2045       stmt = DR_STMT (dr0);
2046       stmt_info = vinfo_for_stmt (stmt);
2047       vectype = STMT_VINFO_VECTYPE (stmt_info);
2048 
2049       if (known_alignment_for_access_p (dr0))
2050         {
2051 	  bool negative = tree_int_cst_compare (DR_STEP (dr0),
2052 						size_zero_node) < 0;
2053           if (!npeel)
2054             {
              /* Since it's known at compile time, compute the number of
                 iterations in the peeled loop (the peeling factor) for use in
                 updating DR_MISALIGNMENT values.  The peeling factor is the
                 number of elements needed to bring the access up to the
                 target alignment, i.e. the negated misalignment modulo the
                 target alignment, expressed as an element count.  */
2060 	      mis = negative ? DR_MISALIGNMENT (dr0) : -DR_MISALIGNMENT (dr0);
2061 	      unsigned int target_align = DR_TARGET_ALIGNMENT (dr0);
2062 	      npeel = ((mis & (target_align - 1))
2063 		       / vect_get_scalar_dr_size (dr0));
2064             }
2065 
2066 	  /* For interleaved data access every iteration accesses all the
2067 	     members of the group, therefore we divide the number of iterations
2068 	     by the group size.  */
2069 	  stmt_info = vinfo_for_stmt (DR_STMT (dr0));
2070 	  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2071 	    npeel /= GROUP_SIZE (stmt_info);
2072 
2073           if (dump_enabled_p ())
2074             dump_printf_loc (MSG_NOTE, vect_location,
2075                              "Try peeling by %d\n", npeel);
2076         }
2077 
2078       /* Ensure that all datarefs can be vectorized after the peel.  */
2079       if (!vect_peeling_supportable (loop_vinfo, dr0, npeel))
2080 	do_peeling = false;
2081 
2082       /* Check if all datarefs are supportable and log.  */
2083       if (do_peeling && known_alignment_for_access_p (dr0) && npeel == 0)
2084         {
2085           stat = vect_verify_datarefs_alignment (loop_vinfo);
2086           if (!stat)
2087             do_peeling = false;
2088           else
2089 	    return stat;
2090         }
2091 
2092       /* Cost model #1 - honor --param vect-max-peeling-for-alignment.  */
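      /* For illustration (assuming the parameter keeps its usual semantics):
	 passing "--param vect-max-peeling-for-alignment=0" effectively
	 disallows peeling for alignment here, while the (unsigned)-1
	 sentinel leaves the peeling amount unlimited.  */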
2093       if (do_peeling)
2094         {
2095           unsigned max_allowed_peel
2096             = PARAM_VALUE (PARAM_VECT_MAX_PEELING_FOR_ALIGNMENT);
2097           if (max_allowed_peel != (unsigned)-1)
2098             {
2099               unsigned max_peel = npeel;
2100               if (max_peel == 0)
2101                 {
2102 		  unsigned int target_align = DR_TARGET_ALIGNMENT (dr0);
2103 		  max_peel = target_align / vect_get_scalar_dr_size (dr0) - 1;
2104                 }
2105               if (max_peel > max_allowed_peel)
2106                 {
2107                   do_peeling = false;
2108                   if (dump_enabled_p ())
2109                     dump_printf_loc (MSG_NOTE, vect_location,
2110                         "Disable peeling, max peels reached: %d\n", max_peel);
2111                 }
2112             }
2113         }
2114 
2115       /* Cost model #2 - if peeling may result in a remaining loop not
2116 	 iterating enough to be vectorized then do not peel.  Since this
2117 	 is a cost heuristic rather than a correctness decision, use the
2118 	 most likely runtime value for variable vectorization factors.  */
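      /* For example (illustrative numbers only): with 10 known iterations,
	 an assumed vectorization factor of 8 and NPEEL of 0, MAX_PEEL is 7
	 and 10 < 8 + 7, so peeling is rejected because the remaining loop
	 would not iterate often enough to be vectorized.  */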
2119       if (do_peeling
2120 	  && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
2121 	{
2122 	  unsigned int assumed_vf = vect_vf_for_cost (loop_vinfo);
2123 	  unsigned int max_peel = npeel == 0 ? assumed_vf - 1 : npeel;
2124 	  if ((unsigned HOST_WIDE_INT) LOOP_VINFO_INT_NITERS (loop_vinfo)
2125 	      < assumed_vf + max_peel)
2126 	    do_peeling = false;
2127 	}
2128 
2129       if (do_peeling)
2130         {
2131           /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
2132              If the misalignment of DR_i is identical to that of dr0 then set
2133              DR_MISALIGNMENT (DR_i) to zero.  If the misalignment of DR_i and
2134              dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
2135              by the peeling factor times the element size of DR_i (MOD the
2136              vectorization factor times the size).  Otherwise, the
2137              misalignment of DR_i must be set to unknown.  */
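	  /* E.g. (hypothetical numbers): with NPEEL 2 and a 4-byte DR_i
	     whose misalignment is known but differs from DR0's, DR_i's
	     misalignment is incremented by 2 * 4 = 8 bytes (modulo the
	     vectorization factor times the element size, as described
	     above).  */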
2138 	  FOR_EACH_VEC_ELT (datarefs, i, dr)
2139 	    if (dr != dr0)
2140 	      {
2141 		/* Strided accesses perform only component accesses, alignment
2142 		   is irrelevant for them.  */
2143 		stmt_info = vinfo_for_stmt (DR_STMT (dr));
2144 		if (STMT_VINFO_STRIDED_P (stmt_info)
2145 		    && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
2146 		  continue;
2147 
2148 		vect_update_misalignment_for_peel (dr, dr0, npeel);
2149 	      }
2150 
2151           LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
2152           if (npeel)
2153             LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel;
2154           else
2155             LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
2156 	      = DR_MISALIGNMENT (dr0);
2157 	  SET_DR_MISALIGNMENT (dr0, 0);
2158 	  if (dump_enabled_p ())
2159             {
2160               dump_printf_loc (MSG_NOTE, vect_location,
2161                                "Alignment of access forced using peeling.\n");
2162               dump_printf_loc (MSG_NOTE, vect_location,
2163                                "Peeling for alignment will be applied.\n");
2164             }
2165 
	  /* The inside-loop cost will be accounted for in vectorizable_load
	     and vectorizable_store correctly with adjusted alignments.
	     Drop the body cost vector on the floor here.  */
2169 	  stat = vect_verify_datarefs_alignment (loop_vinfo);
2170 	  gcc_assert (stat);
2171           return stat;
2172         }
2173     }
2174 
2175   /* (2) Versioning to force alignment.  */
2176 
2177   /* Try versioning if:
2178      1) optimize loop for speed
2179      2) there is at least one unsupported misaligned data ref with an unknown
2180         misalignment, and
2181      3) all misaligned data refs with a known misalignment are supported, and
2182      4) the number of runtime alignment checks is within reason.  */
2183 
2184   do_versioning =
2185 	optimize_loop_nest_for_speed_p (loop)
2186 	&& (!loop->inner); /* FORNOW */
2187 
2188   if (do_versioning)
2189     {
2190       FOR_EACH_VEC_ELT (datarefs, i, dr)
2191         {
2192 	  stmt = DR_STMT (dr);
2193 	  stmt_info = vinfo_for_stmt (stmt);
2194 
2195 	  /* For interleaving, only the alignment of the first access
2196 	     matters.  */
2197 	  if (aligned_access_p (dr)
2198 	      || (STMT_VINFO_GROUPED_ACCESS (stmt_info)
2199 		  && GROUP_FIRST_ELEMENT (stmt_info) != stmt))
2200 	    continue;
2201 
2202 	  if (STMT_VINFO_STRIDED_P (stmt_info))
2203 	    {
2204 	      /* Strided loads perform only component accesses, alignment is
2205 		 irrelevant for them.  */
2206 	      if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
2207 		continue;
2208 	      do_versioning = false;
2209 	      break;
2210 	    }
2211 
2212 	  supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
2213 
2214           if (!supportable_dr_alignment)
2215             {
2216 	      gimple *stmt;
2217               int mask;
2218               tree vectype;
2219 
2220               if (known_alignment_for_access_p (dr)
2221                   || LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).length ()
2222                      >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
2223                 {
2224                   do_versioning = false;
2225                   break;
2226                 }
2227 
2228               stmt = DR_STMT (dr);
2229               vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
2230               gcc_assert (vectype);
2231 
2232 	      /* At present we don't support versioning for alignment
2233 		 with variable VF, since there's no guarantee that the
2234 		 VF is a power of two.  We could relax this if we added
2235 		 a way of enforcing a power-of-two size.  */
2236 	      unsigned HOST_WIDE_INT size;
2237 	      if (!GET_MODE_SIZE (TYPE_MODE (vectype)).is_constant (&size))
2238 		{
2239 		  do_versioning = false;
2240 		  break;
2241 		}
2242 
2243               /* The rightmost bits of an aligned address must be zeros.
2244                  Construct the mask needed for this test.  For example,
2245                  GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
2246                  mask must be 15 = 0xf. */
2247 	      mask = size - 1;
2248 
2249               /* FORNOW: use the same mask to test all potentially unaligned
2250                  references in the loop.  The vectorizer currently supports
2251                  a single vector size, see the reference to
2252                  GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
2253                  vectorization factor is computed.  */
2254               gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
2255                           || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
2256               LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
2257               LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).safe_push (
2258 		      DR_STMT (dr));
2259             }
2260         }
2261 
2262       /* Versioning requires at least one misaligned data reference.  */
2263       if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
2264         do_versioning = false;
2265       else if (!do_versioning)
2266         LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).truncate (0);
2267     }
2268 
2269   if (do_versioning)
2270     {
2271       vec<gimple *> may_misalign_stmts
2272         = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
2273       gimple *stmt;
2274 
2275       /* It can now be assumed that the data references in the statements
2276          in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
2277          of the loop being vectorized.  */
2278       FOR_EACH_VEC_ELT (may_misalign_stmts, i, stmt)
2279         {
2280           stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2281           dr = STMT_VINFO_DATA_REF (stmt_info);
2282 	  SET_DR_MISALIGNMENT (dr, 0);
2283 	  if (dump_enabled_p ())
2284             dump_printf_loc (MSG_NOTE, vect_location,
2285                              "Alignment of access forced using versioning.\n");
2286         }
2287 
2288       if (dump_enabled_p ())
2289         dump_printf_loc (MSG_NOTE, vect_location,
2290                          "Versioning for alignment will be applied.\n");
2291 
2292       /* Peeling and versioning can't be done together at this time.  */
2293       gcc_assert (! (do_peeling && do_versioning));
2294 
2295       stat = vect_verify_datarefs_alignment (loop_vinfo);
2296       gcc_assert (stat);
2297       return stat;
2298     }
2299 
2300   /* This point is reached if neither peeling nor versioning is being done.  */
2301   gcc_assert (! (do_peeling || do_versioning));
2302 
2303   stat = vect_verify_datarefs_alignment (loop_vinfo);
2304   return stat;
2305 }
2306 
2307 
2308 /* Function vect_find_same_alignment_drs.
2309 
2310    Update group and alignment relations according to the chosen
2311    vectorization factor.  */
2312 
2313 static void
2314 vect_find_same_alignment_drs (struct data_dependence_relation *ddr)
2315 {
2316   struct data_reference *dra = DDR_A (ddr);
2317   struct data_reference *drb = DDR_B (ddr);
2318   stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
2319   stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
2320 
2321   if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
2322     return;
2323 
2324   if (dra == drb)
2325     return;
2326 
2327   if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0)
2328       || !operand_equal_p (DR_OFFSET (dra), DR_OFFSET (drb), 0)
2329       || !operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0))
2330     return;
2331 
2332   /* Two references with distance zero have the same alignment.  */
2333   poly_offset_int diff = (wi::to_poly_offset (DR_INIT (dra))
2334 			  - wi::to_poly_offset (DR_INIT (drb)));
2335   if (maybe_ne (diff, 0))
2336     {
2337       /* Get the wider of the two alignments.  */
2338       unsigned int align_a = (vect_calculate_target_alignment (dra)
2339 			      / BITS_PER_UNIT);
2340       unsigned int align_b = (vect_calculate_target_alignment (drb)
2341 			      / BITS_PER_UNIT);
2342       unsigned int max_align = MAX (align_a, align_b);
2343 
2344       /* Require the gap to be a multiple of the larger vector alignment.  */
2345       if (!multiple_p (diff, max_align))
2346 	return;
2347     }
2348 
2349   STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a).safe_push (drb);
2350   STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b).safe_push (dra);
2351   if (dump_enabled_p ())
2352     {
2353       dump_printf_loc (MSG_NOTE, vect_location,
2354 		       "accesses have the same alignment: ");
2355       dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
2356       dump_printf (MSG_NOTE,  " and ");
2357       dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
2358       dump_printf (MSG_NOTE, "\n");
2359     }
2360 }
2361 
2362 
2363 /* Function vect_analyze_data_refs_alignment
2364 
2365    Analyze the alignment of the data-references in the loop.
2366    Return FALSE if a data reference is found that cannot be vectorized.  */
2367 
2368 bool
2369 vect_analyze_data_refs_alignment (loop_vec_info vinfo)
2370 {
2371   if (dump_enabled_p ())
2372     dump_printf_loc (MSG_NOTE, vect_location,
2373                      "=== vect_analyze_data_refs_alignment ===\n");
2374 
2375   /* Mark groups of data references with same alignment using
2376      data dependence information.  */
2377   vec<ddr_p> ddrs = vinfo->ddrs;
2378   struct data_dependence_relation *ddr;
2379   unsigned int i;
2380 
2381   FOR_EACH_VEC_ELT (ddrs, i, ddr)
2382     vect_find_same_alignment_drs (ddr);
2383 
2384   vec<data_reference_p> datarefs = vinfo->datarefs;
2385   struct data_reference *dr;
2386 
2387   vect_record_base_alignments (vinfo);
2388   FOR_EACH_VEC_ELT (datarefs, i, dr)
2389     {
2390       stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
2391       if (STMT_VINFO_VECTORIZABLE (stmt_info)
2392 	  && !vect_compute_data_ref_alignment (dr))
2393 	{
2394 	  /* Strided accesses perform only component accesses, misalignment
2395 	     information is irrelevant for them.  */
2396 	  if (STMT_VINFO_STRIDED_P (stmt_info)
2397 	      && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
2398 	    continue;
2399 
2400 	  if (dump_enabled_p ())
2401 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2402 			     "not vectorized: can't calculate alignment "
2403 			     "for data ref.\n");
2404 
2405 	  return false;
2406 	}
2407     }
2408 
2409   return true;
2410 }
2411 
2412 
2413 /* Analyze alignment of DRs of stmts in NODE.  */
2414 
2415 static bool
2416 vect_slp_analyze_and_verify_node_alignment (slp_tree node)
2417 {
2418   /* We vectorize from the first scalar stmt in the node unless
2419      the node is permuted in which case we start from the first
2420      element in the group.  */
2421   gimple *first_stmt = SLP_TREE_SCALAR_STMTS (node)[0];
2422   data_reference_p first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
2423   if (SLP_TREE_LOAD_PERMUTATION (node).exists ())
2424     first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt));
2425 
2426   data_reference_p dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
2427   if (! vect_compute_data_ref_alignment (dr)
2428       /* For creating the data-ref pointer we need alignment of the
2429 	 first element anyway.  */
2430       || (dr != first_dr
2431 	  && ! vect_compute_data_ref_alignment (first_dr))
2432       || ! verify_data_ref_alignment (dr))
2433     {
2434       if (dump_enabled_p ())
2435 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2436 			 "not vectorized: bad data alignment in basic "
2437 			 "block.\n");
2438       return false;
2439     }
2440 
2441   return true;
2442 }
2443 
/* Function vect_slp_analyze_and_verify_instance_alignment
2445 
2446    Analyze the alignment of the data-references in the SLP instance.
2447    Return FALSE if a data reference is found that cannot be vectorized.  */
2448 
2449 bool
2450 vect_slp_analyze_and_verify_instance_alignment (slp_instance instance)
2451 {
2452   if (dump_enabled_p ())
2453     dump_printf_loc (MSG_NOTE, vect_location,
2454                      "=== vect_slp_analyze_and_verify_instance_alignment ===\n");
2455 
2456   slp_tree node;
2457   unsigned i;
2458   FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, node)
2459     if (! vect_slp_analyze_and_verify_node_alignment (node))
2460       return false;
2461 
2462   node = SLP_INSTANCE_TREE (instance);
2463   if (STMT_VINFO_DATA_REF (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]))
2464       && ! vect_slp_analyze_and_verify_node_alignment
2465 	     (SLP_INSTANCE_TREE (instance)))
2466     return false;
2467 
2468   return true;
2469 }
2470 
2471 
2472 /* Analyze groups of accesses: check that DR belongs to a group of
2473    accesses of legal size, step, etc.  Detect gaps, single element
2474    interleaving, and other special cases. Set grouped access info.
2475    Collect groups of strided stores for further use in SLP analysis.
2476    Worker for vect_analyze_group_access.  */
2477 
2478 static bool
2479 vect_analyze_group_access_1 (struct data_reference *dr)
2480 {
2481   tree step = DR_STEP (dr);
2482   tree scalar_type = TREE_TYPE (DR_REF (dr));
2483   HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
2484   gimple *stmt = DR_STMT (dr);
2485   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2486   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2487   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2488   HOST_WIDE_INT dr_step = -1;
2489   HOST_WIDE_INT groupsize, last_accessed_element = 1;
2490   bool slp_impossible = false;
2491 
2492   /* For interleaving, GROUPSIZE is STEP counted in elements, i.e., the
2493      size of the interleaving group (including gaps).  */
2494   if (tree_fits_shwi_p (step))
2495     {
2496       dr_step = tree_to_shwi (step);
2497       /* Check that STEP is a multiple of type size.  Otherwise there is
2498          a non-element-sized gap at the end of the group which we
2499 	 cannot represent in GROUP_GAP or GROUP_SIZE.
2500 	 ???  As we can handle non-constant step fine here we should
2501 	 simply remove uses of GROUP_GAP between the last and first
2502 	 element and instead rely on DR_STEP.  GROUP_SIZE then would
2503 	 simply not include that gap.  */
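      /* E.g. (hypothetical): a step of 6 bytes with 4-byte elements is
	 rejected just below, whereas a step of 12 bytes gives a GROUPSIZE
	 of 3.  */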
2504       if ((dr_step % type_size) != 0)
2505 	{
2506 	  if (dump_enabled_p ())
2507 	    {
2508 	      dump_printf_loc (MSG_NOTE, vect_location,
2509 	                       "Step ");
2510 	      dump_generic_expr (MSG_NOTE, TDF_SLIM, step);
2511 	      dump_printf (MSG_NOTE,
2512 			   " is not a multiple of the element size for ");
2513 	      dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr));
2514 	      dump_printf (MSG_NOTE, "\n");
2515 	    }
2516 	  return false;
2517 	}
2518       groupsize = absu_hwi (dr_step) / type_size;
2519     }
2520   else
2521     groupsize = 0;
2522 
  /* A non-consecutive access is possible only if it is part of an
     interleaving group.  */
2524   if (!GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
2525     {
      /* Check if this DR is part of an interleaving group and is the only
	 element of that group accessed in the loop.  */
2528 
2529       /* Gaps are supported only for loads. STEP must be a multiple of the type
2530 	 size.  */
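      /* E.g. (hypothetical): a read of a[4*i] with 4-byte elements has a
	 step of 16 bytes, so GROUPSIZE is 4 and the access is recorded
	 below as a single-element interleaving group with GROUP_GAP 3.  */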
2531       if (DR_IS_READ (dr)
2532 	  && (dr_step % type_size) == 0
2533 	  && groupsize > 0)
2534 	{
2535 	  GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = stmt;
2536 	  GROUP_SIZE (vinfo_for_stmt (stmt)) = groupsize;
2537 	  GROUP_GAP (stmt_info) = groupsize - 1;
2538 	  if (dump_enabled_p ())
2539 	    {
2540 	      dump_printf_loc (MSG_NOTE, vect_location,
2541 	                       "Detected single element interleaving ");
2542 	      dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr));
2543 	      dump_printf (MSG_NOTE, " step ");
2544 	      dump_generic_expr (MSG_NOTE, TDF_SLIM, step);
2545 	      dump_printf (MSG_NOTE, "\n");
2546 	    }
2547 
2548 	  return true;
2549 	}
2550 
2551       if (dump_enabled_p ())
2552         {
2553  	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2554 	                   "not consecutive access ");
2555 	  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
2556         }
2557 
2558       if (bb_vinfo)
2559         {
2560           /* Mark the statement as unvectorizable.  */
2561           STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2562           return true;
2563         }
2564 
2565       dump_printf_loc (MSG_NOTE, vect_location, "using strided accesses\n");
2566       STMT_VINFO_STRIDED_P (stmt_info) = true;
2567       return true;
2568     }
2569 
2570   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt)
2571     {
2572       /* First stmt in the interleaving chain. Check the chain.  */
2573       gimple *next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
2574       struct data_reference *data_ref = dr;
2575       unsigned int count = 1;
2576       tree prev_init = DR_INIT (data_ref);
2577       gimple *prev = stmt;
2578       HOST_WIDE_INT diff, gaps = 0;
2579 
2580       /* By construction, all group members have INTEGER_CST DR_INITs.  */
2581       while (next)
2582         {
          /* Skip identical data-refs.  In case two or more stmts share a
             data-ref (supported only for loads), we vectorize only the first
             stmt, and the rest get their vectorized loads from the first
             one.  */
2587           if (!tree_int_cst_compare (DR_INIT (data_ref),
2588                                      DR_INIT (STMT_VINFO_DATA_REF (
2589 						   vinfo_for_stmt (next)))))
2590             {
2591               if (DR_IS_WRITE (data_ref))
2592                 {
2593                   if (dump_enabled_p ())
2594                     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2595                                      "Two store stmts share the same dr.\n");
2596                   return false;
2597                 }
2598 
2599 	      if (dump_enabled_p ())
2600 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2601 				 "Two or more load stmts share the same dr.\n");
2602 
2603               /* For load use the same data-ref load.  */
2604               GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
2605 
2606               prev = next;
2607               next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
2608               continue;
2609             }
2610 
2611           prev = next;
2612           data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
2613 
2614 	  /* All group members have the same STEP by construction.  */
2615 	  gcc_checking_assert (operand_equal_p (DR_STEP (data_ref), step, 0));
2616 
2617           /* Check that the distance between two accesses is equal to the type
2618              size. Otherwise, we have gaps.  */
2619           diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
2620                   - TREE_INT_CST_LOW (prev_init)) / type_size;
2621 	  if (diff != 1)
2622 	    {
2623 	      /* FORNOW: SLP of accesses with gaps is not supported.  */
2624 	      slp_impossible = true;
2625 	      if (DR_IS_WRITE (data_ref))
2626 		{
2627                   if (dump_enabled_p ())
2628                     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2629                                      "interleaved store with gaps\n");
2630 		  return false;
2631 		}
2632 
2633               gaps += diff - 1;
2634 	    }
2635 
2636 	  last_accessed_element += diff;
2637 
2638           /* Store the gap from the previous member of the group. If there is no
2639              gap in the access, GROUP_GAP is always 1.  */
2640           GROUP_GAP (vinfo_for_stmt (next)) = diff;
2641 
2642           prev_init = DR_INIT (data_ref);
2643           next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
2644           /* Count the number of data-refs in the chain.  */
2645           count++;
2646         }
2647 
2648       if (groupsize == 0)
2649         groupsize = count + gaps;
2650 
2651       /* This could be UINT_MAX but as we are generating code in a very
2652          inefficient way we have to cap earlier.  See PR78699 for example.  */
2653       if (groupsize > 4096)
2654 	{
2655 	  if (dump_enabled_p ())
2656 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2657 			     "group is too large\n");
2658 	  return false;
2659 	}
2660 
2661       /* Check that the size of the interleaving is equal to count for stores,
2662          i.e., that there are no gaps.  */
2663       if (groupsize != count
2664 	  && !DR_IS_READ (dr))
2665         {
2666 	  if (dump_enabled_p ())
2667 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2668 			     "interleaved store with gaps\n");
2669 	  return false;
2670 	}
2671 
2672       /* If there is a gap after the last load in the group it is the
2673 	 difference between the groupsize and the last accessed
2674 	 element.
2675 	 When there is no gap, this difference should be 0.  */
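      /* E.g. (hypothetical): for loads of a[3*i] and a[3*i+1] with a step
	 of 3 elements, GROUPSIZE is 3 and the last accessed element is 2,
	 so a gap of 1 element is recorded after the group.  */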
2676       GROUP_GAP (vinfo_for_stmt (stmt)) = groupsize - last_accessed_element;
2677 
2678       GROUP_SIZE (vinfo_for_stmt (stmt)) = groupsize;
2679       if (dump_enabled_p ())
2680 	{
2681 	  dump_printf_loc (MSG_NOTE, vect_location,
2682 			   "Detected interleaving ");
2683 	  if (DR_IS_READ (dr))
2684 	    dump_printf (MSG_NOTE, "load ");
2685 	  else
2686 	    dump_printf (MSG_NOTE, "store ");
2687 	  dump_printf (MSG_NOTE, "of size %u starting with ",
2688 		       (unsigned)groupsize);
2689 	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
2690 	  if (GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
2691 	    dump_printf_loc (MSG_NOTE, vect_location,
2692 			     "There is a gap of %u elements after the group\n",
2693 			     GROUP_GAP (vinfo_for_stmt (stmt)));
2694 	}
2695 
      /* SLP: create an SLP data structure for every interleaving group of
	 stores for further analysis in vect_analyze_slp.  */
2698       if (DR_IS_WRITE (dr) && !slp_impossible)
2699         {
2700           if (loop_vinfo)
2701             LOOP_VINFO_GROUPED_STORES (loop_vinfo).safe_push (stmt);
2702           if (bb_vinfo)
2703             BB_VINFO_GROUPED_STORES (bb_vinfo).safe_push (stmt);
2704         }
2705     }
2706 
2707   return true;
2708 }
2709 
2710 /* Analyze groups of accesses: check that DR belongs to a group of
2711    accesses of legal size, step, etc.  Detect gaps, single element
2712    interleaving, and other special cases. Set grouped access info.
2713    Collect groups of strided stores for further use in SLP analysis.  */
2714 
2715 static bool
2716 vect_analyze_group_access (struct data_reference *dr)
2717 {
2718   if (!vect_analyze_group_access_1 (dr))
2719     {
2720       /* Dissolve the group if present.  */
2721       gimple *next;
2722       gimple *stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (DR_STMT (dr)));
2723       while (stmt)
2724 	{
2725 	  stmt_vec_info vinfo = vinfo_for_stmt (stmt);
2726 	  next = GROUP_NEXT_ELEMENT (vinfo);
2727 	  GROUP_FIRST_ELEMENT (vinfo) = NULL;
2728 	  GROUP_NEXT_ELEMENT (vinfo) = NULL;
2729 	  stmt = next;
2730 	}
2731       return false;
2732     }
2733   return true;
2734 }
2735 
2736 /* Analyze the access pattern of the data-reference DR.
2737    In case of non-consecutive accesses call vect_analyze_group_access() to
2738    analyze groups of accesses.  */
2739 
2740 static bool
2741 vect_analyze_data_ref_access (struct data_reference *dr)
2742 {
2743   tree step = DR_STEP (dr);
2744   tree scalar_type = TREE_TYPE (DR_REF (dr));
2745   gimple *stmt = DR_STMT (dr);
2746   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2747   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2748   struct loop *loop = NULL;
2749 
2750   if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2751     return true;
2752 
2753   if (loop_vinfo)
2754     loop = LOOP_VINFO_LOOP (loop_vinfo);
2755 
2756   if (loop_vinfo && !step)
2757     {
2758       if (dump_enabled_p ())
2759 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2760 	                 "bad data-ref access in loop\n");
2761       return false;
2762     }
2763 
2764   /* Allow loads with zero step in inner-loop vectorization.  */
2765   if (loop_vinfo && integer_zerop (step))
2766     {
2767       GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
2768       if (!nested_in_vect_loop_p (loop, stmt))
2769 	return DR_IS_READ (dr);
      /* Allow references with zero step for outer loops marked
	 with pragma omp simd only, as it guarantees the absence of
	 loop-carried dependencies between inner loop iterations.  */
2773       if (loop->safelen < 2)
2774 	{
2775 	  if (dump_enabled_p ())
2776 	    dump_printf_loc (MSG_NOTE, vect_location,
2777 			     "zero step in inner loop of nest\n");
2778 	  return false;
2779 	}
2780     }
2781 
2782   if (loop && nested_in_vect_loop_p (loop, stmt))
2783     {
2784       /* Interleaved accesses are not yet supported within outer-loop
2785         vectorization for references in the inner-loop.  */
2786       GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
2787 
2788       /* For the rest of the analysis we use the outer-loop step.  */
2789       step = STMT_VINFO_DR_STEP (stmt_info);
2790       if (integer_zerop (step))
2791 	{
2792 	  if (dump_enabled_p ())
2793 	    dump_printf_loc (MSG_NOTE, vect_location,
2794 	                     "zero step in outer loop.\n");
2795 	  return DR_IS_READ (dr);
2796 	}
2797     }
2798 
2799   /* Consecutive?  */
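  /* E.g. (hypothetical): for a 4-byte element type, a step of 4 (a[i]) or
     of -4 (a reverse access such as a[-i]) is treated as consecutive below
     and therefore not as interleaving.  */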
2800   if (TREE_CODE (step) == INTEGER_CST)
2801     {
2802       HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2803       if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))
2804 	  || (dr_step < 0
2805 	      && !compare_tree_int (TYPE_SIZE_UNIT (scalar_type), -dr_step)))
2806 	{
2807 	  /* Mark that it is not interleaving.  */
2808 	  GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
2809 	  return true;
2810 	}
2811     }
2812 
2813   if (loop && nested_in_vect_loop_p (loop, stmt))
2814     {
2815       if (dump_enabled_p ())
2816 	dump_printf_loc (MSG_NOTE, vect_location,
2817 	                 "grouped access in outer loop.\n");
2818       return false;
2819     }
2820 
2821 
  /* Assume this is a DR handled by the non-constant strided load case.  */
2823   if (TREE_CODE (step) != INTEGER_CST)
2824     return (STMT_VINFO_STRIDED_P (stmt_info)
2825 	    && (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
2826 		|| vect_analyze_group_access (dr)));
2827 
2828   /* Not consecutive access - check if it's a part of interleaving group.  */
2829   return vect_analyze_group_access (dr);
2830 }
2831 
/* Comparison function used to sort data-references DRA and DRB into
   chunks suitable for grouping.  */
2834 
2835 static int
2836 dr_group_sort_cmp (const void *dra_, const void *drb_)
2837 {
2838   data_reference_p dra = *(data_reference_p *)const_cast<void *>(dra_);
2839   data_reference_p drb = *(data_reference_p *)const_cast<void *>(drb_);
2840   int cmp;
2841 
2842   /* Stabilize sort.  */
2843   if (dra == drb)
2844     return 0;
2845 
2846   /* DRs in different loops never belong to the same group.  */
2847   loop_p loopa = gimple_bb (DR_STMT (dra))->loop_father;
2848   loop_p loopb = gimple_bb (DR_STMT (drb))->loop_father;
2849   if (loopa != loopb)
2850     return loopa->num < loopb->num ? -1 : 1;
2851 
2852   /* Ordering of DRs according to base.  */
2853   cmp = data_ref_compare_tree (DR_BASE_ADDRESS (dra),
2854 			       DR_BASE_ADDRESS (drb));
2855   if (cmp != 0)
2856     return cmp;
2857 
2858   /* And according to DR_OFFSET.  */
2859   cmp = data_ref_compare_tree (DR_OFFSET (dra), DR_OFFSET (drb));
2860   if (cmp != 0)
2861     return cmp;
2862 
2863   /* Put reads before writes.  */
2864   if (DR_IS_READ (dra) != DR_IS_READ (drb))
2865     return DR_IS_READ (dra) ? -1 : 1;
2866 
2867   /* Then sort after access size.  */
2868   cmp = data_ref_compare_tree (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))),
2869 			       TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
2870   if (cmp != 0)
2871     return cmp;
2872 
2873   /* And after step.  */
2874   cmp = data_ref_compare_tree (DR_STEP (dra), DR_STEP (drb));
2875   if (cmp != 0)
2876     return cmp;
2877 
2878   /* Then sort after DR_INIT.  In case of identical DRs sort after stmt UID.  */
2879   cmp = data_ref_compare_tree (DR_INIT (dra), DR_INIT (drb));
2880   if (cmp == 0)
2881     return gimple_uid (DR_STMT (dra)) < gimple_uid (DR_STMT (drb)) ? -1 : 1;
2882   return cmp;
2883 }
2884 
2885 /* If OP is the result of a conversion, return the unconverted value,
2886    otherwise return null.  */
2887 
2888 static tree
2889 strip_conversion (tree op)
2890 {
2891   if (TREE_CODE (op) != SSA_NAME)
2892     return NULL_TREE;
2893   gimple *stmt = SSA_NAME_DEF_STMT (op);
2894   if (!is_gimple_assign (stmt)
2895       || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt)))
2896     return NULL_TREE;
2897   return gimple_assign_rhs1 (stmt);
2898 }
2899 
2900 /* Return true if vectorizable_* routines can handle statements STMT1
2901    and STMT2 being in a single group.  */
2902 
2903 static bool
2904 can_group_stmts_p (gimple *stmt1, gimple *stmt2)
2905 {
2906   if (gimple_assign_single_p (stmt1))
2907     return gimple_assign_single_p (stmt2);
2908 
2909   if (is_gimple_call (stmt1) && gimple_call_internal_p (stmt1))
2910     {
2911       /* Check for two masked loads or two masked stores.  */
2912       if (!is_gimple_call (stmt2) || !gimple_call_internal_p (stmt2))
2913 	return false;
2914       internal_fn ifn = gimple_call_internal_fn (stmt1);
2915       if (ifn != IFN_MASK_LOAD && ifn != IFN_MASK_STORE)
2916 	return false;
2917       if (ifn != gimple_call_internal_fn (stmt2))
2918 	return false;
2919 
2920       /* Check that the masks are the same.  Cope with casts of masks,
2921 	 like those created by build_mask_conversion.  */
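      /* E.g. (hypothetical gimple): a mask _1 used directly by one call and
	 via a conversion "_2 = (<other boolean type>) _1" by the other is
	 still accepted once the conversion is stripped below.  */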
2922       tree mask1 = gimple_call_arg (stmt1, 2);
2923       tree mask2 = gimple_call_arg (stmt2, 2);
2924       if (!operand_equal_p (mask1, mask2, 0))
2925 	{
2926 	  mask1 = strip_conversion (mask1);
2927 	  if (!mask1)
2928 	    return false;
2929 	  mask2 = strip_conversion (mask2);
2930 	  if (!mask2)
2931 	    return false;
2932 	  if (!operand_equal_p (mask1, mask2, 0))
2933 	    return false;
2934 	}
2935       return true;
2936     }
2937 
2938   return false;
2939 }
2940 
2941 /* Function vect_analyze_data_ref_accesses.
2942 
2943    Analyze the access pattern of all the data references in the loop.
2944 
2945    FORNOW: the only access pattern that is considered vectorizable is a
2946 	   simple step 1 (consecutive) access.
2947 
2948    FORNOW: handle only arrays and pointer accesses.  */
2949 
2950 bool
2951 vect_analyze_data_ref_accesses (vec_info *vinfo)
2952 {
2953   unsigned int i;
2954   vec<data_reference_p> datarefs = vinfo->datarefs;
2955   struct data_reference *dr;
2956 
2957   if (dump_enabled_p ())
2958     dump_printf_loc (MSG_NOTE, vect_location,
2959                      "=== vect_analyze_data_ref_accesses ===\n");
2960 
2961   if (datarefs.is_empty ())
2962     return true;
2963 
2964   /* Sort the array of datarefs to make building the interleaving chains
2965      linear.  Don't modify the original vector's order, it is needed for
2966      determining what dependencies are reversed.  */
2967   vec<data_reference_p> datarefs_copy = datarefs.copy ();
2968   datarefs_copy.qsort (dr_group_sort_cmp);
2969 
2970   /* Build the interleaving chains.  */
2971   for (i = 0; i < datarefs_copy.length () - 1;)
2972     {
2973       data_reference_p dra = datarefs_copy[i];
2974       stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
2975       stmt_vec_info lastinfo = NULL;
2976       if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a)
2977 	  || STMT_VINFO_GATHER_SCATTER_P (stmtinfo_a))
2978 	{
2979 	  ++i;
2980 	  continue;
2981 	}
2982       for (i = i + 1; i < datarefs_copy.length (); ++i)
2983 	{
2984 	  data_reference_p drb = datarefs_copy[i];
2985 	  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
2986 	  if (!STMT_VINFO_VECTORIZABLE (stmtinfo_b)
2987 	      || STMT_VINFO_GATHER_SCATTER_P (stmtinfo_b))
2988 	    break;
2989 
	  /* ???  Imperfect sorting (non-compatible types, non-modulo
	     accesses, same accesses) can lead to a group being artificially
	     split here as we don't just skip over those.  If it really
	     matters we can push those to a worklist and re-iterate
	     over them.  Then we can just skip ahead to the next DR here.  */
2995 
2996 	  /* DRs in a different loop should not be put into the same
2997 	     interleaving group.  */
2998 	  if (gimple_bb (DR_STMT (dra))->loop_father
2999 	      != gimple_bb (DR_STMT (drb))->loop_father)
3000 	    break;
3001 
	  /* Check that the data-refs have the same first location (except
	     init) and that they are both either loads or stores (not one
	     load and one store, and not a mix of masked and unmasked
	     accesses).  */
3005 	  if (DR_IS_READ (dra) != DR_IS_READ (drb)
3006 	      || data_ref_compare_tree (DR_BASE_ADDRESS (dra),
3007 					DR_BASE_ADDRESS (drb)) != 0
3008 	      || data_ref_compare_tree (DR_OFFSET (dra), DR_OFFSET (drb)) != 0
3009 	      || !can_group_stmts_p (DR_STMT (dra), DR_STMT (drb)))
3010 	    break;
3011 
3012 	  /* Check that the data-refs have the same constant size.  */
3013 	  tree sza = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra)));
3014 	  tree szb = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb)));
3015 	  if (!tree_fits_uhwi_p (sza)
3016 	      || !tree_fits_uhwi_p (szb)
3017 	      || !tree_int_cst_equal (sza, szb))
3018 	    break;
3019 
3020 	  /* Check that the data-refs have the same step.  */
3021 	  if (data_ref_compare_tree (DR_STEP (dra), DR_STEP (drb)) != 0)
3022 	    break;
3023 
3024 	  /* Check the types are compatible.
3025 	     ???  We don't distinguish this during sorting.  */
3026 	  if (!types_compatible_p (TREE_TYPE (DR_REF (dra)),
3027 				   TREE_TYPE (DR_REF (drb))))
3028 	    break;
3029 
3030 	  /* Check that the DR_INITs are compile-time constants.  */
3031 	  if (TREE_CODE (DR_INIT (dra)) != INTEGER_CST
3032 	      || TREE_CODE (DR_INIT (drb)) != INTEGER_CST)
3033 	    break;
3034 
3035 	  /* Sorting has ensured that DR_INIT (dra) <= DR_INIT (drb).  */
3036 	  HOST_WIDE_INT init_a = TREE_INT_CST_LOW (DR_INIT (dra));
3037 	  HOST_WIDE_INT init_b = TREE_INT_CST_LOW (DR_INIT (drb));
3038 	  HOST_WIDE_INT init_prev
3039 	    = TREE_INT_CST_LOW (DR_INIT (datarefs_copy[i-1]));
3040 	  gcc_assert (init_a <= init_b
3041 		      && init_a <= init_prev
3042 		      && init_prev <= init_b);
3043 
3044 	  /* Do not place the same access in the interleaving chain twice.  */
3045 	  if (init_b == init_prev)
3046 	    {
3047 	      gcc_assert (gimple_uid (DR_STMT (datarefs_copy[i-1]))
3048 			  < gimple_uid (DR_STMT (drb)));
3049 	      /* ???  For now we simply "drop" the later reference which is
3050 	         otherwise the same rather than finishing off this group.
3051 		 In the end we'd want to re-process duplicates forming
3052 		 multiple groups from the refs, likely by just collecting
3053 		 all candidates (including duplicates and split points
		 below) in a vector and then processing them together.  */
3055 	      continue;
3056 	    }
3057 
3058 	  /* If init_b == init_a + the size of the type * k, we have an
3059 	     interleaving, and DRA is accessed before DRB.  */
3060 	  HOST_WIDE_INT type_size_a = tree_to_uhwi (sza);
3061 	  if (type_size_a == 0
3062 	      || (init_b - init_a) % type_size_a != 0)
3063 	    break;
3064 
	  /* If we have a store, require that the accesses are adjacent.
	     This splits groups into chunks we support (we don't support
	     vectorization of stores with gaps).  */
3068 	  if (!DR_IS_READ (dra) && init_b - init_prev != type_size_a)
3069 	    break;
3070 
	  /* If the step is constant and nonzero, require that it is greater
	     than the difference between the data-refs' inits; this splits
	     groups into suitable sizes.  */
3074 	  if (tree_fits_shwi_p (DR_STEP (dra)))
3075 	    {
3076 	      HOST_WIDE_INT step = tree_to_shwi (DR_STEP (dra));
3077 	      if (step != 0 && step <= (init_b - init_a))
3078 		break;
3079 	    }
3080 
3081 	  if (dump_enabled_p ())
3082 	    {
3083 	      dump_printf_loc (MSG_NOTE, vect_location,
3084 			       "Detected interleaving ");
3085 	      if (DR_IS_READ (dra))
3086 		dump_printf (MSG_NOTE, "load ");
3087 	      else
3088 		dump_printf (MSG_NOTE, "store ");
3089 	      dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
3090 	      dump_printf (MSG_NOTE,  " and ");
3091 	      dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
3092 	      dump_printf (MSG_NOTE, "\n");
3093 	    }
3094 
3095 	  /* Link the found element into the group list.  */
3096 	  if (!GROUP_FIRST_ELEMENT (stmtinfo_a))
3097 	    {
3098 	      GROUP_FIRST_ELEMENT (stmtinfo_a) = DR_STMT (dra);
3099 	      lastinfo = stmtinfo_a;
3100 	    }
3101 	  GROUP_FIRST_ELEMENT (stmtinfo_b) = DR_STMT (dra);
3102 	  GROUP_NEXT_ELEMENT (lastinfo) = DR_STMT (drb);
3103 	  lastinfo = stmtinfo_b;
3104 	}
3105     }
3106 
3107   FOR_EACH_VEC_ELT (datarefs_copy, i, dr)
3108     if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
3109         && !vect_analyze_data_ref_access (dr))
3110       {
3111 	if (dump_enabled_p ())
3112 	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3113 	                   "not vectorized: complicated access pattern.\n");
3114 
3115         if (is_a <bb_vec_info> (vinfo))
3116           {
3117             /* Mark the statement as not vectorizable.  */
3118             STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
3119             continue;
3120           }
3121         else
3122 	  {
3123 	    datarefs_copy.release ();
3124 	    return false;
3125 	  }
3126       }
3127 
3128   datarefs_copy.release ();
3129   return true;
3130 }
3131 
3132 /* Function vect_vfa_segment_size.
3133 
3134    Input:
3135      DR: The data reference.
3136      LENGTH_FACTOR: segment length to consider.
3137 
3138    Return a value suitable for the dr_with_seg_len::seg_len field.
3139    This is the "distance travelled" by the pointer from the first
3140    iteration in the segment to the last.  Note that it does not include
3141    the size of the access; in effect it only describes the first byte.  */
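
/* For example (illustrative numbers only): with a DR_STEP of 4 bytes and a
   LENGTH_FACTOR of 8, the segment length is 4 * (8 - 1) = 28 bytes; the
   size of the access itself is accounted for separately, see
   vect_vfa_access_size below.  */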
3142 
3143 static tree
3144 vect_vfa_segment_size (struct data_reference *dr, tree length_factor)
3145 {
3146   length_factor = size_binop (MINUS_EXPR,
3147 			      fold_convert (sizetype, length_factor),
3148 			      size_one_node);
3149   return size_binop (MULT_EXPR, fold_convert (sizetype, DR_STEP (dr)),
3150 		     length_factor);
3151 }
3152 
3153 /* Return a value that, when added to abs (vect_vfa_segment_size (dr)),
3154    gives the worst-case number of bytes covered by the segment.  */
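
/* For example (illustrative numbers only): for the leading access of a
   group of four contiguous int accesses with no gap, the access size is
   4 * 4 = 16 bytes; if the access uses dr_explicit_realign_optimized we
   may load a whole vector, so the size of a vector replaces the size of
   the final scalar element.  */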
3155 
3156 static unsigned HOST_WIDE_INT
3157 vect_vfa_access_size (data_reference *dr)
3158 {
3159   stmt_vec_info stmt_vinfo = vinfo_for_stmt (DR_STMT (dr));
3160   tree ref_type = TREE_TYPE (DR_REF (dr));
3161   unsigned HOST_WIDE_INT ref_size = tree_to_uhwi (TYPE_SIZE_UNIT (ref_type));
3162   unsigned HOST_WIDE_INT access_size = ref_size;
3163   if (GROUP_FIRST_ELEMENT (stmt_vinfo))
3164     {
3165       gcc_assert (GROUP_FIRST_ELEMENT (stmt_vinfo) == DR_STMT (dr));
3166       access_size *= GROUP_SIZE (stmt_vinfo) - GROUP_GAP (stmt_vinfo);
3167     }
3168   if (STMT_VINFO_VEC_STMT (stmt_vinfo)
3169       && (vect_supportable_dr_alignment (dr, false)
3170 	  == dr_explicit_realign_optimized))
3171     {
3172       /* We might access a full vector's worth.  */
3173       tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
3174       access_size += tree_to_uhwi (TYPE_SIZE_UNIT (vectype)) - ref_size;
3175     }
3176   return access_size;
3177 }
3178 
3179 /* Get the minimum alignment for all the scalar accesses that DR describes.  */
3180 
3181 static unsigned int
3182 vect_vfa_align (const data_reference *dr)
3183 {
3184   return TYPE_ALIGN_UNIT (TREE_TYPE (DR_REF (dr)));
3185 }
3186 
/* Function vect_compile_time_alias.
3188 
3189    Given data references A and B with equal base and offset, see whether
3190    the alias relation can be decided at compilation time.  Return 1 if
3191    it can and the references alias, 0 if it can and the references do
3192    not alias, and -1 if we cannot decide at compile time.  SEGMENT_LENGTH_A,
3193    SEGMENT_LENGTH_B, ACCESS_SIZE_A and ACCESS_SIZE_B are the equivalent
3194    of dr_with_seg_len::{seg_len,access_size} for A and B.  */
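
/* For example (illustrative numbers only): with DR_INIT values 0 and 16,
   positive steps, segment lengths of 12 bytes and access sizes of 4 bytes,
   the byte ranges are [0, 16) and [16, 32), which cannot overlap, so the
   function returns 0; had the second DR_INIT been 8 the ranges would be
   known to overlap and it would return 1.  */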
3195 
3196 static int
3197 vect_compile_time_alias (struct data_reference *a, struct data_reference *b,
3198 			 tree segment_length_a, tree segment_length_b,
3199 			 unsigned HOST_WIDE_INT access_size_a,
3200 			 unsigned HOST_WIDE_INT access_size_b)
3201 {
3202   poly_offset_int offset_a = wi::to_poly_offset (DR_INIT (a));
3203   poly_offset_int offset_b = wi::to_poly_offset (DR_INIT (b));
3204   poly_uint64 const_length_a;
3205   poly_uint64 const_length_b;
3206 
  /* For a negative step we need to adjust the address range by
     TYPE_SIZE_UNIT bytes; e.g., for int accesses running from a[3]
     down to a[1] the range is [a+4, a+16) instead of [a, a+12).  */
3210   if (tree_int_cst_compare (DR_STEP (a), size_zero_node) < 0)
3211     {
3212       const_length_a = (-wi::to_poly_wide (segment_length_a)).force_uhwi ();
3213       offset_a = (offset_a + access_size_a) - const_length_a;
3214     }
3215   else
3216     const_length_a = tree_to_poly_uint64 (segment_length_a);
3217   if (tree_int_cst_compare (DR_STEP (b), size_zero_node) < 0)
3218     {
3219       const_length_b = (-wi::to_poly_wide (segment_length_b)).force_uhwi ();
3220       offset_b = (offset_b + access_size_b) - const_length_b;
3221     }
3222   else
3223     const_length_b = tree_to_poly_uint64 (segment_length_b);
3224 
3225   const_length_a += access_size_a;
3226   const_length_b += access_size_b;
3227 
3228   if (ranges_known_overlap_p (offset_a, const_length_a,
3229 			      offset_b, const_length_b))
3230     return 1;
3231 
3232   if (!ranges_maybe_overlap_p (offset_a, const_length_a,
3233 			       offset_b, const_length_b))
3234     return 0;
3235 
3236   return -1;
3237 }
3238 
3239 /* Return true if the minimum nonzero dependence distance for loop LOOP_DEPTH
3240    in DDR is >= VF.  */
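
/* For example (illustrative numbers only): if every distance vector entry
   for LOOP_DEPTH is at least 8 and the vectorization factor is 4, the two
   references in DDR cannot conflict within a single vector iteration, so
   the caller can drop the corresponding runtime alias check.  */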
3241 
3242 static bool
3243 dependence_distance_ge_vf (data_dependence_relation *ddr,
3244 			   unsigned int loop_depth, poly_uint64 vf)
3245 {
3246   if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE
3247       || DDR_NUM_DIST_VECTS (ddr) == 0)
3248     return false;
3249 
3250   /* If the dependence is exact, we should have limited the VF instead.  */
3251   gcc_checking_assert (DDR_COULD_BE_INDEPENDENT_P (ddr));
3252 
3253   unsigned int i;
3254   lambda_vector dist_v;
3255   FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
3256     {
3257       HOST_WIDE_INT dist = dist_v[loop_depth];
3258       if (dist != 0
3259 	  && !(dist > 0 && DDR_REVERSED_P (ddr))
3260 	  && maybe_lt ((unsigned HOST_WIDE_INT) abs_hwi (dist), vf))
3261 	return false;
3262     }
3263 
3264   if (dump_enabled_p ())
3265     {
3266       dump_printf_loc (MSG_NOTE, vect_location,
3267 		       "dependence distance between ");
3268       dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_A (ddr)));
3269       dump_printf (MSG_NOTE,  " and ");
3270       dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_B (ddr)));
3271       dump_printf (MSG_NOTE,  " is >= VF\n");
3272     }
3273 
3274   return true;
3275 }
3276 
3277 /* Dump LOWER_BOUND using flags DUMP_KIND.  Dumps are known to be enabled.  */
3278 
3279 static void
3280 dump_lower_bound (int dump_kind, const vec_lower_bound &lower_bound)
3281 {
3282   dump_printf (dump_kind, "%s (", lower_bound.unsigned_p ? "unsigned" : "abs");
3283   dump_generic_expr (dump_kind, TDF_SLIM, lower_bound.expr);
3284   dump_printf (dump_kind, ") >= ");
3285   dump_dec (dump_kind, lower_bound.min_value);
3286 }
3287 
3288 /* Record that the vectorized loop requires the vec_lower_bound described
3289    by EXPR, UNSIGNED_P and MIN_VALUE.  */
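
/* For example, vect_prune_runtime_alias_test_list below converts each
   "step is nonzero" condition into the bound test (unsigned) step >= 1;
   if a stronger bound on the same expression is recorded later, it simply
   replaces the weaker minimum value.  */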
3290 
3291 static void
3292 vect_check_lower_bound (loop_vec_info loop_vinfo, tree expr, bool unsigned_p,
3293 			poly_uint64 min_value)
3294 {
3295   vec<vec_lower_bound> lower_bounds = LOOP_VINFO_LOWER_BOUNDS (loop_vinfo);
3296   for (unsigned int i = 0; i < lower_bounds.length (); ++i)
3297     if (operand_equal_p (lower_bounds[i].expr, expr, 0))
3298       {
3299 	unsigned_p &= lower_bounds[i].unsigned_p;
3300 	min_value = upper_bound (lower_bounds[i].min_value, min_value);
3301 	if (lower_bounds[i].unsigned_p != unsigned_p
3302 	    || maybe_lt (lower_bounds[i].min_value, min_value))
3303 	  {
3304 	    lower_bounds[i].unsigned_p = unsigned_p;
3305 	    lower_bounds[i].min_value = min_value;
3306 	    if (dump_enabled_p ())
3307 	      {
3308 		dump_printf_loc (MSG_NOTE, vect_location,
3309 				 "updating run-time check to ");
3310 		dump_lower_bound (MSG_NOTE, lower_bounds[i]);
3311 		dump_printf (MSG_NOTE, "\n");
3312 	      }
3313 	  }
3314 	return;
3315       }
3316 
3317   vec_lower_bound lower_bound (expr, unsigned_p, min_value);
3318   if (dump_enabled_p ())
3319     {
3320       dump_printf_loc (MSG_NOTE, vect_location, "need a run-time check that ");
3321       dump_lower_bound (MSG_NOTE, lower_bound);
3322       dump_printf (MSG_NOTE, "\n");
3323     }
3324   LOOP_VINFO_LOWER_BOUNDS (loop_vinfo).safe_push (lower_bound);
3325 }
3326 
3327 /* Return true if it's unlikely that the step of the vectorized form of DR
3328    will span fewer than GAP bytes.  */
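
/* For example (illustrative numbers only): with an estimated vectorization
   factor of 4, a group size of 2 and a scalar access size of 4 bytes, a gap
   of up to 4 * 2 * 4 = 32 bytes counts as small.  */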
3329 
3330 static bool
3331 vect_small_gap_p (loop_vec_info loop_vinfo, data_reference *dr, poly_int64 gap)
3332 {
3333   stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
3334   HOST_WIDE_INT count
3335     = estimated_poly_value (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
3336   if (GROUP_FIRST_ELEMENT (stmt_info))
3337     count *= GROUP_SIZE (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info)));
3338   return estimated_poly_value (gap) <= count * vect_get_scalar_dr_size (dr);
3339 }
3340 
3341 /* Return true if we know that there is no alias between DR_A and DR_B
3342    when abs (DR_STEP (DR_A)) >= N for some N.  When returning true, set
3343    *LOWER_BOUND_OUT to this N.  */
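
/* For example (illustrative sketch): for two accesses p[i] and p[i + 1]
   into the same array of doubles, with equal base, offset and step, the
   combined footprint relative to the first access is 8 + 8 = 16 bytes, so
   *LOWER_BOUND_OUT is set to 16 and there is no alias whenever
   abs (DR_STEP) >= 16.  */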
3344 
3345 static bool
3346 vectorizable_with_step_bound_p (data_reference *dr_a, data_reference *dr_b,
3347 				poly_uint64 *lower_bound_out)
3348 {
3349   /* Check that there is a constant gap of known sign between DR_A
3350      and DR_B.  */
3351   poly_int64 init_a, init_b;
3352   if (!operand_equal_p (DR_BASE_ADDRESS (dr_a), DR_BASE_ADDRESS (dr_b), 0)
3353       || !operand_equal_p (DR_OFFSET (dr_a), DR_OFFSET (dr_b), 0)
3354       || !operand_equal_p (DR_STEP (dr_a), DR_STEP (dr_b), 0)
3355       || !poly_int_tree_p (DR_INIT (dr_a), &init_a)
3356       || !poly_int_tree_p (DR_INIT (dr_b), &init_b)
3357       || !ordered_p (init_a, init_b))
3358     return false;
3359 
3360   /* Sort DR_A and DR_B by the address they access.  */
3361   if (maybe_lt (init_b, init_a))
3362     {
3363       std::swap (init_a, init_b);
3364       std::swap (dr_a, dr_b);
3365     }
3366 
3367   /* If the two accesses could be dependent within a scalar iteration,
3368      make sure that we'd retain their order.  */
3369   if (maybe_gt (init_a + vect_get_scalar_dr_size (dr_a), init_b)
3370       && !vect_preserves_scalar_order_p (DR_STMT (dr_a), DR_STMT (dr_b)))
3371     return false;
3372 
3373   /* There is no alias if abs (DR_STEP) is greater than or equal to
3374      the bytes spanned by the combination of the two accesses.  */
3375   *lower_bound_out = init_b + vect_get_scalar_dr_size (dr_b) - init_a;
3376   return true;
3377 }
3378 
3379 /* Function vect_prune_runtime_alias_test_list.
3380 
3381    Prune a list of ddrs to be tested at run-time by versioning for alias.
3382    Merge several alias checks into one if possible.
   Return FALSE if the resulting list of ddrs is longer than allowed by
   PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE.  */
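
/* Illustrative sketch (not from the original sources): for

     for (i = 0; i < n; i++)
       a[i] = b[i] + 1;

   with A and B possibly overlapping, the pruned list typically contains a
   single segment-overlap test for the two references, which is later
   expanded into a runtime check that selects between the vectorized and
   the scalar loop.  */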
3385 
3386 bool
3387 vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
3388 {
3389   typedef pair_hash <tree_operand_hash, tree_operand_hash> tree_pair_hash;
3390   hash_set <tree_pair_hash> compared_objects;
3391 
3392   vec<ddr_p> may_alias_ddrs = LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
3393   vec<dr_with_seg_len_pair_t> &comp_alias_ddrs
3394     = LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
3395   vec<vec_object_pair> &check_unequal_addrs
3396     = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo);
3397   poly_uint64 vect_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3398   tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
3399 
3400   ddr_p ddr;
3401   unsigned int i;
3402   tree length_factor;
3403 
3404   if (dump_enabled_p ())
3405     dump_printf_loc (MSG_NOTE, vect_location,
3406                      "=== vect_prune_runtime_alias_test_list ===\n");
3407 
3408   /* Step values are irrelevant for aliasing if the number of vector
3409      iterations is equal to the number of scalar iterations (which can
3410      happen for fully-SLP loops).  */
3411   bool ignore_step_p = known_eq (LOOP_VINFO_VECT_FACTOR (loop_vinfo), 1U);
3412 
3413   if (!ignore_step_p)
3414     {
3415       /* Convert the checks for nonzero steps into bound tests.  */
3416       tree value;
3417       FOR_EACH_VEC_ELT (LOOP_VINFO_CHECK_NONZERO (loop_vinfo), i, value)
3418 	vect_check_lower_bound (loop_vinfo, value, true, 1);
3419     }
3420 
3421   if (may_alias_ddrs.is_empty ())
3422     return true;
3423 
3424   comp_alias_ddrs.create (may_alias_ddrs.length ());
3425 
3426   unsigned int loop_depth
3427     = index_in_loop_nest (LOOP_VINFO_LOOP (loop_vinfo)->num,
3428 			  LOOP_VINFO_LOOP_NEST (loop_vinfo));
3429 
3430   /* First, we collect all data ref pairs for aliasing checks.  */
3431   FOR_EACH_VEC_ELT (may_alias_ddrs, i, ddr)
3432     {
3433       int comp_res;
3434       poly_uint64 lower_bound;
3435       struct data_reference *dr_a, *dr_b;
3436       gimple *dr_group_first_a, *dr_group_first_b;
3437       tree segment_length_a, segment_length_b;
3438       unsigned HOST_WIDE_INT access_size_a, access_size_b;
3439       unsigned int align_a, align_b;
3440       gimple *stmt_a, *stmt_b;
3441 
3442       /* Ignore the alias if the VF we chose ended up being no greater
3443 	 than the dependence distance.  */
3444       if (dependence_distance_ge_vf (ddr, loop_depth, vect_factor))
3445 	continue;
3446 
3447       if (DDR_OBJECT_A (ddr))
3448 	{
3449 	  vec_object_pair new_pair (DDR_OBJECT_A (ddr), DDR_OBJECT_B (ddr));
3450 	  if (!compared_objects.add (new_pair))
3451 	    {
3452 	      if (dump_enabled_p ())
3453 		{
3454 		  dump_printf_loc (MSG_NOTE, vect_location, "checking that ");
3455 		  dump_generic_expr (MSG_NOTE, TDF_SLIM, new_pair.first);
3456 		  dump_printf (MSG_NOTE, " and ");
3457 		  dump_generic_expr (MSG_NOTE, TDF_SLIM, new_pair.second);
3458 		  dump_printf (MSG_NOTE, " have different addresses\n");
3459 		}
3460 	      LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).safe_push (new_pair);
3461 	    }
3462 	  continue;
3463 	}
3464 
3465       dr_a = DDR_A (ddr);
3466       stmt_a = DR_STMT (DDR_A (ddr));
3467 
3468       dr_b = DDR_B (ddr);
3469       stmt_b = DR_STMT (DDR_B (ddr));
3470 
3471       /* Skip the pair if inter-iteration dependencies are irrelevant
3472 	 and intra-iteration dependencies are guaranteed to be honored.  */
3473       if (ignore_step_p
3474 	  && (vect_preserves_scalar_order_p (stmt_a, stmt_b)
3475 	      || vectorizable_with_step_bound_p (dr_a, dr_b, &lower_bound)))
3476 	{
3477 	  if (dump_enabled_p ())
3478 	    {
3479 	      dump_printf_loc (MSG_NOTE, vect_location,
3480 			       "no need for alias check between ");
3481 	      dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a));
3482 	      dump_printf (MSG_NOTE, " and ");
3483 	      dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b));
3484 	      dump_printf (MSG_NOTE, " when VF is 1\n");
3485 	    }
3486 	  continue;
3487 	}
3488 
3489       /* See whether we can handle the alias using a bounds check on
3490 	 the step, and whether that's likely to be the best approach.
3491 	 (It might not be, for example, if the minimum step is much larger
3492 	 than the number of bytes handled by one vector iteration.)  */
3493       if (!ignore_step_p
3494 	  && TREE_CODE (DR_STEP (dr_a)) != INTEGER_CST
3495 	  && vectorizable_with_step_bound_p (dr_a, dr_b, &lower_bound)
3496 	  && (vect_small_gap_p (loop_vinfo, dr_a, lower_bound)
3497 	      || vect_small_gap_p (loop_vinfo, dr_b, lower_bound)))
3498 	{
3499 	  bool unsigned_p = dr_known_forward_stride_p (dr_a);
3500 	  if (dump_enabled_p ())
3501 	    {
3502 	      dump_printf_loc (MSG_NOTE, vect_location, "no alias between ");
3503 	      dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a));
3504 	      dump_printf (MSG_NOTE, " and ");
3505 	      dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b));
3506 	      dump_printf (MSG_NOTE, " when the step ");
3507 	      dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_STEP (dr_a));
3508 	      dump_printf (MSG_NOTE, " is outside ");
3509 	      if (unsigned_p)
3510 		dump_printf (MSG_NOTE, "[0");
3511 	      else
3512 		{
3513 		  dump_printf (MSG_NOTE, "(");
3514 		  dump_dec (MSG_NOTE, poly_int64 (-lower_bound));
3515 		}
3516 	      dump_printf (MSG_NOTE, ", ");
3517 	      dump_dec (MSG_NOTE, lower_bound);
3518 	      dump_printf (MSG_NOTE, ")\n");
3519 	    }
3520 	  vect_check_lower_bound (loop_vinfo, DR_STEP (dr_a), unsigned_p,
3521 				  lower_bound);
3522 	  continue;
3523 	}
3524 
3525       dr_group_first_a = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_a));
3526       if (dr_group_first_a)
3527 	{
3528 	  stmt_a = dr_group_first_a;
3529 	  dr_a = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_a));
3530 	}
3531 
3532       dr_group_first_b = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_b));
3533       if (dr_group_first_b)
3534 	{
3535 	  stmt_b = dr_group_first_b;
3536 	  dr_b = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_b));
3537 	}
3538 
3539       if (ignore_step_p)
3540 	{
3541 	  segment_length_a = size_zero_node;
3542 	  segment_length_b = size_zero_node;
3543 	}
3544       else
3545 	{
3546 	  if (!operand_equal_p (DR_STEP (dr_a), DR_STEP (dr_b), 0))
3547 	    length_factor = scalar_loop_iters;
3548 	  else
3549 	    length_factor = size_int (vect_factor);
3550 	  segment_length_a = vect_vfa_segment_size (dr_a, length_factor);
3551 	  segment_length_b = vect_vfa_segment_size (dr_b, length_factor);
3552 	}
3553       access_size_a = vect_vfa_access_size (dr_a);
3554       access_size_b = vect_vfa_access_size (dr_b);
3555       align_a = vect_vfa_align (dr_a);
3556       align_b = vect_vfa_align (dr_b);
3557 
3558       comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (dr_a),
3559 					DR_BASE_ADDRESS (dr_b));
3560       if (comp_res == 0)
3561 	comp_res = data_ref_compare_tree (DR_OFFSET (dr_a),
3562 					  DR_OFFSET (dr_b));
3563 
3564       /* See whether the alias is known at compilation time.  */
3565       if (comp_res == 0
3566 	  && TREE_CODE (DR_STEP (dr_a)) == INTEGER_CST
3567 	  && TREE_CODE (DR_STEP (dr_b)) == INTEGER_CST
3568 	  && poly_int_tree_p (segment_length_a)
3569 	  && poly_int_tree_p (segment_length_b))
3570 	{
3571 	  int res = vect_compile_time_alias (dr_a, dr_b,
3572 					     segment_length_a,
3573 					     segment_length_b,
3574 					     access_size_a,
3575 					     access_size_b);
3576 	  if (res >= 0 && dump_enabled_p ())
3577 	    {
3578 	      dump_printf_loc (MSG_NOTE, vect_location,
3579 			       "can tell at compile time that ");
3580 	      dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a));
3581 	      dump_printf (MSG_NOTE, " and ");
3582 	      dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b));
3583 	      if (res == 0)
3584 		dump_printf (MSG_NOTE, " do not alias\n");
3585 	      else
3586 		dump_printf (MSG_NOTE, " alias\n");
3587 	    }
3588 
3589 	  if (res == 0)
3590 	    continue;
3591 
3592 	  if (res == 1)
3593 	    {
3594 	      if (dump_enabled_p ())
3595 		dump_printf_loc (MSG_NOTE, vect_location,
3596 				 "not vectorized: compilation time alias.\n");
3597 	      return false;
3598 	    }
3599 	}
3600 
3601       dr_with_seg_len_pair_t dr_with_seg_len_pair
3602 	(dr_with_seg_len (dr_a, segment_length_a, access_size_a, align_a),
3603 	 dr_with_seg_len (dr_b, segment_length_b, access_size_b, align_b));
3604 
3605       /* Canonicalize pairs by sorting the two DR members.  */
3606       if (comp_res > 0)
3607 	std::swap (dr_with_seg_len_pair.first, dr_with_seg_len_pair.second);
3608 
3609       comp_alias_ddrs.safe_push (dr_with_seg_len_pair);
3610     }
3611 
3612   prune_runtime_alias_test_list (&comp_alias_ddrs, vect_factor);
3613 
3614   unsigned int count = (comp_alias_ddrs.length ()
3615 			+ check_unequal_addrs.length ());
3616 
3617   dump_printf_loc (MSG_NOTE, vect_location,
3618 		   "improved number of alias checks from %d to %d\n",
3619 		   may_alias_ddrs.length (), count);
3620   if ((int) count > PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
3621     {
3622       if (dump_enabled_p ())
3623 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3624 			 "number of versioning for alias "
3625 			 "run-time tests exceeds %d "
3626 			 "(--param vect-max-version-for-alias-checks)\n",
3627 			 PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS));
3628       return false;
3629     }
3630 
3631   return true;
3632 }
3633 
3634 /* Check whether we can use an internal function for a gather load
3635    or scatter store.  READ_P is true for loads and false for stores.
3636    MASKED_P is true if the load or store is conditional.  MEMORY_TYPE is
3637    the type of the memory elements being loaded or stored.  OFFSET_BITS
3638    is the number of bits in each scalar offset and OFFSET_SIGN is the
3639    sign of the offset.  SCALE is the amount by which the offset should
3640    be multiplied *after* it has been converted to address width.
3641 
3642    Return true if the function is supported, storing the function
3643    id in *IFN_OUT and the type of a vector element in *ELEMENT_TYPE_OUT.  */
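
/* For example (illustrative): gathering 64-bit elements using 32-bit scalar
   offsets can be supported, since narrower offsets can be extended; asking
   for 64-bit offsets with 32-bit elements fails the first check below,
   because truncating wider offsets would not be safe.  */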
3644 
3645 bool
3646 vect_gather_scatter_fn_p (bool read_p, bool masked_p, tree vectype,
3647 			  tree memory_type, unsigned int offset_bits,
3648 			  signop offset_sign, int scale,
3649 			  internal_fn *ifn_out, tree *element_type_out)
3650 {
3651   unsigned int memory_bits = tree_to_uhwi (TYPE_SIZE (memory_type));
3652   unsigned int element_bits = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype)));
3653   if (offset_bits > element_bits)
3654     /* Internal functions require the offset to be the same width as
3655        the vector elements.  We can extend narrower offsets, but it isn't
3656        safe to truncate wider offsets.  */
3657     return false;
3658 
3659   if (element_bits != memory_bits)
3660     /* For now the vector elements must be the same width as the
3661        memory elements.  */
3662     return false;
3663 
3664   /* Work out which function we need.  */
3665   internal_fn ifn;
3666   if (read_p)
3667     ifn = masked_p ? IFN_MASK_GATHER_LOAD : IFN_GATHER_LOAD;
3668   else
3669     ifn = masked_p ? IFN_MASK_SCATTER_STORE : IFN_SCATTER_STORE;
3670 
3671   /* Test whether the target supports this combination.  */
3672   if (!internal_gather_scatter_fn_supported_p (ifn, vectype, memory_type,
3673 					       offset_sign, scale))
3674     return false;
3675 
3676   *ifn_out = ifn;
3677   *element_type_out = TREE_TYPE (vectype);
3678   return true;
3679 }
3680 
3681 /* CALL is a call to an internal gather load or scatter store function.
3682    Describe the operation in INFO.  */
3683 
3684 static void
3685 vect_describe_gather_scatter_call (gcall *call, gather_scatter_info *info)
3686 {
3687   stmt_vec_info stmt_info = vinfo_for_stmt (call);
3688   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3689   data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3690 
3691   info->ifn = gimple_call_internal_fn (call);
3692   info->decl = NULL_TREE;
3693   info->base = gimple_call_arg (call, 0);
3694   info->offset = gimple_call_arg (call, 1);
3695   info->offset_dt = vect_unknown_def_type;
3696   info->offset_vectype = NULL_TREE;
3697   info->scale = TREE_INT_CST_LOW (gimple_call_arg (call, 2));
3698   info->element_type = TREE_TYPE (vectype);
3699   info->memory_type = TREE_TYPE (DR_REF (dr));
3700 }
3701 
3702 /* Return true if a non-affine read or write in STMT is suitable for a
3703    gather load or scatter store.  Describe the operation in *INFO if so.  */
3704 
3705 bool
3706 vect_check_gather_scatter (gimple *stmt, loop_vec_info loop_vinfo,
3707 			   gather_scatter_info *info)
3708 {
3709   HOST_WIDE_INT scale = 1;
3710   poly_int64 pbitpos, pbitsize;
3711   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3712   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3713   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3714   tree offtype = NULL_TREE;
3715   tree decl = NULL_TREE, base, off;
3716   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3717   tree memory_type = TREE_TYPE (DR_REF (dr));
3718   machine_mode pmode;
3719   int punsignedp, reversep, pvolatilep = 0;
3720   internal_fn ifn;
3721   tree element_type;
3722   bool masked_p = false;
3723 
3724   /* See whether this is already a call to a gather/scatter internal function.
3725      If not, see whether it's a masked load or store.  */
3726   gcall *call = dyn_cast <gcall *> (stmt);
3727   if (call && gimple_call_internal_p (call))
3728     {
3729       ifn = gimple_call_internal_fn (stmt);
3730       if (internal_gather_scatter_fn_p (ifn))
3731 	{
3732 	  vect_describe_gather_scatter_call (call, info);
3733 	  return true;
3734 	}
3735       masked_p = (ifn == IFN_MASK_LOAD || ifn == IFN_MASK_STORE);
3736     }
3737 
3738   /* True if we should aim to use internal functions rather than
3739      built-in functions.  */
3740   bool use_ifn_p = (DR_IS_READ (dr)
3741 		    ? supports_vec_gather_load_p ()
3742 		    : supports_vec_scatter_store_p ());
3743 
3744   base = DR_REF (dr);
3745   /* For masked loads/stores, DR_REF (dr) is an artificial MEM_REF,
3746      see if we can use the def stmt of the address.  */
3747   if (masked_p
3748       && TREE_CODE (base) == MEM_REF
3749       && TREE_CODE (TREE_OPERAND (base, 0)) == SSA_NAME
3750       && integer_zerop (TREE_OPERAND (base, 1))
3751       && !expr_invariant_in_loop_p (loop, TREE_OPERAND (base, 0)))
3752     {
3753       gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base, 0));
3754       if (is_gimple_assign (def_stmt)
3755 	  && gimple_assign_rhs_code (def_stmt) == ADDR_EXPR)
3756 	base = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
3757     }
3758 
  /* The gather and scatter builtins need an address of the form
3760      loop_invariant + vector * {1, 2, 4, 8}
3761      or
3762      loop_invariant + sign_extend (vector) * { 1, 2, 4, 8 }.
3763      Unfortunately DR_BASE_ADDRESS/DR_OFFSET can be a mixture
3764      of loop invariants/SSA_NAMEs defined in the loop, with casts,
3765      multiplications and additions in it.  To get a vector, we need
3766      a single SSA_NAME that will be defined in the loop and will
3767      contain everything that is not loop invariant and that can be
     vectorized.  The following code attempts to find such a preexisting
3769      SSA_NAME OFF and put the loop invariants into a tree BASE
3770      that can be gimplified before the loop.  */
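  /* For example (illustrative sketch): for a gather like "... = a[idx[i]]"
     with "double a[]", the analysis below typically ends up with
     BASE == &a, OFF == the SSA_NAME holding the value of idx[i] and
     SCALE == 8 (provided the target supports that scale), i.e. the second
     form above.  */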
3771   base = get_inner_reference (base, &pbitsize, &pbitpos, &off, &pmode,
3772 			      &punsignedp, &reversep, &pvolatilep);
3773   gcc_assert (base && !reversep);
3774   poly_int64 pbytepos = exact_div (pbitpos, BITS_PER_UNIT);
3775 
3776   if (TREE_CODE (base) == MEM_REF)
3777     {
3778       if (!integer_zerop (TREE_OPERAND (base, 1)))
3779 	{
3780 	  if (off == NULL_TREE)
3781 	    off = wide_int_to_tree (sizetype, mem_ref_offset (base));
3782 	  else
3783 	    off = size_binop (PLUS_EXPR, off,
3784 			      fold_convert (sizetype, TREE_OPERAND (base, 1)));
3785 	}
3786       base = TREE_OPERAND (base, 0);
3787     }
3788   else
3789     base = build_fold_addr_expr (base);
3790 
3791   if (off == NULL_TREE)
3792     off = size_zero_node;
3793 
  /* If base is not loop invariant, then either off is 0 and we start with
     just the constant offset in the loop invariant BASE, continuing with
     base as OFF, or we give up.
     We could handle the latter case by gimplifying the addition of
     base + off into some SSA_NAME and using that as off, but for now punt.  */
3799   if (!expr_invariant_in_loop_p (loop, base))
3800     {
3801       if (!integer_zerop (off))
3802 	return false;
3803       off = base;
3804       base = size_int (pbytepos);
3805     }
3806   /* Otherwise put base + constant offset into the loop invariant BASE
3807      and continue with OFF.  */
3808   else
3809     {
3810       base = fold_convert (sizetype, base);
3811       base = size_binop (PLUS_EXPR, base, size_int (pbytepos));
3812     }
3813 
3814   /* OFF at this point may be either a SSA_NAME or some tree expression
3815      from get_inner_reference.  Try to peel off loop invariants from it
3816      into BASE as long as possible.  */
3817   STRIP_NOPS (off);
3818   while (offtype == NULL_TREE)
3819     {
3820       enum tree_code code;
3821       tree op0, op1, add = NULL_TREE;
3822 
3823       if (TREE_CODE (off) == SSA_NAME)
3824 	{
3825 	  gimple *def_stmt = SSA_NAME_DEF_STMT (off);
3826 
3827 	  if (expr_invariant_in_loop_p (loop, off))
3828 	    return false;
3829 
3830 	  if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
3831 	    break;
3832 
3833 	  op0 = gimple_assign_rhs1 (def_stmt);
3834 	  code = gimple_assign_rhs_code (def_stmt);
3835 	  op1 = gimple_assign_rhs2 (def_stmt);
3836 	}
3837       else
3838 	{
3839 	  if (get_gimple_rhs_class (TREE_CODE (off)) == GIMPLE_TERNARY_RHS)
3840 	    return false;
3841 	  code = TREE_CODE (off);
3842 	  extract_ops_from_tree (off, &code, &op0, &op1);
3843 	}
3844       switch (code)
3845 	{
3846 	case POINTER_PLUS_EXPR:
3847 	case PLUS_EXPR:
3848 	  if (expr_invariant_in_loop_p (loop, op0))
3849 	    {
3850 	      add = op0;
3851 	      off = op1;
3852 	    do_add:
3853 	      add = fold_convert (sizetype, add);
3854 	      if (scale != 1)
3855 		add = size_binop (MULT_EXPR, add, size_int (scale));
3856 	      base = size_binop (PLUS_EXPR, base, add);
3857 	      continue;
3858 	    }
3859 	  if (expr_invariant_in_loop_p (loop, op1))
3860 	    {
3861 	      add = op1;
3862 	      off = op0;
3863 	      goto do_add;
3864 	    }
3865 	  break;
3866 	case MINUS_EXPR:
3867 	  if (expr_invariant_in_loop_p (loop, op1))
3868 	    {
3869 	      add = fold_convert (sizetype, op1);
3870 	      add = size_binop (MINUS_EXPR, size_zero_node, add);
3871 	      off = op0;
3872 	      goto do_add;
3873 	    }
3874 	  break;
3875 	case MULT_EXPR:
3876 	  if (scale == 1 && tree_fits_shwi_p (op1))
3877 	    {
3878 	      int new_scale = tree_to_shwi (op1);
3879 	      /* Only treat this as a scaling operation if the target
3880 		 supports it.  */
3881 	      if (use_ifn_p
3882 		  && !vect_gather_scatter_fn_p (DR_IS_READ (dr), masked_p,
3883 						vectype, memory_type, 1,
3884 						TYPE_SIGN (TREE_TYPE (op0)),
3885 						new_scale, &ifn,
3886 						&element_type))
3887 		break;
3888 	      scale = new_scale;
3889 	      off = op0;
3890 	      continue;
3891 	    }
3892 	  break;
3893 	case SSA_NAME:
3894 	  off = op0;
3895 	  continue;
3896 	CASE_CONVERT:
3897 	  if (!POINTER_TYPE_P (TREE_TYPE (op0))
3898 	      && !INTEGRAL_TYPE_P (TREE_TYPE (op0)))
3899 	    break;
3900 	  if (TYPE_PRECISION (TREE_TYPE (op0))
3901 	      == TYPE_PRECISION (TREE_TYPE (off)))
3902 	    {
3903 	      off = op0;
3904 	      continue;
3905 	    }
3906 
3907 	  /* The internal functions need the offset to be the same width
3908 	     as the elements of VECTYPE.  Don't include operations that
3909 	     cast the offset from that width to a different width.  */
3910 	  if (use_ifn_p
3911 	      && (int_size_in_bytes (TREE_TYPE (vectype))
3912 		  == int_size_in_bytes (TREE_TYPE (off))))
3913 	    break;
3914 
3915 	  if (TYPE_PRECISION (TREE_TYPE (op0))
3916 	      < TYPE_PRECISION (TREE_TYPE (off)))
3917 	    {
3918 	      off = op0;
3919 	      offtype = TREE_TYPE (off);
3920 	      STRIP_NOPS (off);
3921 	      continue;
3922 	    }
3923 	  break;
3924 	default:
3925 	  break;
3926 	}
3927       break;
3928     }
3929 
3930   /* If at the end OFF still isn't a SSA_NAME or isn't
3931      defined in the loop, punt.  */
3932   if (TREE_CODE (off) != SSA_NAME
3933       || expr_invariant_in_loop_p (loop, off))
3934     return false;
3935 
3936   if (offtype == NULL_TREE)
3937     offtype = TREE_TYPE (off);
3938 
3939   if (use_ifn_p)
3940     {
3941       if (!vect_gather_scatter_fn_p (DR_IS_READ (dr), masked_p, vectype,
3942 				     memory_type, TYPE_PRECISION (offtype),
3943 				     TYPE_SIGN (offtype), scale, &ifn,
3944 				     &element_type))
3945 	return false;
3946     }
3947   else
3948     {
3949       if (DR_IS_READ (dr))
3950 	{
3951 	  if (targetm.vectorize.builtin_gather)
3952 	    decl = targetm.vectorize.builtin_gather (vectype, offtype, scale);
3953 	}
3954       else
3955 	{
3956 	  if (targetm.vectorize.builtin_scatter)
3957 	    decl = targetm.vectorize.builtin_scatter (vectype, offtype, scale);
3958 	}
3959 
3960       if (!decl)
3961 	return false;
3962 
3963       ifn = IFN_LAST;
3964       element_type = TREE_TYPE (vectype);
3965     }
3966 
3967   info->ifn = ifn;
3968   info->decl = decl;
3969   info->base = base;
3970   info->offset = off;
3971   info->offset_dt = vect_unknown_def_type;
3972   info->offset_vectype = NULL_TREE;
3973   info->scale = scale;
3974   info->element_type = element_type;
3975   info->memory_type = memory_type;
3976   return true;
3977 }
3978 
3979 /* Function vect_analyze_data_refs.
3980 
   Find all the data references in the loop or basic block.
3982 
3983    The general structure of the analysis of data refs in the vectorizer is as
3984    follows:
3985    1- vect_analyze_data_refs(loop/bb): call
3986       compute_data_dependences_for_loop/bb to find and analyze all data-refs
3987       in the loop/bb and their dependences.
3988    2- vect_analyze_dependences(): apply dependence testing using ddrs.
3989    3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
3990    4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
3991 
3992 */
3993 
3994 bool
3995 vect_analyze_data_refs (vec_info *vinfo, poly_uint64 *min_vf)
3996 {
3997   struct loop *loop = NULL;
3998   unsigned int i;
3999   struct data_reference *dr;
4000   tree scalar_type;
4001 
4002   if (dump_enabled_p ())
4003     dump_printf_loc (MSG_NOTE, vect_location,
4004 		     "=== vect_analyze_data_refs ===\n");
4005 
4006   if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
4007     loop = LOOP_VINFO_LOOP (loop_vinfo);
4008 
4009   /* Go through the data-refs, check that the analysis succeeded.  Update
4010      pointer from stmt_vec_info struct to DR and vectype.  */
4011 
4012   vec<data_reference_p> datarefs = vinfo->datarefs;
4013   FOR_EACH_VEC_ELT (datarefs, i, dr)
4014     {
4015       gimple *stmt;
4016       stmt_vec_info stmt_info;
4017       tree base, offset, init;
4018       enum { SG_NONE, GATHER, SCATTER } gatherscatter = SG_NONE;
4019       bool simd_lane_access = false;
4020       poly_uint64 vf;
4021 
4022 again:
4023       if (!dr || !DR_REF (dr))
4024         {
4025           if (dump_enabled_p ())
4026 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4027 	                     "not vectorized: unhandled data-ref\n");
4028           return false;
4029         }
4030 
4031       stmt = DR_STMT (dr);
4032       stmt_info = vinfo_for_stmt (stmt);
4033 
4034       /* Discard clobbers from the dataref vector.  We will remove
4035          clobber stmts during vectorization.  */
4036       if (gimple_clobber_p (stmt))
4037 	{
4038 	  free_data_ref (dr);
4039 	  if (i == datarefs.length () - 1)
4040 	    {
4041 	      datarefs.pop ();
4042 	      break;
4043 	    }
4044 	  datarefs.ordered_remove (i);
4045 	  dr = datarefs[i];
4046 	  goto again;
4047 	}
4048 
4049       /* Check that analysis of the data-ref succeeded.  */
4050       if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
4051 	  || !DR_STEP (dr))
4052         {
4053 	  bool maybe_gather
4054 	    = DR_IS_READ (dr)
4055 	      && !TREE_THIS_VOLATILE (DR_REF (dr))
4056 	      && (targetm.vectorize.builtin_gather != NULL
4057 		  || supports_vec_gather_load_p ());
4058 	  bool maybe_scatter
4059 	    = DR_IS_WRITE (dr)
4060 	      && !TREE_THIS_VOLATILE (DR_REF (dr))
4061 	      && (targetm.vectorize.builtin_scatter != NULL
4062 		  || supports_vec_scatter_store_p ());
4063 	  bool maybe_simd_lane_access
4064 	    = is_a <loop_vec_info> (vinfo) && loop->simduid;
4065 
	  /* If the target supports vector gather loads or scatter stores,
	     or if this might be a SIMD lane access, see whether we can use
	     them.  */
4068 	  if (is_a <loop_vec_info> (vinfo)
4069 	      && (maybe_gather || maybe_scatter || maybe_simd_lane_access)
4070 	      && !nested_in_vect_loop_p (loop, stmt))
4071 	    {
4072 	      struct data_reference *newdr
4073 		= create_data_ref (NULL, loop_containing_stmt (stmt),
4074 				   DR_REF (dr), stmt, !maybe_scatter,
4075 				   DR_IS_CONDITIONAL_IN_STMT (dr));
4076 	      gcc_assert (newdr != NULL && DR_REF (newdr));
4077 	      if (DR_BASE_ADDRESS (newdr)
4078 		  && DR_OFFSET (newdr)
4079 		  && DR_INIT (newdr)
4080 		  && DR_STEP (newdr)
4081 		  && integer_zerop (DR_STEP (newdr)))
4082 		{
4083 		  if (maybe_simd_lane_access)
4084 		    {
4085 		      tree off = DR_OFFSET (newdr);
4086 		      STRIP_NOPS (off);
4087 		      if (TREE_CODE (DR_INIT (newdr)) == INTEGER_CST
4088 			  && TREE_CODE (off) == MULT_EXPR
4089 			  && tree_fits_uhwi_p (TREE_OPERAND (off, 1)))
4090 			{
4091 			  tree step = TREE_OPERAND (off, 1);
4092 			  off = TREE_OPERAND (off, 0);
4093 			  STRIP_NOPS (off);
4094 			  if (CONVERT_EXPR_P (off)
4095 			      && TYPE_PRECISION (TREE_TYPE (TREE_OPERAND (off,
4096 									  0)))
4097 				 < TYPE_PRECISION (TREE_TYPE (off)))
4098 			    off = TREE_OPERAND (off, 0);
4099 			  if (TREE_CODE (off) == SSA_NAME)
4100 			    {
4101 			      gimple *def = SSA_NAME_DEF_STMT (off);
4102 			      tree reft = TREE_TYPE (DR_REF (newdr));
4103 			      if (is_gimple_call (def)
4104 				  && gimple_call_internal_p (def)
4105 				  && (gimple_call_internal_fn (def)
4106 				      == IFN_GOMP_SIMD_LANE))
4107 				{
4108 				  tree arg = gimple_call_arg (def, 0);
4109 				  gcc_assert (TREE_CODE (arg) == SSA_NAME);
4110 				  arg = SSA_NAME_VAR (arg);
4111 				  if (arg == loop->simduid
4112 				      /* For now.  */
4113 				      && tree_int_cst_equal
4114 					   (TYPE_SIZE_UNIT (reft),
4115 					    step))
4116 				    {
4117 				      DR_OFFSET (newdr) = ssize_int (0);
4118 				      DR_STEP (newdr) = step;
4119 				      DR_OFFSET_ALIGNMENT (newdr)
4120 					= BIGGEST_ALIGNMENT;
4121 				      DR_STEP_ALIGNMENT (newdr)
4122 					= highest_pow2_factor (step);
4123 				      dr = newdr;
4124 				      simd_lane_access = true;
4125 				    }
4126 				}
4127 			    }
4128 			}
4129 		    }
4130 		  if (!simd_lane_access && (maybe_gather || maybe_scatter))
4131 		    {
4132 		      dr = newdr;
4133 		      if (maybe_gather)
4134 			gatherscatter = GATHER;
4135 		      else
4136 			gatherscatter = SCATTER;
4137 		    }
4138 		}
4139 	      if (gatherscatter == SG_NONE && !simd_lane_access)
4140 		free_data_ref (newdr);
4141 	    }
4142 
4143 	  if (gatherscatter == SG_NONE && !simd_lane_access)
4144 	    {
4145 	      if (dump_enabled_p ())
4146 		{
4147 		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4148                                    "not vectorized: data ref analysis "
4149                                    "failed ");
4150 		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
4151 		}
4152 
4153 	      if (is_a <bb_vec_info> (vinfo))
4154 		break;
4155 
4156 	      return false;
4157 	    }
4158         }
4159 
4160       if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
4161         {
4162           if (dump_enabled_p ())
4163             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4164                              "not vectorized: base addr of dr is a "
4165                              "constant\n");
4166 
4167           if (is_a <bb_vec_info> (vinfo))
4168 	    break;
4169 
4170 	  if (gatherscatter != SG_NONE || simd_lane_access)
4171 	    free_data_ref (dr);
4172 	  return false;
4173         }
4174 
4175       if (TREE_THIS_VOLATILE (DR_REF (dr)))
4176         {
4177           if (dump_enabled_p ())
4178             {
4179               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4180                                "not vectorized: volatile type ");
4181               dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
4182             }
4183 
4184           if (is_a <bb_vec_info> (vinfo))
4185 	    break;
4186 
4187           return false;
4188         }
4189 
4190       if (stmt_can_throw_internal (stmt))
4191         {
4192           if (dump_enabled_p ())
4193             {
4194               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4195                                "not vectorized: statement can throw an "
4196                                "exception ");
4197               dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
4198             }
4199 
4200           if (is_a <bb_vec_info> (vinfo))
4201 	    break;
4202 
4203 	  if (gatherscatter != SG_NONE || simd_lane_access)
4204 	    free_data_ref (dr);
4205           return false;
4206         }
4207 
4208       if (TREE_CODE (DR_REF (dr)) == COMPONENT_REF
4209 	  && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr), 1)))
4210 	{
4211           if (dump_enabled_p ())
4212             {
4213               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4214                                "not vectorized: statement is bitfield "
4215                                "access ");
4216               dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
4217             }
4218 
4219           if (is_a <bb_vec_info> (vinfo))
4220 	    break;
4221 
4222 	  if (gatherscatter != SG_NONE || simd_lane_access)
4223 	    free_data_ref (dr);
4224           return false;
4225 	}
4226 
4227       base = unshare_expr (DR_BASE_ADDRESS (dr));
4228       offset = unshare_expr (DR_OFFSET (dr));
4229       init = unshare_expr (DR_INIT (dr));
4230 
4231       if (is_gimple_call (stmt)
4232 	  && (!gimple_call_internal_p (stmt)
4233 	      || (gimple_call_internal_fn (stmt) != IFN_MASK_LOAD
4234 		  && gimple_call_internal_fn (stmt) != IFN_MASK_STORE)))
4235 	{
4236 	  if (dump_enabled_p ())
4237 	    {
4238 	      dump_printf_loc (MSG_MISSED_OPTIMIZATION,  vect_location,
4239 	                       "not vectorized: dr in a call ");
4240 	      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
4241 	    }
4242 
4243 	  if (is_a <bb_vec_info> (vinfo))
4244 	    break;
4245 
4246 	  if (gatherscatter != SG_NONE || simd_lane_access)
4247 	    free_data_ref (dr);
4248 	  return false;
4249 	}
4250 
4251       /* Update DR field in stmt_vec_info struct.  */
4252 
      /* If the dataref is in an inner-loop of the loop that is considered
	 for vectorization, we also want to analyze the access relative to
4255 	 the outer-loop (DR contains information only relative to the
4256 	 inner-most enclosing loop).  We do that by building a reference to the
4257 	 first location accessed by the inner-loop, and analyze it relative to
4258 	 the outer-loop.  */
4259       if (loop && nested_in_vect_loop_p (loop, stmt))
4260 	{
4261 	  /* Build a reference to the first location accessed by the
4262 	     inner loop: *(BASE + INIT + OFFSET).  By construction,
4263 	     this address must be invariant in the inner loop, so we
4264 	     can consider it as being used in the outer loop.  */
4265 	  tree init_offset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset),
4266 					  init, offset);
4267 	  tree init_addr = fold_build_pointer_plus (base, init_offset);
4268 	  tree init_ref = build_fold_indirect_ref (init_addr);
4269 
4270 	  if (dump_enabled_p ())
4271 	    {
4272 	      dump_printf_loc (MSG_NOTE, vect_location,
4273                                "analyze in outer loop: ");
4274 	      dump_generic_expr (MSG_NOTE, TDF_SLIM, init_ref);
4275 	      dump_printf (MSG_NOTE, "\n");
4276 	    }
4277 
4278 	  if (!dr_analyze_innermost (&STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info),
4279 				     init_ref, loop))
4280 	    /* dr_analyze_innermost already explained the failure.  */
4281 	    return false;
4282 
4283           if (dump_enabled_p ())
4284 	    {
4285 	      dump_printf_loc (MSG_NOTE, vect_location,
4286                                "\touter base_address: ");
4287 	      dump_generic_expr (MSG_NOTE, TDF_SLIM,
4288                                  STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
4289 	      dump_printf (MSG_NOTE, "\n\touter offset from base address: ");
4290 	      dump_generic_expr (MSG_NOTE, TDF_SLIM,
4291                                  STMT_VINFO_DR_OFFSET (stmt_info));
4292 	      dump_printf (MSG_NOTE,
4293                            "\n\touter constant offset from base address: ");
4294 	      dump_generic_expr (MSG_NOTE, TDF_SLIM,
4295                                  STMT_VINFO_DR_INIT (stmt_info));
4296 	      dump_printf (MSG_NOTE, "\n\touter step: ");
4297 	      dump_generic_expr (MSG_NOTE, TDF_SLIM,
4298                                  STMT_VINFO_DR_STEP (stmt_info));
4299 	      dump_printf (MSG_NOTE, "\n\touter base alignment: %d\n",
4300 			   STMT_VINFO_DR_BASE_ALIGNMENT (stmt_info));
4301 	      dump_printf (MSG_NOTE, "\n\touter base misalignment: %d\n",
4302 			   STMT_VINFO_DR_BASE_MISALIGNMENT (stmt_info));
4303 	      dump_printf (MSG_NOTE, "\n\touter offset alignment: %d\n",
4304 			   STMT_VINFO_DR_OFFSET_ALIGNMENT (stmt_info));
4305 	      dump_printf (MSG_NOTE, "\n\touter step alignment: %d\n",
4306 			   STMT_VINFO_DR_STEP_ALIGNMENT (stmt_info));
4307 	    }
4308 	}
4309 
4310       if (STMT_VINFO_DATA_REF (stmt_info))
4311         {
4312           if (dump_enabled_p ())
4313             {
4314               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4315                                "not vectorized: more than one data ref "
4316                                "in stmt: ");
4317               dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
4318             }
4319 
4320           if (is_a <bb_vec_info> (vinfo))
4321 	    break;
4322 
4323 	  if (gatherscatter != SG_NONE || simd_lane_access)
4324 	    free_data_ref (dr);
4325           return false;
4326         }
4327 
4328       STMT_VINFO_DATA_REF (stmt_info) = dr;
4329       if (simd_lane_access)
4330 	{
4331 	  STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) = true;
4332 	  free_data_ref (datarefs[i]);
4333 	  datarefs[i] = dr;
4334 	}
4335 
4336       if (TREE_CODE (DR_BASE_ADDRESS (dr)) == ADDR_EXPR
4337 	  && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr), 0))
4338 	  && DECL_NONALIASED (TREE_OPERAND (DR_BASE_ADDRESS (dr), 0)))
4339 	{
4340           if (dump_enabled_p ())
4341             {
4342               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4343                                "not vectorized: base object not addressable "
4344 			       "for stmt: ");
4345               dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
4346             }
4347           if (is_a <bb_vec_info> (vinfo))
4348 	    {
4349 	      /* In BB vectorization the ref can still participate
4350 	         in dependence analysis, we just can't vectorize it.  */
4351 	      STMT_VINFO_VECTORIZABLE (stmt_info) = false;
4352 	      continue;
4353 	    }
4354 	  return false;
4355 	}
4356 
4357       /* Set vectype for STMT.  */
4358       scalar_type = TREE_TYPE (DR_REF (dr));
4359       STMT_VINFO_VECTYPE (stmt_info)
4360 	= get_vectype_for_scalar_type (scalar_type);
4361       if (!STMT_VINFO_VECTYPE (stmt_info))
4362         {
4363           if (dump_enabled_p ())
4364             {
4365               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4366                                "not vectorized: no vectype for stmt: ");
4367               dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
4368               dump_printf (MSG_MISSED_OPTIMIZATION, " scalar_type: ");
4369               dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_DETAILS,
4370                                  scalar_type);
4371               dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4372             }
4373 
4374           if (is_a <bb_vec_info> (vinfo))
4375 	    {
4376 	      /* No vector type is fine, the ref can still participate
4377 	         in dependence analysis, we just can't vectorize it.  */
4378 	      STMT_VINFO_VECTORIZABLE (stmt_info) = false;
4379 	      continue;
4380 	    }
4381 
4382 	  if (gatherscatter != SG_NONE || simd_lane_access)
4383 	    {
4384 	      STMT_VINFO_DATA_REF (stmt_info) = NULL;
4385 	      if (gatherscatter != SG_NONE)
4386 		free_data_ref (dr);
4387 	    }
4388 	  return false;
4389         }
4390       else
4391 	{
4392 	  if (dump_enabled_p ())
4393 	    {
4394 	      dump_printf_loc (MSG_NOTE, vect_location,
4395 			       "got vectype for stmt: ");
4396 	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
4397 	      dump_generic_expr (MSG_NOTE, TDF_SLIM,
4398 				 STMT_VINFO_VECTYPE (stmt_info));
4399 	      dump_printf (MSG_NOTE, "\n");
4400 	    }
4401 	}
4402 
4403       /* Adjust the minimal vectorization factor according to the
4404 	 vector type.  */
4405       vf = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
4406       *min_vf = upper_bound (*min_vf, vf);
4407 
4408       if (gatherscatter != SG_NONE)
4409 	{
4410 	  gather_scatter_info gs_info;
4411 	  if (!vect_check_gather_scatter (stmt, as_a <loop_vec_info> (vinfo),
4412 					  &gs_info)
4413 	      || !get_vectype_for_scalar_type (TREE_TYPE (gs_info.offset)))
4414 	    {
4415 	      STMT_VINFO_DATA_REF (stmt_info) = NULL;
4416 	      free_data_ref (dr);
4417 	      if (dump_enabled_p ())
4418 		{
4419 		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4420 				   (gatherscatter == GATHER) ?
4421 				   "not vectorized: not suitable for gather "
4422 				   "load " :
4423 				   "not vectorized: not suitable for scatter "
4424 				   "store ");
4425 		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
4426 		}
4427 	      return false;
4428 	    }
4429 
4430 	  free_data_ref (datarefs[i]);
4431 	  datarefs[i] = dr;
4432 	  STMT_VINFO_GATHER_SCATTER_P (stmt_info) = gatherscatter;
4433 	}
4434 
4435       else if (is_a <loop_vec_info> (vinfo)
4436 	       && TREE_CODE (DR_STEP (dr)) != INTEGER_CST)
4437 	{
4438 	  if (nested_in_vect_loop_p (loop, stmt))
4439 	    {
4440 	      if (dump_enabled_p ())
4441 		{
4442 		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4443                                    "not vectorized: not suitable for strided "
4444                                    "load ");
4445 		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
4446 		}
4447 	      return false;
4448 	    }
4449 	  STMT_VINFO_STRIDED_P (stmt_info) = true;
4450 	}
4451     }
4452 
4453   /* If we stopped analysis at the first dataref we could not analyze
     when trying to vectorize a basic-block, mark the rest of the datarefs
4455      as not vectorizable and truncate the vector of datarefs.  That
4456      avoids spending useless time in analyzing their dependence.  */
4457   if (i != datarefs.length ())
4458     {
4459       gcc_assert (is_a <bb_vec_info> (vinfo));
4460       for (unsigned j = i; j < datarefs.length (); ++j)
4461 	{
4462 	  data_reference_p dr = datarefs[j];
4463           STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
4464 	  free_data_ref (dr);
4465 	}
4466       datarefs.truncate (i);
4467     }
4468 
4469   return true;
4470 }
4471 
4472 
4473 /* Function vect_get_new_vect_var.
4474 
   Returns a name for a new variable.  The current naming scheme prepends
   a prefix chosen by VAR_KIND ("vect", "stmp", "mask" or "vectp") and an
   underscore to NAME, if NAME is provided; otherwise the prefix alone is
   used.  */
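
/* For instance (illustrative only, not exercised here):

     vect_get_new_vect_var (ptr_type, vect_pointer_var, "in")

   creates a temporary named "vectp_in", while a NULL NAME yields just
   "vectp".  */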
4479 
4480 tree
4481 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
4482 {
4483   const char *prefix;
4484   tree new_vect_var;
4485 
4486   switch (var_kind)
4487   {
4488   case vect_simple_var:
4489     prefix = "vect";
4490     break;
4491   case vect_scalar_var:
4492     prefix = "stmp";
4493     break;
4494   case vect_mask_var:
4495     prefix = "mask";
4496     break;
4497   case vect_pointer_var:
4498     prefix = "vectp";
4499     break;
4500   default:
4501     gcc_unreachable ();
4502   }
4503 
4504   if (name)
4505     {
4506       char* tmp = concat (prefix, "_", name, NULL);
4507       new_vect_var = create_tmp_reg (type, tmp);
4508       free (tmp);
4509     }
4510   else
4511     new_vect_var = create_tmp_reg (type, prefix);
4512 
4513   return new_vect_var;
4514 }
4515 
4516 /* Like vect_get_new_vect_var but return an SSA name.  */
4517 
4518 tree
4519 vect_get_new_ssa_name (tree type, enum vect_var_kind var_kind, const char *name)
4520 {
4521   const char *prefix;
4522   tree new_vect_var;
4523 
4524   switch (var_kind)
4525   {
4526   case vect_simple_var:
4527     prefix = "vect";
4528     break;
4529   case vect_scalar_var:
4530     prefix = "stmp";
4531     break;
4532   case vect_pointer_var:
4533     prefix = "vectp";
4534     break;
4535   default:
4536     gcc_unreachable ();
4537   }
4538 
4539   if (name)
4540     {
4541       char* tmp = concat (prefix, "_", name, NULL);
4542       new_vect_var = make_temp_ssa_name (type, NULL, tmp);
4543       free (tmp);
4544     }
4545   else
4546     new_vect_var = make_temp_ssa_name (type, NULL, prefix);
4547 
4548   return new_vect_var;
4549 }
4550 
/* Duplicate ptr info and set alignment/misalignment on NAME from DR.  */
4552 
4553 static void
4554 vect_duplicate_ssa_name_ptr_info (tree name, data_reference *dr)
4555 {
4556   duplicate_ssa_name_ptr_info (name, DR_PTR_INFO (dr));
4557   int misalign = DR_MISALIGNMENT (dr);
4558   if (misalign == DR_MISALIGNMENT_UNKNOWN)
4559     mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (name));
4560   else
4561     set_ptr_info_alignment (SSA_NAME_PTR_INFO (name),
4562 			    DR_TARGET_ALIGNMENT (dr), misalign);
4563 }
4564 
4565 /* Function vect_create_addr_base_for_vector_ref.
4566 
4567    Create an expression that computes the address of the first memory location
4568    that will be accessed for a data reference.
4569 
4570    Input:
4571    STMT: The statement containing the data reference.
4572    NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
   OFFSET: Optional.  If supplied, it is added to the initial address.
   LOOP:    Specify relative to which loop-nest the address should be computed.
4575             For example, when the dataref is in an inner-loop nested in an
4576 	    outer-loop that is now being vectorized, LOOP can be either the
4577 	    outer-loop, or the inner-loop.  The first memory location accessed
4578 	    by the following dataref ('in' points to short):
4579 
4580 		for (i=0; i<N; i++)
4581 		   for (j=0; j<M; j++)
4582 		     s += in[i+j]
4583 
4584 	    is as follows:
4585 	    if LOOP=i_loop:	&in		(relative to i_loop)
4586 	    if LOOP=j_loop: 	&in+i*2B	(relative to j_loop)
   BYTE_OFFSET: Optional, defaulted to NULL.  If supplied, it is added to the
	    initial address.  Unlike OFFSET, which is a number of elements to
	    be added, BYTE_OFFSET is measured in bytes.
4590 
4591    Output:
4592    1. Return an SSA_NAME whose value is the address of the memory location of
4593       the first vector of the data reference.
4594    2. If new_stmt_list is not NULL_TREE after return then the caller must insert
4595       these statement(s) which define the returned SSA_NAME.
4596 
4597    FORNOW: We are only handling array accesses with step 1.  */
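
/* As an illustrative sketch only (ignoring the sizetype conversions done
   below), the address computed for the loop case is roughly

     addr_base = drb->base_address
		 + (drb->offset + drb->init)
		 + OFFSET * TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)))
		 + BYTE_OFFSET

   gimplified via force_gimple_operand into NEW_STMT_LIST.  */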
4598 
4599 tree
4600 vect_create_addr_base_for_vector_ref (gimple *stmt,
4601 				      gimple_seq *new_stmt_list,
4602 				      tree offset,
4603 				      tree byte_offset)
4604 {
4605   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4606   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4607   const char *base_name;
4608   tree addr_base;
4609   tree dest;
4610   gimple_seq seq = NULL;
4611   tree vect_ptr_type;
4612   tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
4613   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4614   innermost_loop_behavior *drb = vect_dr_behavior (dr);
4615 
4616   tree data_ref_base = unshare_expr (drb->base_address);
4617   tree base_offset = unshare_expr (drb->offset);
4618   tree init = unshare_expr (drb->init);
4619 
4620   if (loop_vinfo)
4621     base_name = get_name (data_ref_base);
4622   else
4623     {
4624       base_offset = ssize_int (0);
4625       init = ssize_int (0);
4626       base_name = get_name (DR_REF (dr));
4627     }
4628 
4629   /* Create base_offset */
4630   base_offset = size_binop (PLUS_EXPR,
4631 			    fold_convert (sizetype, base_offset),
4632 			    fold_convert (sizetype, init));
4633 
4634   if (offset)
4635     {
4636       offset = fold_build2 (MULT_EXPR, sizetype,
4637 			    fold_convert (sizetype, offset), step);
4638       base_offset = fold_build2 (PLUS_EXPR, sizetype,
4639 				 base_offset, offset);
4640     }
4641   if (byte_offset)
4642     {
4643       byte_offset = fold_convert (sizetype, byte_offset);
4644       base_offset = fold_build2 (PLUS_EXPR, sizetype,
4645 				 base_offset, byte_offset);
4646     }
4647 
4648   /* base + base_offset */
4649   if (loop_vinfo)
4650     addr_base = fold_build_pointer_plus (data_ref_base, base_offset);
4651   else
4652     {
4653       addr_base = build1 (ADDR_EXPR,
4654 			  build_pointer_type (TREE_TYPE (DR_REF (dr))),
4655 			  unshare_expr (DR_REF (dr)));
4656     }
4657 
4658   vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
4659   dest = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var, base_name);
4660   addr_base = force_gimple_operand (addr_base, &seq, true, dest);
4661   gimple_seq_add_seq (new_stmt_list, seq);
4662 
4663   if (DR_PTR_INFO (dr)
4664       && TREE_CODE (addr_base) == SSA_NAME
4665       && !SSA_NAME_PTR_INFO (addr_base))
4666     {
4667       vect_duplicate_ssa_name_ptr_info (addr_base, dr);
4668       if (offset || byte_offset)
4669 	mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (addr_base));
4670     }
4671 
4672   if (dump_enabled_p ())
4673     {
4674       dump_printf_loc (MSG_NOTE, vect_location, "created ");
4675       dump_generic_expr (MSG_NOTE, TDF_SLIM, addr_base);
4676       dump_printf (MSG_NOTE, "\n");
4677     }
4678 
4679   return addr_base;
4680 }
4681 
4682 
4683 /* Function vect_create_data_ref_ptr.
4684 
4685    Create a new pointer-to-AGGR_TYPE variable (ap), that points to the first
4686    location accessed in the loop by STMT, along with the def-use update
4687    chain to appropriately advance the pointer through the loop iterations.
4688    Also set aliasing information for the pointer.  This pointer is used by
4689    the callers to this function to create a memory reference expression for
4690    vector load/store access.
4691 
4692    Input:
4693    1. STMT: a stmt that references memory. Expected to be of the form
4694          GIMPLE_ASSIGN <name, data-ref> or
4695 	 GIMPLE_ASSIGN <data-ref, name>.
4696    2. AGGR_TYPE: the type of the reference, which should be either a vector
4697         or an array.
4698    3. AT_LOOP: the loop where the vector memref is to be created.
4699    4. OFFSET (optional): an offset to be added to the initial address accessed
4700         by the data-ref in STMT.
4701    5. BSI: location where the new stmts are to be placed if there is no loop
   6. ONLY_INIT: indicates whether ap is to be updated in the loop, or is to
        remain pointing to the initial address.
4704    7. BYTE_OFFSET (optional, defaults to NULL): a byte offset to be added
4705 	to the initial address accessed by the data-ref in STMT.  This is
4706 	similar to OFFSET, but OFFSET is counted in elements, while BYTE_OFFSET
4707 	in bytes.
4708    8. IV_STEP (optional, defaults to NULL): the amount that should be added
4709 	to the IV during each iteration of the loop.  NULL says to move
4710 	by one copy of AGGR_TYPE up or down, depending on the step of the
4711 	data reference.
4712 
4713    Output:
4714    1. Declare a new ptr to vector_type, and have it point to the base of the
      data reference (initial address accessed by the data reference).
4716       For example, for vector of type V8HI, the following code is generated:
4717 
4718       v8hi *ap;
4719       ap = (v8hi *)initial_address;
4720 
4721       if OFFSET is not supplied:
4722          initial_address = &a[init];
4723       if OFFSET is supplied:
4724          initial_address = &a[init + OFFSET];
4725       if BYTE_OFFSET is supplied:
4726 	 initial_address = &a[init] + BYTE_OFFSET;
4727 
4728       Return the initial_address in INITIAL_ADDRESS.
4729 
4730    2. If ONLY_INIT is true, just return the initial pointer.  Otherwise, also
4731       update the pointer in each iteration of the loop.
4732 
4733       Return the increment stmt that updates the pointer in PTR_INCR.
4734 
4735    3. Set INV_P to true if the access pattern of the data reference in the
4736       vectorized loop is invariant.  Set it to false otherwise.
4737 
4738    4. Return the pointer.  */
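
/* Illustrative (hypothetical) use by a caller vectorizing a load; the
   exact arguments depend on the caller:

     tree dataref_ptr
       = vect_create_data_ref_ptr (stmt, vectype, loop, NULL_TREE,
				   &init_addr, gsi, &ptr_incr,
				   false, &inv_p, NULL_TREE, NULL_TREE);
     tree data_ref = build2 (MEM_REF, vectype, dataref_ptr,
			     build_int_cst (ref_type, 0));

   with bump_vector_ptr used to advance DATAREF_PTR between the vector
   copies generated for one scalar iteration.  */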
4739 
4740 tree
4741 vect_create_data_ref_ptr (gimple *stmt, tree aggr_type, struct loop *at_loop,
4742 			  tree offset, tree *initial_address,
4743 			  gimple_stmt_iterator *gsi, gimple **ptr_incr,
4744 			  bool only_init, bool *inv_p, tree byte_offset,
4745 			  tree iv_step)
4746 {
4747   const char *base_name;
4748   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4749   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4750   struct loop *loop = NULL;
4751   bool nested_in_vect_loop = false;
4752   struct loop *containing_loop = NULL;
4753   tree aggr_ptr_type;
4754   tree aggr_ptr;
4755   tree new_temp;
4756   gimple_seq new_stmt_list = NULL;
4757   edge pe = NULL;
4758   basic_block new_bb;
4759   tree aggr_ptr_init;
4760   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4761   tree aptr;
4762   gimple_stmt_iterator incr_gsi;
4763   bool insert_after;
4764   tree indx_before_incr, indx_after_incr;
4765   gimple *incr;
4766   tree step;
4767   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4768 
4769   gcc_assert (iv_step != NULL_TREE
4770 	      || TREE_CODE (aggr_type) == ARRAY_TYPE
4771 	      || TREE_CODE (aggr_type) == VECTOR_TYPE);
4772 
4773   if (loop_vinfo)
4774     {
4775       loop = LOOP_VINFO_LOOP (loop_vinfo);
4776       nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
4777       containing_loop = (gimple_bb (stmt))->loop_father;
4778       pe = loop_preheader_edge (loop);
4779     }
4780   else
4781     {
4782       gcc_assert (bb_vinfo);
4783       only_init = true;
4784       *ptr_incr = NULL;
4785     }
4786 
4787   /* Check the step (evolution) of the load in LOOP, and record
4788      whether it's invariant.  */
4789   step = vect_dr_behavior (dr)->step;
4790   if (integer_zerop (step))
4791     *inv_p = true;
4792   else
4793     *inv_p = false;
4794 
4795   /* Create an expression for the first address accessed by this load
4796      in LOOP.  */
4797   base_name = get_name (DR_BASE_ADDRESS (dr));
4798 
4799   if (dump_enabled_p ())
4800     {
4801       tree dr_base_type = TREE_TYPE (DR_BASE_OBJECT (dr));
4802       dump_printf_loc (MSG_NOTE, vect_location,
4803                        "create %s-pointer variable to type: ",
4804 		       get_tree_code_name (TREE_CODE (aggr_type)));
4805       dump_generic_expr (MSG_NOTE, TDF_SLIM, aggr_type);
4806       if (TREE_CODE (dr_base_type) == ARRAY_TYPE)
4807         dump_printf (MSG_NOTE, "  vectorizing an array ref: ");
4808       else if (TREE_CODE (dr_base_type) == VECTOR_TYPE)
4809         dump_printf (MSG_NOTE, "  vectorizing a vector ref: ");
4810       else if (TREE_CODE (dr_base_type) == RECORD_TYPE)
4811         dump_printf (MSG_NOTE, "  vectorizing a record based array ref: ");
4812       else
4813         dump_printf (MSG_NOTE, "  vectorizing a pointer ref: ");
4814       dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_BASE_OBJECT (dr));
4815       dump_printf (MSG_NOTE, "\n");
4816     }
4817 
4818   /* (1) Create the new aggregate-pointer variable.
4819      Vector and array types inherit the alias set of their component
4820      type by default so we need to use a ref-all pointer if the data
4821      reference does not conflict with the created aggregated data
4822      reference because it is not addressable.  */
4823   bool need_ref_all = false;
4824   if (!alias_sets_conflict_p (get_alias_set (aggr_type),
4825 			      get_alias_set (DR_REF (dr))))
4826     need_ref_all = true;
4827   /* Likewise for any of the data references in the stmt group.  */
4828   else if (STMT_VINFO_GROUP_SIZE (stmt_info) > 1)
4829     {
4830       gimple *orig_stmt = STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info);
4831       do
4832 	{
4833 	  stmt_vec_info sinfo = vinfo_for_stmt (orig_stmt);
4834 	  struct data_reference *sdr = STMT_VINFO_DATA_REF (sinfo);
4835 	  if (!alias_sets_conflict_p (get_alias_set (aggr_type),
4836 				      get_alias_set (DR_REF (sdr))))
4837 	    {
4838 	      need_ref_all = true;
4839 	      break;
4840 	    }
4841 	  orig_stmt = STMT_VINFO_GROUP_NEXT_ELEMENT (sinfo);
4842 	}
4843       while (orig_stmt);
4844     }
4845   aggr_ptr_type = build_pointer_type_for_mode (aggr_type, ptr_mode,
4846 					       need_ref_all);
4847   aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var, base_name);
4848 
4849 
4850   /* Note: If the dataref is in an inner-loop nested in LOOP, and we are
4851      vectorizing LOOP (i.e., outer-loop vectorization), we need to create two
4852      def-use update cycles for the pointer: one relative to the outer-loop
4853      (LOOP), which is what steps (3) and (4) below do.  The other is relative
4854      to the inner-loop (which is the inner-most loop containing the dataref),
     and this is done by step (5) below.
4856 
4857      When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
4858      inner-most loop, and so steps (3),(4) work the same, and step (5) is
4859      redundant.  Steps (3),(4) create the following:
4860 
4861 	vp0 = &base_addr;
4862 	LOOP:	vp1 = phi(vp0,vp2)
4863 		...
4864 		...
4865 		vp2 = vp1 + step
4866 		goto LOOP
4867 
4868      If there is an inner-loop nested in loop, then step (5) will also be
4869      applied, and an additional update in the inner-loop will be created:
4870 
4871 	vp0 = &base_addr;
4872 	LOOP:   vp1 = phi(vp0,vp2)
4873 		...
4874         inner:     vp3 = phi(vp1,vp4)
4875 	           vp4 = vp3 + inner_step
4876 	           if () goto inner
4877 		...
4878 		vp2 = vp1 + step
4879 		if () goto LOOP   */
4880 
4881   /* (2) Calculate the initial address of the aggregate-pointer, and set
4882      the aggregate-pointer to point to it before the loop.  */
4883 
  /* Create: &(base[init_val+offset]) + byte_offset in the loop preheader.  */
4885 
4886   new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
4887 						   offset, byte_offset);
4888   if (new_stmt_list)
4889     {
4890       if (pe)
4891         {
4892           new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
4893           gcc_assert (!new_bb);
4894         }
4895       else
4896         gsi_insert_seq_before (gsi, new_stmt_list, GSI_SAME_STMT);
4897     }
4898 
4899   *initial_address = new_temp;
4900   aggr_ptr_init = new_temp;
4901 
4902   /* (3) Handle the updating of the aggregate-pointer inside the loop.
4903      This is needed when ONLY_INIT is false, and also when AT_LOOP is the
4904      inner-loop nested in LOOP (during outer-loop vectorization).  */
4905 
4906   /* No update in loop is required.  */
4907   if (only_init && (!loop_vinfo || at_loop == loop))
4908     aptr = aggr_ptr_init;
4909   else
4910     {
4911       if (iv_step == NULL_TREE)
4912 	{
4913 	  /* The step of the aggregate pointer is the type size.  */
4914 	  iv_step = TYPE_SIZE_UNIT (aggr_type);
4915 	  /* One exception to the above is when the scalar step of the load in
4916 	     LOOP is zero. In this case the step here is also zero.  */
4917 	  if (*inv_p)
4918 	    iv_step = size_zero_node;
4919 	  else if (tree_int_cst_sgn (step) == -1)
4920 	    iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
4921 	}
4922 
4923       standard_iv_increment_position (loop, &incr_gsi, &insert_after);
4924 
4925       create_iv (aggr_ptr_init,
4926 		 fold_convert (aggr_ptr_type, iv_step),
4927 		 aggr_ptr, loop, &incr_gsi, insert_after,
4928 		 &indx_before_incr, &indx_after_incr);
4929       incr = gsi_stmt (incr_gsi);
4930       set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
4931 
4932       /* Copy the points-to information if it exists. */
4933       if (DR_PTR_INFO (dr))
4934 	{
4935 	  vect_duplicate_ssa_name_ptr_info (indx_before_incr, dr);
4936 	  vect_duplicate_ssa_name_ptr_info (indx_after_incr, dr);
4937 	}
4938       if (ptr_incr)
4939 	*ptr_incr = incr;
4940 
4941       aptr = indx_before_incr;
4942     }
4943 
4944   if (!nested_in_vect_loop || only_init)
4945     return aptr;
4946 
4947 
4948   /* (4) Handle the updating of the aggregate-pointer inside the inner-loop
     nested in LOOP, if it exists.  */
4950 
4951   gcc_assert (nested_in_vect_loop);
4952   if (!only_init)
4953     {
4954       standard_iv_increment_position (containing_loop, &incr_gsi,
4955 				      &insert_after);
4956       create_iv (aptr, fold_convert (aggr_ptr_type, DR_STEP (dr)), aggr_ptr,
4957 		 containing_loop, &incr_gsi, insert_after, &indx_before_incr,
4958 		 &indx_after_incr);
4959       incr = gsi_stmt (incr_gsi);
4960       set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
4961 
4962       /* Copy the points-to information if it exists. */
4963       if (DR_PTR_INFO (dr))
4964 	{
4965 	  vect_duplicate_ssa_name_ptr_info (indx_before_incr, dr);
4966 	  vect_duplicate_ssa_name_ptr_info (indx_after_incr, dr);
4967 	}
4968       if (ptr_incr)
4969 	*ptr_incr = incr;
4970 
4971       return indx_before_incr;
4972     }
4973   else
4974     gcc_unreachable ();
4975 }
4976 
4977 
4978 /* Function bump_vector_ptr
4979 
4980    Increment a pointer (to a vector type) by vector-size. If requested,
4981    i.e. if PTR-INCR is given, then also connect the new increment stmt
4982    to the existing def-use update-chain of the pointer, by modifying
4983    the PTR_INCR as illustrated below:
4984 
4985    The pointer def-use update-chain before this function:
4986                         DATAREF_PTR = phi (p_0, p_2)
4987                         ....
4988         PTR_INCR:       p_2 = DATAREF_PTR + step
4989 
4990    The pointer def-use update-chain after this function:
4991                         DATAREF_PTR = phi (p_0, p_2)
4992                         ....
4993                         NEW_DATAREF_PTR = DATAREF_PTR + BUMP
4994                         ....
4995         PTR_INCR:       p_2 = NEW_DATAREF_PTR + step
4996 
4997    Input:
4998    DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
4999                  in the loop.
5000    PTR_INCR - optional. The stmt that updates the pointer in each iteration of
5001 	      the loop.  The increment amount across iterations is expected
5002 	      to be vector_size.
5003    BSI - location where the new update stmt is to be placed.
5004    STMT - the original scalar memory-access stmt that is being vectorized.
5005    BUMP - optional. The offset by which to bump the pointer. If not given,
5006 	  the offset is assumed to be vector_size.
5007 
5008    Output: Return NEW_DATAREF_PTR as illustrated above.
5009 
5010 */
5011 
5012 tree
5013 bump_vector_ptr (tree dataref_ptr, gimple *ptr_incr, gimple_stmt_iterator *gsi,
5014 		 gimple *stmt, tree bump)
5015 {
5016   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5017   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
5018   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5019   tree update = TYPE_SIZE_UNIT (vectype);
5020   gassign *incr_stmt;
5021   ssa_op_iter iter;
5022   use_operand_p use_p;
5023   tree new_dataref_ptr;
5024 
5025   if (bump)
5026     update = bump;
5027 
5028   if (TREE_CODE (dataref_ptr) == SSA_NAME)
5029     new_dataref_ptr = copy_ssa_name (dataref_ptr);
5030   else
5031     new_dataref_ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
5032   incr_stmt = gimple_build_assign (new_dataref_ptr, POINTER_PLUS_EXPR,
5033 				   dataref_ptr, update);
5034   vect_finish_stmt_generation (stmt, incr_stmt, gsi);
5035 
5036   /* Copy the points-to information if it exists. */
5037   if (DR_PTR_INFO (dr))
5038     {
5039       duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
5040       mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (new_dataref_ptr));
5041     }
5042 
5043   if (!ptr_incr)
5044     return new_dataref_ptr;
5045 
5046   /* Update the vector-pointer's cross-iteration increment.  */
5047   FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
5048     {
5049       tree use = USE_FROM_PTR (use_p);
5050 
5051       if (use == dataref_ptr)
5052         SET_USE (use_p, new_dataref_ptr);
5053       else
5054         gcc_assert (operand_equal_p (use, update, 0));
5055     }
5056 
5057   return new_dataref_ptr;
5058 }
5059 
5060 
5061 /* Copy memory reference info such as base/clique from the SRC reference
5062    to the DEST MEM_REF.  */
5063 
5064 void
5065 vect_copy_ref_info (tree dest, tree src)
5066 {
5067   if (TREE_CODE (dest) != MEM_REF)
5068     return;
5069 
5070   tree src_base = src;
5071   while (handled_component_p (src_base))
5072     src_base = TREE_OPERAND (src_base, 0);
5073   if (TREE_CODE (src_base) != MEM_REF
5074       && TREE_CODE (src_base) != TARGET_MEM_REF)
5075     return;
5076 
5077   MR_DEPENDENCE_CLIQUE (dest) = MR_DEPENDENCE_CLIQUE (src_base);
5078   MR_DEPENDENCE_BASE (dest) = MR_DEPENDENCE_BASE (src_base);
5079 }
5080 
5081 
5082 /* Function vect_create_destination_var.
5083 
5084    Create a new temporary of type VECTYPE.  */
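
/* For example (illustrative), for SCALAR_DEST x_5 and a non-boolean vector
   VECTYPE the result is a new temporary named "vect_x_5"; with a NULL
   VECTYPE the temporary has the scalar type and is named "stmp_x_5".  */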
5085 
5086 tree
5087 vect_create_destination_var (tree scalar_dest, tree vectype)
5088 {
5089   tree vec_dest;
5090   const char *name;
5091   char *new_name;
5092   tree type;
5093   enum vect_var_kind kind;
5094 
5095   kind = vectype
5096     ? VECTOR_BOOLEAN_TYPE_P (vectype)
5097     ? vect_mask_var
5098     : vect_simple_var
5099     : vect_scalar_var;
5100   type = vectype ? vectype : TREE_TYPE (scalar_dest);
5101 
5102   gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
5103 
5104   name = get_name (scalar_dest);
5105   if (name)
5106     new_name = xasprintf ("%s_%u", name, SSA_NAME_VERSION (scalar_dest));
5107   else
5108     new_name = xasprintf ("_%u", SSA_NAME_VERSION (scalar_dest));
5109   vec_dest = vect_get_new_vect_var (type, kind, new_name);
5110   free (new_name);
5111 
5112   return vec_dest;
5113 }
5114 
5115 /* Function vect_grouped_store_supported.
5116 
5117    Returns TRUE if interleave high and interleave low permutations
5118    are supported, and FALSE otherwise.  */
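
/* For instance (illustrative), for V4SI and a power-of-two group size the
   two selectors whose support is checked below are

     interleave high:  { 0, 4, 1, 5 }
     interleave low:   { 2, 6, 3, 7 }

   i.e. the masks later built by vect_permute_store_chain.  */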
5119 
5120 bool
5121 vect_grouped_store_supported (tree vectype, unsigned HOST_WIDE_INT count)
5122 {
5123   machine_mode mode = TYPE_MODE (vectype);
5124 
5125   /* vect_permute_store_chain requires the group size to be equal to 3 or
5126      be a power of two.  */
5127   if (count != 3 && exact_log2 (count) == -1)
5128     {
5129       if (dump_enabled_p ())
5130 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5131 			 "the size of the group of accesses"
5132 			 " is not a power of 2 or not eqaul to 3\n");
5133       return false;
5134     }
5135 
5136   /* Check that the permutation is supported.  */
5137   if (VECTOR_MODE_P (mode))
5138     {
5139       unsigned int i;
5140       if (count == 3)
5141 	{
5142 	  unsigned int j0 = 0, j1 = 0, j2 = 0;
5143 	  unsigned int i, j;
5144 
5145 	  unsigned int nelt;
5146 	  if (!GET_MODE_NUNITS (mode).is_constant (&nelt))
5147 	    {
5148 	      if (dump_enabled_p ())
5149 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5150 				 "cannot handle groups of 3 stores for"
5151 				 " variable-length vectors\n");
5152 	      return false;
5153 	    }
5154 
5155 	  vec_perm_builder sel (nelt, nelt, 1);
5156 	  sel.quick_grow (nelt);
5157 	  vec_perm_indices indices;
5158 	  for (j = 0; j < 3; j++)
5159 	    {
5160 	      int nelt0 = ((3 - j) * nelt) % 3;
5161 	      int nelt1 = ((3 - j) * nelt + 1) % 3;
5162 	      int nelt2 = ((3 - j) * nelt + 2) % 3;
5163 	      for (i = 0; i < nelt; i++)
5164 		{
5165 		  if (3 * i + nelt0 < nelt)
5166 		    sel[3 * i + nelt0] = j0++;
5167 		  if (3 * i + nelt1 < nelt)
5168 		    sel[3 * i + nelt1] = nelt + j1++;
5169 		  if (3 * i + nelt2 < nelt)
5170 		    sel[3 * i + nelt2] = 0;
5171 		}
5172 	      indices.new_vector (sel, 2, nelt);
5173 	      if (!can_vec_perm_const_p (mode, indices))
5174 		{
5175 		  if (dump_enabled_p ())
5176 		    dump_printf (MSG_MISSED_OPTIMIZATION,
5177 				 "permutation op not supported by target.\n");
5178 		  return false;
5179 		}
5180 
5181 	      for (i = 0; i < nelt; i++)
5182 		{
5183 		  if (3 * i + nelt0 < nelt)
5184 		    sel[3 * i + nelt0] = 3 * i + nelt0;
5185 		  if (3 * i + nelt1 < nelt)
5186 		    sel[3 * i + nelt1] = 3 * i + nelt1;
5187 		  if (3 * i + nelt2 < nelt)
5188 		    sel[3 * i + nelt2] = nelt + j2++;
5189 		}
5190 	      indices.new_vector (sel, 2, nelt);
5191 	      if (!can_vec_perm_const_p (mode, indices))
5192 		{
5193 		  if (dump_enabled_p ())
5194 		    dump_printf (MSG_MISSED_OPTIMIZATION,
5195 				 "permutation op not supported by target.\n");
5196 		  return false;
5197 		}
5198 	    }
5199 	  return true;
5200 	}
5201       else
5202 	{
	  /* If length is not equal to 3 then only a power of 2 is supported.  */
5204 	  gcc_assert (pow2p_hwi (count));
5205 	  poly_uint64 nelt = GET_MODE_NUNITS (mode);
5206 
5207 	  /* The encoding has 2 interleaved stepped patterns.  */
5208 	  vec_perm_builder sel (nelt, 2, 3);
5209 	  sel.quick_grow (6);
5210 	  for (i = 0; i < 3; i++)
5211 	    {
5212 	      sel[i * 2] = i;
5213 	      sel[i * 2 + 1] = i + nelt;
5214 	    }
5215 	  vec_perm_indices indices (sel, 2, nelt);
5216 	  if (can_vec_perm_const_p (mode, indices))
5217 	    {
5218 	      for (i = 0; i < 6; i++)
5219 		sel[i] += exact_div (nelt, 2);
5220 	      indices.new_vector (sel, 2, nelt);
5221 	      if (can_vec_perm_const_p (mode, indices))
5222 		return true;
5223 	    }
5224 	}
5225     }
5226 
5227   if (dump_enabled_p ())
5228     dump_printf (MSG_MISSED_OPTIMIZATION,
5229 		 "permutaion op not supported by target.\n");
5230   return false;
5231 }
5232 
5233 
5234 /* Return TRUE if vec_{mask_}store_lanes is available for COUNT vectors of
5235    type VECTYPE.  MASKED_P says whether the masked form is needed.  */
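
/* Hypothetical example (not code from this file): a caller choosing how to
   implement a grouped store of GROUP_SIZE vectors might test

     if (vect_store_lanes_supported (vectype, group_size, masked_p))
       ...emit a store-lanes internal function...
     else if (vect_grouped_store_supported (vectype, group_size))
       ...fall back to vect_permute_store_chain...  */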
5236 
5237 bool
5238 vect_store_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count,
5239 			    bool masked_p)
5240 {
5241   if (masked_p)
5242     return vect_lanes_optab_supported_p ("vec_mask_store_lanes",
5243 					 vec_mask_store_lanes_optab,
5244 					 vectype, count);
5245   else
5246     return vect_lanes_optab_supported_p ("vec_store_lanes",
5247 					 vec_store_lanes_optab,
5248 					 vectype, count);
5249 }
5250 
5251 
5252 /* Function vect_permute_store_chain.
5253 
5254    Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
5255    a power of 2 or equal to 3, generate interleave_high/low stmts to reorder
5256    the data correctly for the stores.  Return the final references for stores
5257    in RESULT_CHAIN.
5258 
5259    E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
5260    The input is 4 vectors each containing 8 elements.  We assign a number to
5261    each element, the input sequence is:
5262 
5263    1st vec:   0  1  2  3  4  5  6  7
5264    2nd vec:   8  9 10 11 12 13 14 15
5265    3rd vec:  16 17 18 19 20 21 22 23
5266    4th vec:  24 25 26 27 28 29 30 31
5267 
5268    The output sequence should be:
5269 
5270    1st vec:  0  8 16 24  1  9 17 25
5271    2nd vec:  2 10 18 26  3 11 19 27
   3rd vec:  4 12 20 28  5 13 21 29
5273    4th vec:  6 14 22 30  7 15 23 31
5274 
5275    i.e., we interleave the contents of the four vectors in their order.
5276 
5277    We use interleave_high/low instructions to create such output.  The input of
5278    each interleave_high/low operation is two vectors:
5279    1st vec    2nd vec
5280    0 1 2 3    4 5 6 7
5281    the even elements of the result vector are obtained left-to-right from the
5282    high/low elements of the first vector.  The odd elements of the result are
5283    obtained left-to-right from the high/low elements of the second vector.
5284    The output of interleave_high will be:   0 4 1 5
5285    and of interleave_low:                   2 6 3 7
5286 
5287 
5288    The permutation is done in log LENGTH stages.  In each stage interleave_high
5289    and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
5290    where the first argument is taken from the first half of DR_CHAIN and the
   second argument from its second half.
5292    In our example,
5293 
5294    I1: interleave_high (1st vec, 3rd vec)
5295    I2: interleave_low (1st vec, 3rd vec)
5296    I3: interleave_high (2nd vec, 4th vec)
5297    I4: interleave_low (2nd vec, 4th vec)
5298 
5299    The output for the first stage is:
5300 
5301    I1:  0 16  1 17  2 18  3 19
5302    I2:  4 20  5 21  6 22  7 23
5303    I3:  8 24  9 25 10 26 11 27
5304    I4: 12 28 13 29 14 30 15 31
5305 
5306    The output of the second stage, i.e. the final result is:
5307 
5308    I1:  0  8 16 24  1  9 17 25
5309    I2:  2 10 18 26  3 11 19 27
   I3:  4 12 20 28  5 13 21 29
5311    I4:  6 14 22 30  7 15 23 31.  */
5312 
5313 void
5314 vect_permute_store_chain (vec<tree> dr_chain,
5315 			  unsigned int length,
5316 			  gimple *stmt,
5317 			  gimple_stmt_iterator *gsi,
5318 			  vec<tree> *result_chain)
5319 {
5320   tree vect1, vect2, high, low;
5321   gimple *perm_stmt;
5322   tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
5323   tree perm_mask_low, perm_mask_high;
5324   tree data_ref;
5325   tree perm3_mask_low, perm3_mask_high;
5326   unsigned int i, j, n, log_length = exact_log2 (length);
5327 
5328   result_chain->quick_grow (length);
5329   memcpy (result_chain->address (), dr_chain.address (),
5330 	  length * sizeof (tree));
5331 
5332   if (length == 3)
5333     {
5334       /* vect_grouped_store_supported ensures that this is constant.  */
5335       unsigned int nelt = TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
5336       unsigned int j0 = 0, j1 = 0, j2 = 0;
5337 
5338       vec_perm_builder sel (nelt, nelt, 1);
5339       sel.quick_grow (nelt);
5340       vec_perm_indices indices;
5341       for (j = 0; j < 3; j++)
5342         {
5343 	  int nelt0 = ((3 - j) * nelt) % 3;
5344 	  int nelt1 = ((3 - j) * nelt + 1) % 3;
5345 	  int nelt2 = ((3 - j) * nelt + 2) % 3;
5346 
5347 	  for (i = 0; i < nelt; i++)
5348 	    {
5349 	      if (3 * i + nelt0 < nelt)
5350 		sel[3 * i + nelt0] = j0++;
5351 	      if (3 * i + nelt1 < nelt)
5352 		sel[3 * i + nelt1] = nelt + j1++;
5353 	      if (3 * i + nelt2 < nelt)
5354 		sel[3 * i + nelt2] = 0;
5355 	    }
5356 	  indices.new_vector (sel, 2, nelt);
5357 	  perm3_mask_low = vect_gen_perm_mask_checked (vectype, indices);
5358 
5359 	  for (i = 0; i < nelt; i++)
5360 	    {
5361 	      if (3 * i + nelt0 < nelt)
5362 		sel[3 * i + nelt0] = 3 * i + nelt0;
5363 	      if (3 * i + nelt1 < nelt)
5364 		sel[3 * i + nelt1] = 3 * i + nelt1;
5365 	      if (3 * i + nelt2 < nelt)
5366 		sel[3 * i + nelt2] = nelt + j2++;
5367 	    }
5368 	  indices.new_vector (sel, 2, nelt);
5369 	  perm3_mask_high = vect_gen_perm_mask_checked (vectype, indices);
5370 
5371 	  vect1 = dr_chain[0];
5372 	  vect2 = dr_chain[1];
5373 
5374 	  /* Create interleaving stmt:
5375 	     low = VEC_PERM_EXPR <vect1, vect2,
5376 				  {j, nelt, *, j + 1, nelt + j + 1, *,
5377 				   j + 2, nelt + j + 2, *, ...}>  */
5378 	  data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3_low");
5379 	  perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, vect1,
5380 					   vect2, perm3_mask_low);
5381 	  vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5382 
5383 	  vect1 = data_ref;
5384 	  vect2 = dr_chain[2];
5385 	  /* Create interleaving stmt:
	     high = VEC_PERM_EXPR <vect1, vect2,
				   {0, 1, nelt + j, 3, 4, nelt + j + 1,
				    6, 7, nelt + j + 2, ...}>  */
5389 	  data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3_high");
5390 	  perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, vect1,
5391 					   vect2, perm3_mask_high);
5392 	  vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5393 	  (*result_chain)[j] = data_ref;
5394 	}
5395     }
5396   else
5397     {
      /* If length is not equal to 3 then only a power of 2 is supported.  */
5399       gcc_assert (pow2p_hwi (length));
5400 
5401       /* The encoding has 2 interleaved stepped patterns.  */
5402       poly_uint64 nelt = TYPE_VECTOR_SUBPARTS (vectype);
5403       vec_perm_builder sel (nelt, 2, 3);
5404       sel.quick_grow (6);
5405       for (i = 0; i < 3; i++)
5406 	{
5407 	  sel[i * 2] = i;
5408 	  sel[i * 2 + 1] = i + nelt;
5409 	}
      vec_perm_indices indices (sel, 2, nelt);
      perm_mask_high = vect_gen_perm_mask_checked (vectype, indices);

      for (i = 0; i < 6; i++)
	sel[i] += exact_div (nelt, 2);
      indices.new_vector (sel, 2, nelt);
      perm_mask_low = vect_gen_perm_mask_checked (vectype, indices);

      for (i = 0, n = log_length; i < n; i++)
	{
	  for (j = 0; j < length/2; j++)
	    {
	      vect1 = dr_chain[j];
	      vect2 = dr_chain[j+length/2];

	      /* Create interleaving stmt:
		 high = VEC_PERM_EXPR <vect1, vect2, {0, nelt, 1, nelt+1,
						      ...}>  */
	      high = make_temp_ssa_name (vectype, NULL, "vect_inter_high");
	      perm_stmt = gimple_build_assign (high, VEC_PERM_EXPR, vect1,
					       vect2, perm_mask_high);
	      vect_finish_stmt_generation (stmt, perm_stmt, gsi);
	      (*result_chain)[2*j] = high;

	      /* Create interleaving stmt:
		 low = VEC_PERM_EXPR <vect1, vect2,
				      {nelt/2, nelt*3/2, nelt/2+1, nelt*3/2+1,
				       ...}>  */
	      low = make_temp_ssa_name (vectype, NULL, "vect_inter_low");
	      perm_stmt = gimple_build_assign (low, VEC_PERM_EXPR, vect1,
					       vect2, perm_mask_low);
	      vect_finish_stmt_generation (stmt, perm_stmt, gsi);
	      (*result_chain)[2*j+1] = low;
	    }
	  memcpy (dr_chain.address (), result_chain->address (),
		  length * sizeof (tree));
	}
5447     }
5448 }
5449 
5450 /* Function vect_setup_realignment
5451 
5452    This function is called when vectorizing an unaligned load using
5453    the dr_explicit_realign[_optimized] scheme.
5454    This function generates the following code at the loop prolog:
5455 
5456       p = initial_addr;
5457    x  msq_init = *(floor(p));   # prolog load
5458       realignment_token = call target_builtin;
5459     loop:
5460    x  msq = phi (msq_init, ---)
5461 
5462    The stmts marked with x are generated only for the case of
5463    dr_explicit_realign_optimized.
5464 
5465    The code above sets up a new (vector) pointer, pointing to the first
5466    location accessed by STMT, and a "floor-aligned" load using that pointer.
5467    It also generates code to compute the "realignment-token" (if the relevant
5468    target hook was defined), and creates a phi-node at the loop-header bb
5469    whose arguments are the result of the prolog-load (created by this
5470    function) and the result of a load that takes place in the loop (to be
5471    created by the caller to this function).
5472 
5473    For the case of dr_explicit_realign_optimized:
5474    The caller to this function uses the phi-result (msq) to create the
5475    realignment code inside the loop, and sets up the missing phi argument,
5476    as follows:
5477     loop:
5478       msq = phi (msq_init, lsq)
5479       lsq = *(floor(p'));        # load in loop
5480       result = realign_load (msq, lsq, realignment_token);
5481 
5482    For the case of dr_explicit_realign:
5483     loop:
5484       msq = *(floor(p)); 	# load in loop
5485       p' = p + (VS-1);
5486       lsq = *(floor(p'));	# load in loop
5487       result = realign_load (msq, lsq, realignment_token);
5488 
5489    Input:
5490    STMT - (scalar) load stmt to be vectorized. This load accesses
5491           a memory location that may be unaligned.
5492    BSI - place where new code is to be inserted.
5493    ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
5494 			      is used.
5495 
5496    Output:
5497    REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
5498                        target hook, if defined.
5499    Return value - the result of the loop-header phi node.  */
5500 
5501 tree
5502 vect_setup_realignment (gimple *stmt, gimple_stmt_iterator *gsi,
5503                         tree *realignment_token,
5504 			enum dr_alignment_support alignment_support_scheme,
5505 			tree init_addr,
5506 			struct loop **at_loop)
5507 {
5508   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5509   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5510   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5511   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
5512   struct loop *loop = NULL;
5513   edge pe = NULL;
5514   tree scalar_dest = gimple_assign_lhs (stmt);
5515   tree vec_dest;
5516   gimple *inc;
5517   tree ptr;
5518   tree data_ref;
5519   basic_block new_bb;
5520   tree msq_init = NULL_TREE;
5521   tree new_temp;
5522   gphi *phi_stmt;
5523   tree msq = NULL_TREE;
5524   gimple_seq stmts = NULL;
5525   bool inv_p;
5526   bool compute_in_loop = false;
5527   bool nested_in_vect_loop = false;
5528   struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
5529   struct loop *loop_for_initial_load = NULL;
5530 
5531   if (loop_vinfo)
5532     {
5533       loop = LOOP_VINFO_LOOP (loop_vinfo);
5534       nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
5535     }
5536 
5537   gcc_assert (alignment_support_scheme == dr_explicit_realign
5538 	      || alignment_support_scheme == dr_explicit_realign_optimized);
5539 
5540   /* We need to generate three things:
5541      1. the misalignment computation
5542      2. the extra vector load (for the optimized realignment scheme).
5543      3. the phi node for the two vectors from which the realignment is
5544       done (for the optimized realignment scheme).  */
5545 
5546   /* 1. Determine where to generate the misalignment computation.
5547 
5548      If INIT_ADDR is NULL_TREE, this indicates that the misalignment
5549      calculation will be generated by this function, outside the loop (in the
5550      preheader).  Otherwise, INIT_ADDR had already been computed for us by the
5551      caller, inside the loop.
5552 
5553      Background: If the misalignment remains fixed throughout the iterations of
5554      the loop, then both realignment schemes are applicable, and also the
5555      misalignment computation can be done outside LOOP.  This is because we are
5556      vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
5557      are a multiple of VS (the Vector Size), and therefore the misalignment in
5558      different vectorized LOOP iterations is always the same.
5559      The problem arises only if the memory access is in an inner-loop nested
5560      inside LOOP, which is now being vectorized using outer-loop vectorization.
5561      This is the only case when the misalignment of the memory access may not
5562      remain fixed throughout the iterations of the inner-loop (as explained in
5563      detail in vect_supportable_dr_alignment).  In this case, not only is the
5564      optimized realignment scheme not applicable, but also the misalignment
5565      computation (and generation of the realignment token that is passed to
5566      REALIGN_LOAD) have to be done inside the loop.
5567 
5568      In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
5569      or not, which in turn determines if the misalignment is computed inside
5570      the inner-loop, or outside LOOP.  */
5571 
5572   if (init_addr != NULL_TREE || !loop_vinfo)
5573     {
5574       compute_in_loop = true;
5575       gcc_assert (alignment_support_scheme == dr_explicit_realign);
5576     }
5577 
5578 
5579   /* 2. Determine where to generate the extra vector load.
5580 
5581      For the optimized realignment scheme, instead of generating two vector
5582      loads in each iteration, we generate a single extra vector load in the
5583      preheader of the loop, and in each iteration reuse the result of the
5584      vector load from the previous iteration.  In case the memory access is in
5585      an inner-loop nested inside LOOP, which is now being vectorized using
5586      outer-loop vectorization, we need to determine whether this initial vector
5587      load should be generated at the preheader of the inner-loop, or can be
5588      generated at the preheader of LOOP.  If the memory access has no evolution
5589      in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
5590      to be generated inside LOOP (in the preheader of the inner-loop).  */
5591 
5592   if (nested_in_vect_loop)
5593     {
5594       tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
5595       bool invariant_in_outerloop =
5596             (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
5597       loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
5598     }
5599   else
5600     loop_for_initial_load = loop;
5601   if (at_loop)
5602     *at_loop = loop_for_initial_load;
5603 
5604   if (loop_for_initial_load)
5605     pe = loop_preheader_edge (loop_for_initial_load);
5606 
5607   /* 3. For the case of the optimized realignment, create the first vector
5608       load at the loop preheader.  */
5609 
5610   if (alignment_support_scheme == dr_explicit_realign_optimized)
5611     {
5612       /* Create msq_init = *(floor(p1)) in the loop preheader  */
5613       gassign *new_stmt;
5614 
5615       gcc_assert (!compute_in_loop);
5616       vec_dest = vect_create_destination_var (scalar_dest, vectype);
5617       ptr = vect_create_data_ref_ptr (stmt, vectype, loop_for_initial_load,
5618 				      NULL_TREE, &init_addr, NULL, &inc,
5619 				      true, &inv_p);
5620       if (TREE_CODE (ptr) == SSA_NAME)
5621 	new_temp = copy_ssa_name (ptr);
5622       else
5623 	new_temp = make_ssa_name (TREE_TYPE (ptr));
5624       unsigned int align = DR_TARGET_ALIGNMENT (dr);
5625       new_stmt = gimple_build_assign
5626 		   (new_temp, BIT_AND_EXPR, ptr,
5627 		    build_int_cst (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
5628       new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
5629       gcc_assert (!new_bb);
5630       data_ref
5631 	= build2 (MEM_REF, TREE_TYPE (vec_dest), new_temp,
5632 		  build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0));
5633       vect_copy_ref_info (data_ref, DR_REF (dr));
5634       new_stmt = gimple_build_assign (vec_dest, data_ref);
5635       new_temp = make_ssa_name (vec_dest, new_stmt);
5636       gimple_assign_set_lhs (new_stmt, new_temp);
5637       if (pe)
5638         {
5639           new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
5640           gcc_assert (!new_bb);
5641         }
5642       else
5643          gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
5644 
5645       msq_init = gimple_assign_lhs (new_stmt);
5646     }
5647 
5648   /* 4. Create realignment token using a target builtin, if available.
5649       It is done either inside the containing loop, or before LOOP (as
5650       determined above).  */
5651 
5652   if (targetm.vectorize.builtin_mask_for_load)
5653     {
5654       gcall *new_stmt;
5655       tree builtin_decl;
5656 
      /* Compute INIT_ADDR - the initial address accessed by this memref.  */
5658       if (!init_addr)
5659 	{
5660 	  /* Generate the INIT_ADDR computation outside LOOP.  */
5661 	  init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts,
5662 							    NULL_TREE);
5663           if (loop)
5664             {
5665    	      pe = loop_preheader_edge (loop);
5666 	      new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
5667 	      gcc_assert (!new_bb);
5668             }
5669           else
5670              gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
5671 	}
5672 
5673       builtin_decl = targetm.vectorize.builtin_mask_for_load ();
5674       new_stmt = gimple_build_call (builtin_decl, 1, init_addr);
5675       vec_dest =
5676 	vect_create_destination_var (scalar_dest,
5677 				     gimple_call_return_type (new_stmt));
5678       new_temp = make_ssa_name (vec_dest, new_stmt);
5679       gimple_call_set_lhs (new_stmt, new_temp);
5680 
5681       if (compute_in_loop)
5682 	gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
5683       else
5684 	{
5685 	  /* Generate the misalignment computation outside LOOP.  */
5686 	  pe = loop_preheader_edge (loop);
5687 	  new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
5688 	  gcc_assert (!new_bb);
5689 	}
5690 
5691       *realignment_token = gimple_call_lhs (new_stmt);
5692 
5693       /* The result of the CALL_EXPR to this builtin is determined from
5694          the value of the parameter and no global variables are touched
5695          which makes the builtin a "const" function.  Requiring the
5696          builtin to have the "const" attribute makes it unnecessary
5697          to call mark_call_clobbered.  */
5698       gcc_assert (TREE_READONLY (builtin_decl));
5699     }
5700 
5701   if (alignment_support_scheme == dr_explicit_realign)
5702     return msq;
5703 
5704   gcc_assert (!compute_in_loop);
5705   gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);
5706 
5707 
5708   /* 5. Create msq = phi <msq_init, lsq> in loop  */
5709 
5710   pe = loop_preheader_edge (containing_loop);
5711   vec_dest = vect_create_destination_var (scalar_dest, vectype);
5712   msq = make_ssa_name (vec_dest);
5713   phi_stmt = create_phi_node (msq, containing_loop->header);
5714   add_phi_arg (phi_stmt, msq_init, pe, UNKNOWN_LOCATION);
5715 
5716   return msq;
5717 }
5718 
5719 
5720 /* Function vect_grouped_load_supported.
5721 
5722    COUNT is the size of the load group (the number of statements plus the
5723    number of gaps).  SINGLE_ELEMENT_P is true if there is actually
5724    only one statement, with a gap of COUNT - 1.
5725 
5726    Returns true if a suitable permute exists.  */
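
/* For instance (illustrative), for V4SI and a power-of-two COUNT the two
   selectors whose support is checked below are

     extract even:  { 0, 2, 4, 6 }
     extract odd:   { 1, 3, 5, 7 }

   matching the masks used by vect_permute_load_chain.  */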
5727 
5728 bool
5729 vect_grouped_load_supported (tree vectype, bool single_element_p,
5730 			     unsigned HOST_WIDE_INT count)
5731 {
5732   machine_mode mode = TYPE_MODE (vectype);
5733 
5734   /* If this is single-element interleaving with an element distance
     that leaves unused vector loads around, punt - we at least create
5736      very sub-optimal code in that case (and blow up memory,
5737      see PR65518).  */
5738   if (single_element_p && maybe_gt (count, TYPE_VECTOR_SUBPARTS (vectype)))
5739     {
5740       if (dump_enabled_p ())
5741 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5742 			 "single-element interleaving not supported "
5743 			 "for not adjacent vector loads\n");
5744       return false;
5745     }
5746 
5747   /* vect_permute_load_chain requires the group size to be equal to 3 or
5748      be a power of two.  */
5749   if (count != 3 && exact_log2 (count) == -1)
5750     {
5751       if (dump_enabled_p ())
5752 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5753 			 "the size of the group of accesses"
5754 			 " is not a power of 2 or not equal to 3\n");
5755       return false;
5756     }
5757 
5758   /* Check that the permutation is supported.  */
5759   if (VECTOR_MODE_P (mode))
5760     {
5761       unsigned int i, j;
5762       if (count == 3)
5763 	{
5764 	  unsigned int nelt;
5765 	  if (!GET_MODE_NUNITS (mode).is_constant (&nelt))
5766 	    {
5767 	      if (dump_enabled_p ())
5768 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5769 				 "cannot handle groups of 3 loads for"
5770 				 " variable-length vectors\n");
5771 	      return false;
5772 	    }
5773 
5774 	  vec_perm_builder sel (nelt, nelt, 1);
5775 	  sel.quick_grow (nelt);
5776 	  vec_perm_indices indices;
5777 	  unsigned int k;
5778 	  for (k = 0; k < 3; k++)
5779 	    {
5780 	      for (i = 0; i < nelt; i++)
5781 		if (3 * i + k < 2 * nelt)
5782 		  sel[i] = 3 * i + k;
5783 		else
5784 		  sel[i] = 0;
5785 	      indices.new_vector (sel, 2, nelt);
5786 	      if (!can_vec_perm_const_p (mode, indices))
5787 		{
5788 		  if (dump_enabled_p ())
5789 		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5790 				     "shuffle of 3 loads is not supported by"
5791 				     " target\n");
5792 		  return false;
5793 		}
5794 	      for (i = 0, j = 0; i < nelt; i++)
5795 		if (3 * i + k < 2 * nelt)
5796 		  sel[i] = i;
5797 		else
5798 		  sel[i] = nelt + ((nelt + k) % 3) + 3 * (j++);
5799 	      indices.new_vector (sel, 2, nelt);
5800 	      if (!can_vec_perm_const_p (mode, indices))
5801 		{
5802 		  if (dump_enabled_p ())
5803 		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5804 				     "shuffle of 3 loads is not supported by"
5805 				     " target\n");
5806 		  return false;
5807 		}
5808 	    }
5809 	  return true;
5810 	}
5811       else
5812 	{
	  /* If length is not equal to 3 then only a power of 2 is supported.  */
5814 	  gcc_assert (pow2p_hwi (count));
5815 	  poly_uint64 nelt = GET_MODE_NUNITS (mode);
5816 
5817 	  /* The encoding has a single stepped pattern.  */
5818 	  vec_perm_builder sel (nelt, 1, 3);
5819 	  sel.quick_grow (3);
5820 	  for (i = 0; i < 3; i++)
5821 	    sel[i] = i * 2;
5822 	  vec_perm_indices indices (sel, 2, nelt);
5823 	  if (can_vec_perm_const_p (mode, indices))
5824 	    {
5825 	      for (i = 0; i < 3; i++)
5826 		sel[i] = i * 2 + 1;
5827 	      indices.new_vector (sel, 2, nelt);
5828 	      if (can_vec_perm_const_p (mode, indices))
5829 		return true;
5830 	    }
5831         }
5832     }
5833 
5834   if (dump_enabled_p ())
5835     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5836 		     "extract even/odd not supported by target\n");
5837   return false;
5838 }
5839 
5840 /* Return TRUE if vec_{masked_}load_lanes is available for COUNT vectors of
5841    type VECTYPE.  MASKED_P says whether the masked form is needed.  */
5842 
5843 bool
5844 vect_load_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count,
5845 			   bool masked_p)
5846 {
5847   if (masked_p)
5848     return vect_lanes_optab_supported_p ("vec_mask_load_lanes",
5849 					 vec_mask_load_lanes_optab,
5850 					 vectype, count);
5851   else
5852     return vect_lanes_optab_supported_p ("vec_load_lanes",
5853 					 vec_load_lanes_optab,
5854 					 vectype, count);
5855 }
5856 
5857 /* Function vect_permute_load_chain.
5858 
5859    Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
5860    a power of 2 or equal to 3, generate extract_even/odd stmts to reorder
5861    the input data correctly.  Return the final references for loads in
5862    RESULT_CHAIN.
5863 
5864    E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
5865    The input is 4 vectors each containing 8 elements. We assign a number to each
5866    element, the input sequence is:
5867 
5868    1st vec:   0  1  2  3  4  5  6  7
5869    2nd vec:   8  9 10 11 12 13 14 15
5870    3rd vec:  16 17 18 19 20 21 22 23
5871    4th vec:  24 25 26 27 28 29 30 31
5872 
5873    The output sequence should be:
5874 
5875    1st vec:  0 4  8 12 16 20 24 28
5876    2nd vec:  1 5  9 13 17 21 25 29
5877    3rd vec:  2 6 10 14 18 22 26 30
5878    4th vec:  3 7 11 15 19 23 27 31
5879 
5880    i.e., the first output vector should contain the first elements of each
5881    interleaving group, etc.
5882 
5883    We use extract_even/odd instructions to create such output.  The input of
5884    each extract_even/odd operation is two vectors
5885    1st vec    2nd vec
5886    0 1 2 3    4 5 6 7
5887 
5888    and the output is the vector of extracted even/odd elements.  The output of
5889    extract_even will be:   0 2 4 6
5890    and of extract_odd:     1 3 5 7
5891 
5892 
5893    The permutation is done in log LENGTH stages.  In each stage extract_even
5894    and extract_odd stmts are created for each pair of vectors in DR_CHAIN in
5895    their order.  In our example,
5896 
5897    E1: extract_even (1st vec, 2nd vec)
5898    E2: extract_odd (1st vec, 2nd vec)
5899    E3: extract_even (3rd vec, 4th vec)
5900    E4: extract_odd (3rd vec, 4th vec)
5901 
5902    The output for the first stage will be:
5903 
5904    E1:  0  2  4  6  8 10 12 14
5905    E2:  1  3  5  7  9 11 13 15
5906    E3: 16 18 20 22 24 26 28 30
5907    E4: 17 19 21 23 25 27 29 31
5908 
5909    In order to proceed and create the correct sequence for the next stage (or
5910    for the correct output, if the second stage is the last one, as in our
5911    example), we first put the output of extract_even operation and then the
5912    output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
5913    The input for the second stage is:
5914 
5915    1st vec (E1):  0  2  4  6  8 10 12 14
5916    2nd vec (E3): 16 18 20 22 24 26 28 30
5917    3rd vec (E2):  1  3  5  7  9 11 13 15
5918    4th vec (E4): 17 19 21 23 25 27 29 31
5919 
5920    The output of the second stage:
5921 
5922    E1: 0 4  8 12 16 20 24 28
5923    E2: 2 6 10 14 18 22 26 30
5924    E3: 1 5  9 13 17 21 25 29
5925    E4: 3 7 11 15 19 23 27 31
5926 
5927    And RESULT_CHAIN after reordering:
5928 
5929    1st vec (E1):  0 4  8 12 16 20 24 28
5930    2nd vec (E3):  1 5  9 13 17 21 25 29
5931    3rd vec (E2):  2 6 10 14 18 22 26 30
5932    4th vec (E4):  3 7 11 15 19 23 27 31.  */
5933 
5934 static void
5935 vect_permute_load_chain (vec<tree> dr_chain,
5936 			 unsigned int length,
5937 			 gimple *stmt,
5938 			 gimple_stmt_iterator *gsi,
5939 			 vec<tree> *result_chain)
5940 {
5941   tree data_ref, first_vect, second_vect;
5942   tree perm_mask_even, perm_mask_odd;
5943   tree perm3_mask_low, perm3_mask_high;
5944   gimple *perm_stmt;
5945   tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
5946   unsigned int i, j, log_length = exact_log2 (length);
5947 
5948   result_chain->quick_grow (length);
5949   memcpy (result_chain->address (), dr_chain.address (),
5950 	  length * sizeof (tree));
5951 
5952   if (length == 3)
5953     {
5954       /* vect_grouped_load_supported ensures that this is constant.  */
5955       unsigned nelt = TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
5956       unsigned int k;
5957 
5958       vec_perm_builder sel (nelt, nelt, 1);
5959       sel.quick_grow (nelt);
5960       vec_perm_indices indices;
5961       for (k = 0; k < 3; k++)
5962 	{
5963 	  for (i = 0; i < nelt; i++)
5964 	    if (3 * i + k < 2 * nelt)
5965 	      sel[i] = 3 * i + k;
5966 	    else
5967 	      sel[i] = 0;
5968 	  indices.new_vector (sel, 2, nelt);
5969 	  perm3_mask_low = vect_gen_perm_mask_checked (vectype, indices);
5970 
5971 	  for (i = 0, j = 0; i < nelt; i++)
5972 	    if (3 * i + k < 2 * nelt)
5973 	      sel[i] = i;
5974 	    else
5975 	      sel[i] = nelt + ((nelt + k) % 3) + 3 * (j++);
5976 	  indices.new_vector (sel, 2, nelt);
5977 	  perm3_mask_high = vect_gen_perm_mask_checked (vectype, indices);
5978 
5979 	  first_vect = dr_chain[0];
5980 	  second_vect = dr_chain[1];
5981 
5982 	  /* Create interleaving stmt (low part of):
	     low = VEC_PERM_EXPR <first_vect, second_vect, {k, 3 + k, 6 + k,
							    ...}>  */
5985 	  data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3_low");
5986 	  perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, first_vect,
5987 					   second_vect, perm3_mask_low);
5988 	  vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5989 
5990 	  /* Create interleaving stmt (high part of):
	     high = VEC_PERM_EXPR <first_vect, second_vect, {k, 3 + k, 6 + k,
							     ...}>  */
5993 	  first_vect = data_ref;
5994 	  second_vect = dr_chain[2];
5995 	  data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3_high");
5996 	  perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, first_vect,
5997 					   second_vect, perm3_mask_high);
5998 	  vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5999 	  (*result_chain)[k] = data_ref;
6000 	}
6001     }
6002   else
6003     {
      /* If length is not equal to 3 then only a power of 2 is supported.  */
6005       gcc_assert (pow2p_hwi (length));
6006 
6007       /* The encoding has a single stepped pattern.  */
6008       poly_uint64 nelt = TYPE_VECTOR_SUBPARTS (vectype);
6009       vec_perm_builder sel (nelt, 1, 3);
6010       sel.quick_grow (3);
6011       for (i = 0; i < 3; ++i)
6012 	sel[i] = i * 2;
6013       vec_perm_indices indices (sel, 2, nelt);
6014       perm_mask_even = vect_gen_perm_mask_checked (vectype, indices);
6015 
6016       for (i = 0; i < 3; ++i)
6017 	sel[i] = i * 2 + 1;
6018       indices.new_vector (sel, 2, nelt);
6019       perm_mask_odd = vect_gen_perm_mask_checked (vectype, indices);
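      /* E.g., for nelt == 8 the even mask expands to
	 {0 2 4 6 8 10 12 14} and the odd mask to {1 3 5 7 9 11 13 15},
	 selecting the even- and odd-numbered elements of the concatenation
	 of each pair of input vectors.  */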
6020 
6021       for (i = 0; i < log_length; i++)
6022 	{
6023 	  for (j = 0; j < length; j += 2)
6024 	    {
6025 	      first_vect = dr_chain[j];
6026 	      second_vect = dr_chain[j+1];
6027 
6028 	      /* data_ref = permute_even (first_data_ref, second_data_ref);  */
6029 	      data_ref = make_temp_ssa_name (vectype, NULL, "vect_perm_even");
6030 	      perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
6031 					       first_vect, second_vect,
6032 					       perm_mask_even);
6033 	      vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6034 	      (*result_chain)[j/2] = data_ref;
6035 
6036 	      /* data_ref = permute_odd (first_data_ref, second_data_ref);  */
6037 	      data_ref = make_temp_ssa_name (vectype, NULL, "vect_perm_odd");
6038 	      perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
6039 					       first_vect, second_vect,
6040 					       perm_mask_odd);
6041 	      vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6042 	      (*result_chain)[j/2+length/2] = data_ref;
6043 	    }
6044 	  memcpy (dr_chain.address (), result_chain->address (),
6045 		  length * sizeof (tree));
6046 	}
6047     }
6048 }
6049 
6050 /* Function vect_shift_permute_load_chain.
6051 
   Given a chain of loads in DR_CHAIN of LENGTH 2 or 3, generate a
   sequence of stmts to reorder the input data accordingly.
   Return the final references for loads in RESULT_CHAIN.
   Return true if successful, false otherwise.
6056 
6057    E.g., LENGTH is 3 and the scalar type is short, i.e., VF is 8.
6058    The input is 3 vectors each containing 8 elements.  We assign a
6059    number to each element, the input sequence is:
6060 
6061    1st vec:   0  1  2  3  4  5  6  7
6062    2nd vec:   8  9 10 11 12 13 14 15
6063    3rd vec:  16 17 18 19 20 21 22 23
6064 
6065    The output sequence should be:
6066 
6067    1st vec:  0 3 6  9 12 15 18 21
6068    2nd vec:  1 4 7 10 13 16 19 22
6069    3rd vec:  2 5 8 11 14 17 20 23
6070 
   We use 3 shuffle instructions and 3 * 3 - 1 shifts to create such an output.
6072 
6073    First we shuffle all 3 vectors to get correct elements order:
6074 
6075    1st vec:  ( 0  3  6) ( 1  4  7) ( 2  5)
6076    2nd vec:  ( 8 11 14) ( 9 12 15) (10 13)
6077    3rd vec:  (16 19 22) (17 20 23) (18 21)
6078 
   Next we unite and shift the vectors 3 times:
6080 
6081    1st step:
6082      shift right by 6 the concatenation of:
6083      "1st vec" and  "2nd vec"
6084        ( 0  3  6) ( 1  4  7) |( 2  5) _ ( 8 11 14) ( 9 12 15)| (10 13)
6085      "2nd vec" and  "3rd vec"
6086        ( 8 11 14) ( 9 12 15) |(10 13) _ (16 19 22) (17 20 23)| (18 21)
6087      "3rd vec" and  "1st vec"
6088        (16 19 22) (17 20 23) |(18 21) _ ( 0  3  6) ( 1  4  7)| ( 2  5)
6089 			     | New vectors                   |
6090 
6091      So that now new vectors are:
6092 
6093      1st vec:  ( 2  5) ( 8 11 14) ( 9 12 15)
6094      2nd vec:  (10 13) (16 19 22) (17 20 23)
6095      3rd vec:  (18 21) ( 0  3  6) ( 1  4  7)
6096 
6097    2nd step:
6098      shift right by 5 the concatenation of:
6099      "1st vec" and  "3rd vec"
6100        ( 2  5) ( 8 11 14) |( 9 12 15) _ (18 21) ( 0  3  6)| ( 1  4  7)
6101      "2nd vec" and  "1st vec"
6102        (10 13) (16 19 22) |(17 20 23) _ ( 2  5) ( 8 11 14)| ( 9 12 15)
6103      "3rd vec" and  "2nd vec"
6104        (18 21) ( 0  3  6) |( 1  4  7) _ (10 13) (16 19 22)| (17 20 23)
6105 			  | New vectors                   |
6106 
6107      So that now new vectors are:
6108 
6109      1st vec:  ( 9 12 15) (18 21) ( 0  3  6)
6110      2nd vec:  (17 20 23) ( 2  5) ( 8 11 14)
6111      3rd vec:  ( 1  4  7) (10 13) (16 19 22) READY
6112 
6113    3rd step:
6114      shift right by 5 the concatenation of:
6115      "1st vec" and  "1st vec"
6116        ( 9 12 15) (18 21) |( 0  3  6) _ ( 9 12 15) (18 21)| ( 0  3  6)
6117      shift right by 3 the concatenation of:
6118      "2nd vec" and  "2nd vec"
6119                (17 20 23) |( 2  5) ( 8 11 14) _ (17 20 23)| ( 2  5) ( 8 11 14)
6120 			  | New vectors                   |
6121 
6122      So that now all vectors are READY:
6123      1st vec:  ( 0  3  6) ( 9 12 15) (18 21)
6124      2nd vec:  ( 2  5) ( 8 11 14) (17 20 23)
6125      3rd vec:  ( 1  4  7) (10 13) (16 19 22)
6126 
   This algorithm is faster than the one in vect_permute_load_chain if:
     1.  "shift of a concatenation" is faster than general permutation.
	 This is usually so.
     2.  The TARGET machine can't execute vector instructions in parallel.
	 This is because each step of the algorithm depends on the previous
	 one.  The algorithm in vect_permute_load_chain is much more parallel.
6133 
   The algorithm is applicable only for LOAD CHAIN LENGTH less than VF.  */
6136 
6137 static bool
6138 vect_shift_permute_load_chain (vec<tree> dr_chain,
6139 			       unsigned int length,
6140 			       gimple *stmt,
6141 			       gimple_stmt_iterator *gsi,
6142 			       vec<tree> *result_chain)
6143 {
6144   tree vect[3], vect_shift[3], data_ref, first_vect, second_vect;
6145   tree perm2_mask1, perm2_mask2, perm3_mask;
6146   tree select_mask, shift1_mask, shift2_mask, shift3_mask, shift4_mask;
6147   gimple *perm_stmt;
6148 
6149   tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
6150   unsigned int i;
6151   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6152   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6153 
6154   unsigned HOST_WIDE_INT nelt, vf;
6155   if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nelt)
6156       || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
6157     /* Not supported for variable-length vectors.  */
6158     return false;
6159 
6160   vec_perm_builder sel (nelt, nelt, 1);
6161   sel.quick_grow (nelt);
6162 
6163   result_chain->quick_grow (length);
6164   memcpy (result_chain->address (), dr_chain.address (),
6165 	  length * sizeof (tree));
6166 
6167   if (pow2p_hwi (length) && vf > 4)
6168     {
6169       unsigned int j, log_length = exact_log2 (length);
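      /* Generating permutation constant to shuffle the even elements of a
	 vector into its low half and the odd elements into its high half.
	 For vector length 8 it is {0 2 4 6 1 3 5 7}.  */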
6170       for (i = 0; i < nelt / 2; ++i)
6171 	sel[i] = i * 2;
6172       for (i = 0; i < nelt / 2; ++i)
6173 	sel[nelt / 2 + i] = i * 2 + 1;
6174       vec_perm_indices indices (sel, 2, nelt);
6175       if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
6176 	{
6177 	  if (dump_enabled_p ())
6178 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6179 			     "shuffle of 2 fields structure is not \
6180 			      supported by target\n");
6181 	  return false;
6182 	}
6183       perm2_mask1 = vect_gen_perm_mask_checked (vectype, indices);
6184 
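      /* Generating permutation constant to shuffle the odd elements of a
	 vector into its low half and the even elements into its high half.
	 For vector length 8 it is {1 3 5 7 0 2 4 6}.  */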
6185       for (i = 0; i < nelt / 2; ++i)
6186 	sel[i] = i * 2 + 1;
6187       for (i = 0; i < nelt / 2; ++i)
6188 	sel[nelt / 2 + i] = i * 2;
6189       indices.new_vector (sel, 2, nelt);
6190       if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
6191 	{
6192 	  if (dump_enabled_p ())
6193 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6194 			     "shuffle of 2 fields structure is not \
6195 			      supported by target\n");
6196 	  return false;
6197 	}
6198       perm2_mask2 = vect_gen_perm_mask_checked (vectype, indices);
6199 
6200       /* Generating permutation constant to shift all elements.
6201 	 For vector length 8 it is {4 5 6 7 8 9 10 11}.  */
6202       for (i = 0; i < nelt; i++)
6203 	sel[i] = nelt / 2 + i;
6204       indices.new_vector (sel, 2, nelt);
6205       if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
6206 	{
6207 	  if (dump_enabled_p ())
6208 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6209 			     "shift permutation is not supported by target\n");
6210 	  return false;
6211 	}
6212       shift1_mask = vect_gen_perm_mask_checked (vectype, indices);
6213 
6214       /* Generating permutation constant to select vector from 2.
6215 	 For vector length 8 it is {0 1 2 3 12 13 14 15}.  */
6216       for (i = 0; i < nelt / 2; i++)
6217 	sel[i] = i;
6218       for (i = nelt / 2; i < nelt; i++)
6219 	sel[i] = nelt + i;
6220       indices.new_vector (sel, 2, nelt);
6221       if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
6222 	{
6223 	  if (dump_enabled_p ())
6224 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6225 			     "select is not supported by target\n");
6226 	  return false;
6227 	}
6228       select_mask = vect_gen_perm_mask_checked (vectype, indices);
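      /* E.g., for vector length 8, a pair shuffled with the masks above into
	 {a0 a2 a4 a6 a1 a3 a5 a7} and {b1 b3 b5 b7 b0 b2 b4 b6} yields
	 {a1 a3 a5 a7 b1 b3 b5 b7} after the shift and
	 {a0 a2 a4 a6 b0 b2 b4 b6} after the select.  */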
6229 
6230       for (i = 0; i < log_length; i++)
6231 	{
6232 	  for (j = 0; j < length; j += 2)
6233 	    {
6234 	      first_vect = dr_chain[j];
6235 	      second_vect = dr_chain[j + 1];
6236 
6237 	      data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle2");
6238 	      perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
6239 					       first_vect, first_vect,
6240 					       perm2_mask1);
6241 	      vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6242 	      vect[0] = data_ref;
6243 
6244 	      data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle2");
6245 	      perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
6246 					       second_vect, second_vect,
6247 					       perm2_mask2);
6248 	      vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6249 	      vect[1] = data_ref;
6250 
6251 	      data_ref = make_temp_ssa_name (vectype, NULL, "vect_shift");
6252 	      perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
6253 					       vect[0], vect[1], shift1_mask);
6254 	      vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6255 	      (*result_chain)[j/2 + length/2] = data_ref;
6256 
6257 	      data_ref = make_temp_ssa_name (vectype, NULL, "vect_select");
6258 	      perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
6259 					       vect[0], vect[1], select_mask);
6260 	      vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6261 	      (*result_chain)[j/2] = data_ref;
6262 	    }
6263 	  memcpy (dr_chain.address (), result_chain->address (),
6264 		  length * sizeof (tree));
6265 	}
6266       return true;
6267     }
6268   if (length == 3 && vf > 2)
6269     {
6270       unsigned int k = 0, l = 0;
6271 
      /* Generating permutation constant to get all elements in right order.
6273 	 For vector length 8 it is {0 3 6 1 4 7 2 5}.  */
6274       for (i = 0; i < nelt; i++)
6275 	{
6276 	  if (3 * k + (l % 3) >= nelt)
6277 	    {
6278 	      k = 0;
6279 	      l += (3 - (nelt % 3));
6280 	    }
6281 	  sel[i] = 3 * k + (l % 3);
6282 	  k++;
6283 	}
6284       vec_perm_indices indices (sel, 2, nelt);
6285       if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
6286 	{
6287 	  if (dump_enabled_p ())
6288 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6289 			     "shuffle of 3 fields structure is not \
6290 			      supported by target\n");
6291 	  return false;
6292 	}
6293       perm3_mask = vect_gen_perm_mask_checked (vectype, indices);
6294 
6295       /* Generating permutation constant to shift all elements.
6296 	 For vector length 8 it is {6 7 8 9 10 11 12 13}.  */
6297       for (i = 0; i < nelt; i++)
6298 	sel[i] = 2 * (nelt / 3) + (nelt % 3) + i;
6299       indices.new_vector (sel, 2, nelt);
6300       if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
6301 	{
6302 	  if (dump_enabled_p ())
6303 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6304 			     "shift permutation is not supported by target\n");
6305 	  return false;
6306 	}
6307       shift1_mask = vect_gen_perm_mask_checked (vectype, indices);
6308 
6309       /* Generating permutation constant to shift all elements.
6310 	 For vector length 8 it is {5 6 7 8 9 10 11 12}.  */
6311       for (i = 0; i < nelt; i++)
6312 	sel[i] = 2 * (nelt / 3) + 1 + i;
6313       indices.new_vector (sel, 2, nelt);
6314       if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
6315 	{
6316 	  if (dump_enabled_p ())
6317 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6318 			     "shift permutation is not supported by target\n");
6319 	  return false;
6320 	}
6321       shift2_mask = vect_gen_perm_mask_checked (vectype, indices);
6322 
6323       /* Generating permutation constant to shift all elements.
6324 	 For vector length 8 it is {3 4 5 6 7 8 9 10}.  */
6325       for (i = 0; i < nelt; i++)
6326 	sel[i] = (nelt / 3) + (nelt % 3) / 2 + i;
6327       indices.new_vector (sel, 2, nelt);
6328       if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
6329 	{
6330 	  if (dump_enabled_p ())
6331 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6332 			     "shift permutation is not supported by target\n");
6333 	  return false;
6334 	}
6335       shift3_mask = vect_gen_perm_mask_checked (vectype, indices);
6336 
6337       /* Generating permutation constant to shift all elements.
6338 	 For vector length 8 it is {5 6 7 8 9 10 11 12}.  */
6339       for (i = 0; i < nelt; i++)
6340 	sel[i] = 2 * (nelt / 3) + (nelt % 3) / 2 + i;
6341       indices.new_vector (sel, 2, nelt);
6342       if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
6343 	{
6344 	  if (dump_enabled_p ())
6345 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6346 			     "shift permutation is not supported by target\n");
6347 	  return false;
6348 	}
6349       shift4_mask = vect_gen_perm_mask_checked (vectype, indices);
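      /* The first loop below shuffles each input vector into the
	 (0 3 6) (1 4 7) (2 5) layout; the next two loops perform the first
	 and second shift steps described in the function comment, and the
	 two trailing permutations perform the third step.  */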
6350 
6351       for (k = 0; k < 3; k++)
6352 	{
6353 	  data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3");
6354 	  perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
6355 					   dr_chain[k], dr_chain[k],
6356 					   perm3_mask);
6357 	  vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6358 	  vect[k] = data_ref;
6359 	}
6360 
6361       for (k = 0; k < 3; k++)
6362 	{
6363 	  data_ref = make_temp_ssa_name (vectype, NULL, "vect_shift1");
6364 	  perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
6365 					   vect[k % 3], vect[(k + 1) % 3],
6366 					   shift1_mask);
6367 	  vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6368 	  vect_shift[k] = data_ref;
6369 	}
6370 
6371       for (k = 0; k < 3; k++)
6372 	{
6373 	  data_ref = make_temp_ssa_name (vectype, NULL, "vect_shift2");
6374 	  perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
6375 					   vect_shift[(4 - k) % 3],
6376 					   vect_shift[(3 - k) % 3],
6377 					   shift2_mask);
6378 	  vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6379 	  vect[k] = data_ref;
6380 	}
6381 
6382       (*result_chain)[3 - (nelt % 3)] = vect[2];
6383 
6384       data_ref = make_temp_ssa_name (vectype, NULL, "vect_shift3");
6385       perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, vect[0],
6386 				       vect[0], shift3_mask);
6387       vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6388       (*result_chain)[nelt % 3] = data_ref;
6389 
6390       data_ref = make_temp_ssa_name (vectype, NULL, "vect_shift4");
6391       perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, vect[1],
6392 				       vect[1], shift4_mask);
6393       vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6394       (*result_chain)[0] = data_ref;
6395       return true;
6396     }
6397   return false;
6398 }
6399 
6400 /* Function vect_transform_grouped_load.
6401 
6402    Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
   to perform their permutation and ascribe the resulting vectorized
   statements to the scalar statements.  */
6406 
6407 void
6408 vect_transform_grouped_load (gimple *stmt, vec<tree> dr_chain, int size,
6409 			     gimple_stmt_iterator *gsi)
6410 {
6411   machine_mode mode;
6412   vec<tree> result_chain = vNULL;
6413 
6414   /* DR_CHAIN contains input data-refs that are a part of the interleaving.
     RESULT_CHAIN is the output of vect_permute_load_chain; it contains
     permuted vectors that are ready for vector computation.  */
6417   result_chain.create (size);
6418 
  /* If the reassociation width for the vector type is 2 or greater, the
     target machine can execute 2 or more vector instructions in parallel.
     Otherwise try to get the chain for the load group using
     vect_shift_permute_load_chain.  */
6422   mode = TYPE_MODE (STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt)));
6423   if (targetm.sched.reassociation_width (VEC_PERM_EXPR, mode) > 1
6424       || pow2p_hwi (size)
6425       || !vect_shift_permute_load_chain (dr_chain, size, stmt,
6426 					 gsi, &result_chain))
6427     vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain);
6428   vect_record_grouped_load_vectors (stmt, result_chain);
6429   result_chain.release ();
6430 }
6431 
6432 /* RESULT_CHAIN contains the output of a group of grouped loads that were
6433    generated as part of the vectorization of STMT.  Assign the statement
6434    for each vector to the associated scalar statement.  */
6435 
6436 void
6437 vect_record_grouped_load_vectors (gimple *stmt, vec<tree> result_chain)
6438 {
6439   gimple *first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
6440   gimple *next_stmt, *new_stmt;
6441   unsigned int i, gap_count;
6442   tree tmp_data_ref;
6443 
6444   /* Put a permuted data-ref in the VECTORIZED_STMT field.
     Since we scan the chain starting from its first node, their order
     corresponds to the order of data-refs in RESULT_CHAIN.  */
6447   next_stmt = first_stmt;
6448   gap_count = 1;
6449   FOR_EACH_VEC_ELT (result_chain, i, tmp_data_ref)
6450     {
6451       if (!next_stmt)
6452 	break;
6453 
      /* Skip the gaps.  Loads created for the gaps will be removed by the dead
6455        code elimination pass later.  No need to check for the first stmt in
6456        the group, since it always exists.
6457        GROUP_GAP is the number of steps in elements from the previous
6458        access (if there is no gap GROUP_GAP is 1).  We skip loads that
6459        correspond to the gaps.  */
      if (next_stmt != first_stmt
	  && gap_count < GROUP_GAP (vinfo_for_stmt (next_stmt)))
	{
	  gap_count++;
	  continue;
	}
6466 
6467       while (next_stmt)
6468         {
6469 	  new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
6470 	  /* We assume that if VEC_STMT is not NULL, this is a case of multiple
6471 	     copies, and we put the new vector statement in the first available
6472 	     RELATED_STMT.  */
6473 	  if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
6474 	    STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
6475 	  else
6476             {
6477               if (!GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
6478                 {
6479 		  gimple *prev_stmt =
6480 		    STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
6481 		  gimple *rel_stmt =
6482 		    STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt));
6483 	          while (rel_stmt)
6484 		    {
6485 		      prev_stmt = rel_stmt;
6486 		      rel_stmt =
6487                         STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
6488 		    }
6489 
6490   	          STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) =
6491                     new_stmt;
6492                 }
6493             }
6494 
6495 	  next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6496 	  gap_count = 1;
6497 	  /* If NEXT_STMT accesses the same DR as the previous statement,
6498 	     put the same TMP_DATA_REF as its vectorized statement; otherwise
6499 	     get the next data-ref from RESULT_CHAIN.  */
6500 	  if (!next_stmt || !GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
6501 	    break;
6502         }
6503     }
6504 }
6505 
6506 /* Function vect_force_dr_alignment_p.
6507 
6508    Returns whether the alignment of a DECL can be forced to be aligned
   on an ALIGNMENT-bit boundary.  */
6510 
6511 bool
6512 vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment)
6513 {
6514   if (!VAR_P (decl))
6515     return false;
6516 
6517   if (decl_in_symtab_p (decl)
6518       && !symtab_node::get (decl)->can_increase_alignment_p ())
6519     return false;
6520 
6521   if (TREE_STATIC (decl))
6522     return (alignment <= MAX_OFILE_ALIGNMENT);
6523   else
6524     return (alignment <= MAX_STACK_ALIGNMENT);
6525 }
6526 
6527 
6528 /* Return whether the data reference DR is supported with respect to its
6529    alignment.
6530    If CHECK_ALIGNED_ACCESSES is TRUE, check if the access is supported even
   if it is aligned, i.e., check if it is possible to vectorize it with a
   different alignment.  */
6533 
6534 enum dr_alignment_support
6535 vect_supportable_dr_alignment (struct data_reference *dr,
6536                                bool check_aligned_accesses)
6537 {
6538   gimple *stmt = DR_STMT (dr);
6539   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6540   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6541   machine_mode mode = TYPE_MODE (vectype);
6542   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6543   struct loop *vect_loop = NULL;
6544   bool nested_in_vect_loop = false;
6545 
6546   if (aligned_access_p (dr) && !check_aligned_accesses)
6547     return dr_aligned;
6548 
6549   /* For now assume all conditional loads/stores support unaligned
6550      access without any special code.  */
6551   if (is_gimple_call (stmt)
6552       && gimple_call_internal_p (stmt)
6553       && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
6554 	  || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
6555     return dr_unaligned_supported;
6556 
6557   if (loop_vinfo)
6558     {
6559       vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
6560       nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt);
6561     }
6562 
6563   /* Possibly unaligned access.  */
6564 
6565   /* We can choose between using the implicit realignment scheme (generating
6566      a misaligned_move stmt) and the explicit realignment scheme (generating
6567      aligned loads with a REALIGN_LOAD).  There are two variants to the
6568      explicit realignment scheme: optimized, and unoptimized.
6569      We can optimize the realignment only if the step between consecutive
6570      vector loads is equal to the vector size.  Since the vector memory
6571      accesses advance in steps of VS (Vector Size) in the vectorized loop, it
6572      is guaranteed that the misalignment amount remains the same throughout the
6573      execution of the vectorized loop.  Therefore, we can create the
6574      "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
6575      at the loop preheader.
6576 
6577      However, in the case of outer-loop vectorization, when vectorizing a
6578      memory access in the inner-loop nested within the LOOP that is now being
6579      vectorized, while it is guaranteed that the misalignment of the
6580      vectorized memory access will remain the same in different outer-loop
     iterations, it is *not* guaranteed that it will remain the same throughout
6582      the execution of the inner-loop.  This is because the inner-loop advances
6583      with the original scalar step (and not in steps of VS).  If the inner-loop
6584      step happens to be a multiple of VS, then the misalignment remains fixed
6585      and we can use the optimized realignment scheme.  For example:
6586 
6587       for (i=0; i<N; i++)
6588         for (j=0; j<M; j++)
6589           s += a[i+j];
6590 
6591      When vectorizing the i-loop in the above example, the step between
6592      consecutive vector loads is 1, and so the misalignment does not remain
6593      fixed across the execution of the inner-loop, and the realignment cannot
6594      be optimized (as illustrated in the following pseudo vectorized loop):
6595 
6596       for (i=0; i<N; i+=4)
6597         for (j=0; j<M; j++){
6598           vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
6599                          // when j is {0,1,2,3,4,5,6,7,...} respectively.
6600                          // (assuming that we start from an aligned address).
6601           }
6602 
6603      We therefore have to use the unoptimized realignment scheme:
6604 
6605       for (i=0; i<N; i+=4)
6606           for (j=k; j<M; j+=4)
6607           vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
6608                            // that the misalignment of the initial address is
6609                            // 0).
6610 
6611      The loop can then be vectorized as follows:
6612 
6613       for (k=0; k<4; k++){
6614         rt = get_realignment_token (&vp[k]);
6615         for (i=0; i<N; i+=4){
6616           v1 = vp[i+k];
6617           for (j=k; j<M; j+=4){
6618             v2 = vp[i+j+VS-1];
6619             va = REALIGN_LOAD <v1,v2,rt>;
6620             vs += va;
6621             v1 = v2;
6622           }
6623         }
6624     } */
6625 
6626   if (DR_IS_READ (dr))
6627     {
6628       bool is_packed = false;
6629       tree type = (TREE_TYPE (DR_REF (dr)));
6630 
6631       if (optab_handler (vec_realign_load_optab, mode) != CODE_FOR_nothing
6632 	  && (!targetm.vectorize.builtin_mask_for_load
6633 	      || targetm.vectorize.builtin_mask_for_load ()))
6634 	{
6635 	  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6636 
6637 	  /* If we are doing SLP then the accesses need not have the
6638 	     same alignment, instead it depends on the SLP group size.  */
6639 	  if (loop_vinfo
6640 	      && STMT_SLP_TYPE (stmt_info)
6641 	      && !multiple_p (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6642 			      * GROUP_SIZE (vinfo_for_stmt
6643 					    (GROUP_FIRST_ELEMENT (stmt_info))),
6644 			      TYPE_VECTOR_SUBPARTS (vectype)))
6645 	    ;
6646 	  else if (!loop_vinfo
6647 		   || (nested_in_vect_loop
6648 		       && maybe_ne (TREE_INT_CST_LOW (DR_STEP (dr)),
6649 				    GET_MODE_SIZE (TYPE_MODE (vectype)))))
6650 	    return dr_explicit_realign;
6651 	  else
6652 	    return dr_explicit_realign_optimized;
6653 	}
6654       if (!known_alignment_for_access_p (dr))
6655 	is_packed = not_size_aligned (DR_REF (dr));
6656 
6657       if (targetm.vectorize.support_vector_misalignment
6658 	    (mode, type, DR_MISALIGNMENT (dr), is_packed))
6659 	/* Can't software pipeline the loads, but can at least do them.  */
6660 	return dr_unaligned_supported;
6661     }
6662   else
6663     {
6664       bool is_packed = false;
6665       tree type = (TREE_TYPE (DR_REF (dr)));
6666 
6667       if (!known_alignment_for_access_p (dr))
6668 	is_packed = not_size_aligned (DR_REF (dr));
6669 
      if (targetm.vectorize.support_vector_misalignment
	    (mode, type, DR_MISALIGNMENT (dr), is_packed))
	return dr_unaligned_supported;
6673     }
6674 
6675   /* Unsupported.  */
6676   return dr_unaligned_unsupported;
6677 }
6678