1 /* Data References Analysis and Manipulation Utilities for Vectorization.
2    Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
3    Free Software Foundation, Inc.
4    Contributed by Dorit Naishlos <dorit@il.ibm.com>
5    and Ira Rosen <irar@il.ibm.com>
6 
7 This file is part of GCC.
8 
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13 
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
17 for more details.
18 
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3.  If not see
21 <http://www.gnu.org/licenses/>.  */
22 
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "tm_p.h"
30 #include "target.h"
31 #include "basic-block.h"
32 #include "tree-pretty-print.h"
33 #include "gimple-pretty-print.h"
34 #include "tree-flow.h"
35 #include "tree-dump.h"
36 #include "cfgloop.h"
37 #include "tree-chrec.h"
38 #include "tree-scalar-evolution.h"
39 #include "tree-vectorizer.h"
40 #include "diagnostic-core.h"
41 
42 /* Need to include rtl.h, expr.h, etc. for optabs.  */
43 #include "expr.h"
44 #include "optabs.h"
45 
46 /* Return true if load- or store-lanes optab OPTAB is implemented for
47    COUNT vectors of type VECTYPE.  NAME is the name of OPTAB.  */
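/* An illustrative, target-dependent sketch of what this check amounts to:
   for a 16-byte V4SI VECTYPE and COUNT == 3 we look for a 48-byte (384-bit)
   integer mode to serve as the array mode, consulting
   targetm.array_mode_supported_p to decide whether such an oversized
   integer mode may be used at all.  Only if that mode exists and the target
   also provides the corresponding OPTAB pattern (e.g. a vec_load_lanes
   pattern converting between the array mode and V4SImode, as targets with
   NEON-style vld3/vst3 instructions do) do we report success.  */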
48 
49 static bool
50 vect_lanes_optab_supported_p (const char *name, convert_optab optab,
51 			      tree vectype, unsigned HOST_WIDE_INT count)
52 {
53   enum machine_mode mode, array_mode;
54   bool limit_p;
55 
56   mode = TYPE_MODE (vectype);
57   limit_p = !targetm.array_mode_supported_p (mode, count);
58   array_mode = mode_for_size (count * GET_MODE_BITSIZE (mode),
59 			      MODE_INT, limit_p);
60 
61   if (array_mode == BLKmode)
62     {
63       if (vect_print_dump_info (REPORT_DETAILS))
64 	fprintf (vect_dump, "no array mode for %s[" HOST_WIDE_INT_PRINT_DEC "]",
65 		 GET_MODE_NAME (mode), count);
66       return false;
67     }
68 
69   if (convert_optab_handler (optab, array_mode, mode) == CODE_FOR_nothing)
70     {
71       if (vect_print_dump_info (REPORT_DETAILS))
72 	fprintf (vect_dump, "cannot use %s<%s><%s>",
73 		 name, GET_MODE_NAME (array_mode), GET_MODE_NAME (mode));
74       return false;
75     }
76 
77   if (vect_print_dump_info (REPORT_DETAILS))
78     fprintf (vect_dump, "can use %s<%s><%s>",
79 	     name, GET_MODE_NAME (array_mode), GET_MODE_NAME (mode));
80 
81   return true;
82 }
83 
84 
85 /* Return the smallest scalar part of STMT.
86    This is used to determine the vectype of the stmt.  We generally set the
87    vectype according to the type of the result (lhs).  For stmts whose
88    result-type is different from the type of the arguments (e.g., demotion,
89    promotion), vectype will be reset appropriately (later).  Note that we have
90    to visit the smallest datatype in this function, because that determines the
91    VF.  If the smallest datatype in the loop is present only as the rhs of a
92    promotion operation - we'd miss it.
93    Such a case, where a variable of this datatype does not appear in the lhs
94    anywhere in the loop, can only occur if it's an invariant: e.g.:
95    'int_x = (int) short_inv', which we'd expect to have been optimized away by
96    invariant motion.  However, we cannot rely on invariant motion to always
97    take invariants out of the loop, and so in the case of promotion we also
98    have to check the rhs.
99    LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
100    types.  */
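/* For example, for a (hypothetical) promotion statement

     int_x = (int) short_y;

   the lhs has size 4 and the rhs has size 2, so the function returns the
   short type and sets *LHS_SIZE_UNIT to 4 and *RHS_SIZE_UNIT to 2, letting
   the caller base the vectorization factor on the 2-byte element rather
   than on the 4-byte result.  */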
101 
102 tree
103 vect_get_smallest_scalar_type (gimple stmt, HOST_WIDE_INT *lhs_size_unit,
104                                HOST_WIDE_INT *rhs_size_unit)
105 {
106   tree scalar_type = gimple_expr_type (stmt);
107   HOST_WIDE_INT lhs, rhs;
108 
109   lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
110 
111   if (is_gimple_assign (stmt)
112       && (gimple_assign_cast_p (stmt)
113           || gimple_assign_rhs_code (stmt) == WIDEN_MULT_EXPR
114           || gimple_assign_rhs_code (stmt) == FLOAT_EXPR))
115     {
116       tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
117 
118       rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
119       if (rhs < lhs)
120         scalar_type = rhs_type;
121     }
122 
123   *lhs_size_unit = lhs;
124   *rhs_size_unit = rhs;
125   return scalar_type;
126 }
127 
128 
129 /* Find the place of the data-ref in STMT in the interleaving chain that starts
130    from FIRST_STMT.  Return -1 if the data-ref is not a part of the chain.  */
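/* For instance, given a (hypothetical) interleaved group of stores

     a[4*i]   = w;     <-- FIRST_STMT, place 0
     a[4*i+1] = x;     <-- place 1
     a[4*i+2] = y;     <-- place 2
     a[4*i+3] = z;     <-- place 3

   calling this function for the store to a[4*i+2] returns 2, while a
   statement that is not linked into the chain yields -1.  */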
131 
132 int
133 vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
134 {
135   gimple next_stmt = first_stmt;
136   int result = 0;
137 
138   if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
139     return -1;
140 
141   while (next_stmt && next_stmt != stmt)
142     {
143       result++;
144       next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
145     }
146 
147   if (next_stmt)
148     return result;
149   else
150     return -1;
151 }
152 
153 
154 /* Function vect_insert_into_interleaving_chain.
155 
156    Insert DRA into the interleaving chain of DRB according to DRA's INIT.  */
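/* A small sketch: if DRB's chain currently covers the inits {0, 8} (say,
   4-byte accesses a[2*i] and a[2*i+2]) and DRA has init 4 (a[2*i+1]),
   DRA is linked in between the two existing nodes, giving the init order
   {0, 4, 8}.  If DRA's init is larger than every init already in the
   chain, DRA is simply appended at the end.  */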
157 
158 static void
159 vect_insert_into_interleaving_chain (struct data_reference *dra,
160 				     struct data_reference *drb)
161 {
162   gimple prev, next;
163   tree next_init;
164   stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
165   stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
166 
167   prev = GROUP_FIRST_ELEMENT (stmtinfo_b);
168   next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
169   while (next)
170     {
171       next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
172       if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
173 	{
174 	  /* Insert here.  */
175 	  GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = DR_STMT (dra);
176 	  GROUP_NEXT_ELEMENT (stmtinfo_a) = next;
177 	  return;
178 	}
179       prev = next;
180       next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
181     }
182 
183   /* We got to the end of the list. Insert here.  */
184   GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = DR_STMT (dra);
185   GROUP_NEXT_ELEMENT (stmtinfo_a) = NULL;
186 }
187 
188 
189 /* Function vect_update_interleaving_chain.
190 
191    For two data-refs DRA and DRB that are a part of a chain of interleaved data
192    accesses, update the interleaving chain.  DRB's INIT is smaller than DRA's.
193 
194    There are four possible cases:
195    1. New stmts - both DRA and DRB are not a part of any chain:
196       FIRST_DR = DRB
197       NEXT_DR (DRB) = DRA
198    2. DRB is a part of a chain and DRA is not:
199       no need to update FIRST_DR
200       no need to insert DRB
201       insert DRA according to init
202    3. DRA is a part of a chain and DRB is not:
203       if (init of FIRST_DR > init of DRB)
204           FIRST_DR = DRB
205 	  NEXT(FIRST_DR) = previous FIRST_DR
206       else
207           insert DRB according to its init
208    4. both DRA and DRB are in some interleaving chains:
209       choose the chain with the smallest init of FIRST_DR
210       insert the nodes of the second chain into the first one.  */
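/* A sketch of case 4: if the first element of DRA's chain has init 4 while
   the first element of DRB's chain has init 0, the chain whose head has the
   smaller init (DRB's) is kept, its head becomes the FIRST_ELEMENT of every
   node that is merged, and the nodes of the other chain are inserted one by
   one before the first node with a larger init, each search continuing from
   the point of the previous insertion.  */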
211 
212 static void
213 vect_update_interleaving_chain (struct data_reference *drb,
214 				struct data_reference *dra)
215 {
216   stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
217   stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
218   tree next_init, init_dra_chain, init_drb_chain;
219   gimple first_a, first_b;
220   tree node_init;
221   gimple node, prev, next, first_stmt;
222 
223   /* 1. New stmts - both DRA and DRB are not a part of any chain.   */
224   if (!GROUP_FIRST_ELEMENT (stmtinfo_a) && !GROUP_FIRST_ELEMENT (stmtinfo_b))
225     {
226       GROUP_FIRST_ELEMENT (stmtinfo_a) = DR_STMT (drb);
227       GROUP_FIRST_ELEMENT (stmtinfo_b) = DR_STMT (drb);
228       GROUP_NEXT_ELEMENT (stmtinfo_b) = DR_STMT (dra);
229       return;
230     }
231 
232   /* 2. DRB is a part of a chain and DRA is not.  */
233   if (!GROUP_FIRST_ELEMENT (stmtinfo_a) && GROUP_FIRST_ELEMENT (stmtinfo_b))
234     {
235       GROUP_FIRST_ELEMENT (stmtinfo_a) = GROUP_FIRST_ELEMENT (stmtinfo_b);
236       /* Insert DRA into the chain of DRB.  */
237       vect_insert_into_interleaving_chain (dra, drb);
238       return;
239     }
240 
241   /* 3. DRA is a part of a chain and DRB is not.  */
242   if (GROUP_FIRST_ELEMENT (stmtinfo_a) && !GROUP_FIRST_ELEMENT (stmtinfo_b))
243     {
244       gimple old_first_stmt = GROUP_FIRST_ELEMENT (stmtinfo_a);
245       tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
246 							      old_first_stmt)));
247       gimple tmp;
248 
249       if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
250 	{
251 	  /* DRB's init is smaller than the init of the stmt previously marked
252 	     as the first stmt of the interleaving chain of DRA.  Therefore, we
253 	     update FIRST_STMT and put DRB in the head of the list.  */
254 	  GROUP_FIRST_ELEMENT (stmtinfo_b) = DR_STMT (drb);
255 	  GROUP_NEXT_ELEMENT (stmtinfo_b) = old_first_stmt;
256 
257 	  /* Update all the stmts in the list to point to the new FIRST_STMT.  */
258 	  tmp = old_first_stmt;
259 	  while (tmp)
260 	    {
261 	      GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) = DR_STMT (drb);
262 	      tmp = GROUP_NEXT_ELEMENT (vinfo_for_stmt (tmp));
263 	    }
264 	}
265       else
266 	{
267 	  /* Insert DRB in the list of DRA.  */
268 	  vect_insert_into_interleaving_chain (drb, dra);
269 	  GROUP_FIRST_ELEMENT (stmtinfo_b) = GROUP_FIRST_ELEMENT (stmtinfo_a);
270 	}
271       return;
272     }
273 
274   /* 4. both DRA and DRB are in some interleaving chains.  */
275   first_a = GROUP_FIRST_ELEMENT (stmtinfo_a);
276   first_b = GROUP_FIRST_ELEMENT (stmtinfo_b);
277   if (first_a == first_b)
278     return;
279   init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
280   init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));
281 
282   if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
283     {
284       /* Insert the nodes of DRA chain into the DRB chain.
285 	 After inserting a node, continue from this node of the DRB chain (don't
286          start from the beginning).  */
287       node = GROUP_FIRST_ELEMENT (stmtinfo_a);
288       prev = GROUP_FIRST_ELEMENT (stmtinfo_b);
289       first_stmt = first_b;
290     }
291   else
292     {
293       /* Insert the nodes of DRB chain into the DRA chain.
294 	 After inserting a node, continue from this node of the DRA chain (don't
295          start from the beginning).  */
296       node = GROUP_FIRST_ELEMENT (stmtinfo_b);
297       prev = GROUP_FIRST_ELEMENT (stmtinfo_a);
298       first_stmt = first_a;
299     }
300 
301   while (node)
302     {
303       node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
304       next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
305       while (next)
306 	{
307 	  next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
308 	  if (tree_int_cst_compare (next_init, node_init) > 0)
309 	    {
310 	      /* Insert here.  */
311 	      GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = node;
312 	      GROUP_NEXT_ELEMENT (vinfo_for_stmt (node)) = next;
313 	      prev = node;
314 	      break;
315 	    }
316 	  prev = next;
317 	  next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
318 	}
319       if (!next)
320 	{
321 	  /* We got to the end of the list. Insert here.  */
322 	  GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = node;
323 	  GROUP_NEXT_ELEMENT (vinfo_for_stmt (node)) = NULL;
324 	  prev = node;
325 	}
326       GROUP_FIRST_ELEMENT (vinfo_for_stmt (node)) = first_stmt;
327       node = GROUP_NEXT_ELEMENT (vinfo_for_stmt (node));
328     }
329 }
330 
331 /* Check dependence between DRA and DRB for basic block vectorization.
332    If the accesses share the same base and offset, we can compare their initial
333    constant offsets to decide whether they differ or not.  In case of a read-
334    write dependence we check that the load is before the store to ensure that
335    vectorization will not change the order of the accesses.  */
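/* Two illustrative basic-block cases, assuming a 4-byte type and the same
   base and offset: a load from a[0] and a store to a[1] have inits 0 and 4,
   so they touch different locations and no dependence is recorded; a load
   and a store that both access a[0] have equal inits, and that pair is
   treated as independent only when the load appears before the store in
   the block.  */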
336 
337 static bool
338 vect_drs_dependent_in_basic_block (struct data_reference *dra,
339                                    struct data_reference *drb)
340 {
341   HOST_WIDE_INT type_size_a, type_size_b, init_a, init_b;
342   gimple earlier_stmt;
343 
344   /* This function is expected to be called only for pairs consisting of a
345      load and a store, but we verify that here.  */
346   if (DR_IS_READ (dra) == DR_IS_READ (drb))
347     {
348       if (DR_IS_READ (dra))
349         return false;
350       else
351         return true;
352     }
353 
354   /* Check that the data-refs have same bases and offsets.  If not, we can't
355      determine if they are dependent.  */
356   if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0)
357       || !dr_equal_offsets_p (dra, drb))
358     return true;
359 
360   /* Check the types.  */
361   type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
362   type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
363 
364   if (type_size_a != type_size_b
365       || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
366                               TREE_TYPE (DR_REF (drb))))
367     return true;
368 
369   init_a = TREE_INT_CST_LOW (DR_INIT (dra));
370   init_b = TREE_INT_CST_LOW (DR_INIT (drb));
371 
372   /* Two different locations - no dependence.  */
373   if (init_a != init_b)
374     return false;
375 
376   /* We have a read-write dependence.  Check that the load is before the store.
377      When we vectorize basic blocks, a vector load can only be placed before
378      the corresponding scalar load, and a vector store only after its
379      corresponding scalar store.  So the order of the accesses is preserved
380      in case the load is before the store.  */
381   earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
382   if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
383     return false;
384 
385   return true;
386 }
387 
388 
389 /* Function vect_check_interleaving.
390 
391    Check if DRA and DRB are a part of interleaving.  In case they are, insert
392    DRA and DRB in an interleaving chain.  */
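/* A minimal example of an interleaved pair of 4-byte stores:

     for (i = 0; i < N; i++)
       {
         a[2*i]   = x;      step 8, init 0
         a[2*i+1] = y;      step 8, init 4
       }

   The two stores share the base and the step, and their inits differ by 4,
   which is a multiple of the element size and does not exceed the step, so
   vect_update_interleaving_chain links them into one chain headed by the
   store to a[2*i].  */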
393 
394 static bool
395 vect_check_interleaving (struct data_reference *dra,
396 			 struct data_reference *drb)
397 {
398   HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;
399 
400   /* Check that the data-refs have the same first location (except init) and
401      that they are both either loads or stores (not one load and one store).  */
402   if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0)
403       || !dr_equal_offsets_p (dra, drb)
404       || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
405       || DR_IS_READ (dra) != DR_IS_READ (drb))
406     return false;
407 
408   /* Check:
409      1. data-refs are of the same type
410      2. their steps are equal
411      3. the step (if non-zero) is not smaller than the difference between the
412         data-refs' inits.  */
413   type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
414   type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
415 
416   if (type_size_a != type_size_b
417       || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb))
418       || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
419                               TREE_TYPE (DR_REF (drb))))
420     return false;
421 
422   init_a = TREE_INT_CST_LOW (DR_INIT (dra));
423   init_b = TREE_INT_CST_LOW (DR_INIT (drb));
424   step = TREE_INT_CST_LOW (DR_STEP (dra));
425 
426   if (init_a > init_b)
427     {
428       /* If init_a == init_b + the size of the type * k, we have an interleaving,
429 	 and DRB is accessed before DRA.  */
430       diff_mod_size = (init_a - init_b) % type_size_a;
431 
432       if (step && (init_a - init_b) > step)
433          return false;
434 
435       if (diff_mod_size == 0)
436 	{
437 	  vect_update_interleaving_chain (drb, dra);
438 	  if (vect_print_dump_info (REPORT_DR_DETAILS))
439 	    {
440 	      fprintf (vect_dump, "Detected interleaving ");
441 	      print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
442 	      fprintf (vect_dump, " and ");
443 	      print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
444 	    }
445 	  return true;
446 	}
447     }
448   else
449     {
450       /* If init_b == init_a + the size of the type * k, we have an
451 	 interleaving, and DRA is accessed before DRB.  */
452       diff_mod_size = (init_b - init_a) % type_size_a;
453 
454       if (step && (init_b - init_a) > step)
455          return false;
456 
457       if (diff_mod_size == 0)
458 	{
459 	  vect_update_interleaving_chain (dra, drb);
460 	  if (vect_print_dump_info (REPORT_DR_DETAILS))
461 	    {
462 	      fprintf (vect_dump, "Detected interleaving ");
463 	      print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
464 	      fprintf (vect_dump, " and ");
465 	      print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
466 	    }
467 	  return true;
468 	}
469     }
470 
471   return false;
472 }
473 
474 /* Check if the data references pointed to by DR_I and DR_J are the same or
475    belong to the same interleaving group.  Return FALSE if the drs are
476    different, otherwise return TRUE.  */
477 
478 static bool
479 vect_same_range_drs (data_reference_p dr_i, data_reference_p dr_j)
480 {
481   gimple stmt_i = DR_STMT (dr_i);
482   gimple stmt_j = DR_STMT (dr_j);
483 
484   if (operand_equal_p (DR_REF (dr_i), DR_REF (dr_j), 0)
485       || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_i))
486 	    && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_j))
487 	    && (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_i))
488 		== GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_j)))))
489     return true;
490   else
491     return false;
492 }
493 
494 /* If address ranges represented by DDR_I and DDR_J are equal,
495    return TRUE, otherwise return FALSE.  */
496 
497 static bool
498 vect_vfa_range_equal (ddr_p ddr_i, ddr_p ddr_j)
499 {
500   if ((vect_same_range_drs (DDR_A (ddr_i), DDR_A (ddr_j))
501        && vect_same_range_drs (DDR_B (ddr_i), DDR_B (ddr_j)))
502       || (vect_same_range_drs (DDR_A (ddr_i), DDR_B (ddr_j))
503 	  && vect_same_range_drs (DDR_B (ddr_i), DDR_A (ddr_j))))
504     return true;
505   else
506     return false;
507 }
508 
509 /* Insert DDR into LOOP_VINFO's list of ddrs that may alias and need to be
510    tested at run-time.  Return TRUE if DDR was successfully inserted.
511    Return FALSE if versioning is not supported.  */
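/* The run-time test that is later generated for a marked DDR is conceptually
   an overlap check between the two address ranges, e.g. for pointers p and q
   accessed over n vectorized iterations something along the lines of

     if (p + n <= q || q + n <= p)
       ... vectorized loop ...
     else
       ... scalar loop ...

   (a sketch only; the actual segment lengths are derived from the steps and
   the vectorization factor when the check is emitted).  */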
512 
513 static bool
514 vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
515 {
516   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
517 
518   if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
519     return false;
520 
521   if (vect_print_dump_info (REPORT_DR_DETAILS))
522     {
523       fprintf (vect_dump, "mark for run-time aliasing test between ");
524       print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM);
525       fprintf (vect_dump, " and ");
526       print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
527     }
528 
529   if (optimize_loop_nest_for_size_p (loop))
530     {
531       if (vect_print_dump_info (REPORT_DR_DETAILS))
532 	fprintf (vect_dump, "versioning not supported when optimizing for size.");
533       return false;
534     }
535 
536   /* FORNOW: We don't support versioning with outer-loop vectorization.  */
537   if (loop->inner)
538     {
539       if (vect_print_dump_info (REPORT_DR_DETAILS))
540 	fprintf (vect_dump, "versioning not yet supported for outer-loops.");
541       return false;
542     }
543 
544   VEC_safe_push (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr);
545   return true;
546 }
547 
548 
549 /* Function vect_analyze_data_ref_dependence.
550 
551    Return TRUE if there (might) exist a dependence between a memory-reference
552    DRA and a memory-reference DRB.  When versioning for alias can handle the
553    dependence with a run-time check, return FALSE.  Adjust *MAX_VF according
554    to the data dependence.  */
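/* For example, a loop-carried dependence such as

     for (i = 0; i < N; i++)
       a[i+3] = a[i] + 1;

   has distance 3: the code below then lowers *MAX_VF to 3, so the loop can
   still be vectorized with a vectorization factor of at most 3, whereas a
   distance of 1 (a[i+1] = a[i] + 1) makes the data-refs dependent for any
   vectorization factor and prevents vectorization.  */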
555 
556 static bool
557 vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
558                                   loop_vec_info loop_vinfo, int *max_vf)
559 {
560   unsigned int i;
561   struct loop *loop = NULL;
562   struct data_reference *dra = DDR_A (ddr);
563   struct data_reference *drb = DDR_B (ddr);
564   stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
565   stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
566   lambda_vector dist_v;
567   unsigned int loop_depth;
568 
569   /* Don't bother to analyze statements marked as unvectorizable.  */
570   if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a)
571       || !STMT_VINFO_VECTORIZABLE (stmtinfo_b))
572     return false;
573 
574   if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
575     {
576       /* Independent data accesses.  */
577       vect_check_interleaving (dra, drb);
578       return false;
579     }
580 
581   if (loop_vinfo)
582     loop = LOOP_VINFO_LOOP (loop_vinfo);
583 
584   if ((DR_IS_READ (dra) && DR_IS_READ (drb) && loop_vinfo) || dra == drb)
585     return false;
586 
587   if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
588     {
589       gimple earlier_stmt;
590 
591       if (loop_vinfo)
592         {
593           if (vect_print_dump_info (REPORT_DR_DETAILS))
594             {
595               fprintf (vect_dump, "versioning for alias required: "
596                                   "can't determine dependence between ");
597               print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
598               fprintf (vect_dump, " and ");
599               print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
600             }
601 
602           /* Add to list of ddrs that need to be tested at run-time.  */
603           return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
604         }
605 
606       /* When vectorizing a basic block, an unknown dependence can still mean
607 	 strided access.  */
608       if (vect_check_interleaving (dra, drb))
609          return false;
610 
611       /* Read-read is OK (we need this check here, after checking for
612          interleaving).  */
613       if (DR_IS_READ (dra) && DR_IS_READ (drb))
614         return false;
615 
616       if (vect_print_dump_info (REPORT_DR_DETAILS))
617         {
618           fprintf (vect_dump, "can't determine dependence between ");
619           print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
620           fprintf (vect_dump, " and ");
621           print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
622         }
623 
624       /* We do not vectorize basic blocks with write-write dependencies.  */
625       if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
626         return true;
627 
628       /* Check that it's not a load-after-store dependence.  */
629       earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
630       if (DR_IS_WRITE (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
631         return true;
632 
633       return false;
634     }
635 
636   /* Versioning for alias is not yet supported for basic block SLP, and
637      dependence distance is not applicable; hence, in case of a known data
638      dependence, basic block vectorization is impossible for now.  */
639   if (!loop_vinfo)
640     {
641       if (dra != drb && vect_check_interleaving (dra, drb))
642         return false;
643 
644       if (vect_print_dump_info (REPORT_DR_DETAILS))
645         {
646           fprintf (vect_dump, "determined dependence between ");
647           print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
648           fprintf (vect_dump, " and ");
649           print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
650         }
651 
652       /* Do not vectorize basic blocks with write-write dependences.  */
653       if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
654         return true;
655 
656       /* Check if this dependence is allowed in basic block vectorization.  */
657       return vect_drs_dependent_in_basic_block (dra, drb);
658     }
659 
660   /* Loop-based vectorization and known data dependence.  */
661   if (DDR_NUM_DIST_VECTS (ddr) == 0)
662     {
663       if (vect_print_dump_info (REPORT_DR_DETAILS))
664         {
665           fprintf (vect_dump, "versioning for alias required: bad dist vector for ");
666           print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
667           fprintf (vect_dump, " and ");
668           print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
669         }
670       /* Add to list of ddrs that need to be tested at run-time.  */
671       return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
672     }
673 
674   loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
675   FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
676     {
677       int dist = dist_v[loop_depth];
678 
679       if (vect_print_dump_info (REPORT_DR_DETAILS))
680 	fprintf (vect_dump, "dependence distance  = %d.", dist);
681 
682       if (dist == 0)
683 	{
684 	  if (vect_print_dump_info (REPORT_DR_DETAILS))
685 	    {
686 	      fprintf (vect_dump, "dependence distance == 0 between ");
687 	      print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
688 	      fprintf (vect_dump, " and ");
689 	      print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
690 	    }
691 
692           /* For interleaving, mark that there is a read-write dependency if
693              necessary.  We checked earlier that one of the data-refs is a store.  */
694           if (DR_IS_READ (dra))
695             GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
696 	  else
697             {
698               if (DR_IS_READ (drb))
699                 GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
700 	    }
701 
702 	  continue;
703 	}
704 
705       if (dist > 0 && DDR_REVERSED_P (ddr))
706 	{
707 	  /* If DDR_REVERSED_P the order of the data-refs in DDR was
708 	     reversed (to make distance vector positive), and the actual
709 	     distance is negative.  */
710 	  if (vect_print_dump_info (REPORT_DR_DETAILS))
711 	    fprintf (vect_dump, "dependence distance negative.");
712 	  continue;
713 	}
714 
715       if (abs (dist) >= 2
716 	  && abs (dist) < *max_vf)
717 	{
718 	  /* The dependence distance requires reduction of the maximal
719 	     vectorization factor.  */
720 	  *max_vf = abs (dist);
721 	  if (vect_print_dump_info (REPORT_DR_DETAILS))
722 	    fprintf (vect_dump, "adjusting maximal vectorization factor to %i",
723 		     *max_vf);
724 	}
725 
726       if (abs (dist) >= *max_vf)
727 	{
728 	  /* Dependence distance does not create dependence, as far as
729 	     vectorization is concerned, in this case.  */
730 	  if (vect_print_dump_info (REPORT_DR_DETAILS))
731 	    fprintf (vect_dump, "dependence distance >= VF.");
732 	  continue;
733 	}
734 
735       if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
736 	{
737 	  fprintf (vect_dump, "not vectorized, possible dependence "
738     		              "between data-refs ");
739 	  print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
740 	  fprintf (vect_dump, " and ");
741 	  print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
742 	}
743 
744       return true;
745     }
746 
747   return false;
748 }
749 
750 /* Function vect_analyze_data_ref_dependences.
751 
752    Examine all the data references in the loop, and make sure there do not
753    exist any data dependences between them.  Set *MAX_VF according to
754    the maximum vectorization factor the data dependences allow.  */
755 
756 bool
757 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo,
758                                    bb_vec_info bb_vinfo, int *max_vf)
759 {
760   unsigned int i;
761   VEC (ddr_p, heap) *ddrs = NULL;
762   struct data_dependence_relation *ddr;
763 
764   if (vect_print_dump_info (REPORT_DETAILS))
765     fprintf (vect_dump, "=== vect_analyze_dependences ===");
766 
767   if (loop_vinfo)
768     ddrs = LOOP_VINFO_DDRS (loop_vinfo);
769   else
770     ddrs = BB_VINFO_DDRS (bb_vinfo);
771 
772   FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
773     if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf))
774       return false;
775 
776   return true;
777 }
778 
779 
780 /* Function vect_compute_data_ref_alignment
781 
782    Compute the misalignment of the data reference DR.
783 
784    Output:
785    1. If during the misalignment computation it is found that the data reference
786       cannot be vectorized then false is returned.
787    2. DR_MISALIGNMENT (DR) is defined.
788 
789    FOR NOW: No analysis is actually performed. Misalignment is calculated
790    only for trivial cases. TODO.  */
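/* As a concrete (made-up) example: for an access whose base is 16-byte
   aligned, with DR_INIT 8 and a V4SI vectype (16-byte alignment), the code
   below computes misalign = 8 mod 16 and records DR_MISALIGNMENT (dr) = 8.
   If the base alignment is unknown and cannot be forced to 16 bytes, the
   misalignment simply stays -1 (unknown).  */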
791 
792 static bool
793 vect_compute_data_ref_alignment (struct data_reference *dr)
794 {
795   gimple stmt = DR_STMT (dr);
796   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
797   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
798   struct loop *loop = NULL;
799   tree ref = DR_REF (dr);
800   tree vectype;
801   tree base, base_addr;
802   bool base_aligned;
803   tree misalign;
804   tree aligned_to, alignment;
805 
806   if (vect_print_dump_info (REPORT_DETAILS))
807     fprintf (vect_dump, "vect_compute_data_ref_alignment:");
808 
809   if (loop_vinfo)
810     loop = LOOP_VINFO_LOOP (loop_vinfo);
811 
812   /* Initialize misalignment to unknown.  */
813   SET_DR_MISALIGNMENT (dr, -1);
814 
815   misalign = DR_INIT (dr);
816   aligned_to = DR_ALIGNED_TO (dr);
817   base_addr = DR_BASE_ADDRESS (dr);
818   vectype = STMT_VINFO_VECTYPE (stmt_info);
819 
820   /* In case the dataref is in an inner-loop of the loop that is being
821      vectorized (LOOP), we use the base and misalignment information
822      relative to the outer-loop (LOOP).  This is ok only if the misalignment
823      stays the same throughout the execution of the inner-loop, which is why
824      we have to check that the stride of the dataref in the inner-loop is a
825      multiple of the vector size.  */
826   if (loop && nested_in_vect_loop_p (loop, stmt))
827     {
828       tree step = DR_STEP (dr);
829       HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
830 
831       if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
832         {
833           if (vect_print_dump_info (REPORT_ALIGNMENT))
834             fprintf (vect_dump, "inner step divides the vector-size.");
835 	  misalign = STMT_VINFO_DR_INIT (stmt_info);
836 	  aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
837 	  base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
838         }
839       else
840 	{
841 	  if (vect_print_dump_info (REPORT_ALIGNMENT))
842 	    fprintf (vect_dump, "inner step doesn't divide the vector-size.");
843 	  misalign = NULL_TREE;
844 	}
845     }
846 
847   base = build_fold_indirect_ref (base_addr);
848   alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
849 
850   if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
851       || !misalign)
852     {
853       if (vect_print_dump_info (REPORT_ALIGNMENT))
854 	{
855 	  fprintf (vect_dump, "Unknown alignment for access: ");
856 	  print_generic_expr (vect_dump, base, TDF_SLIM);
857 	}
858       return true;
859     }
860 
861   if ((DECL_P (base)
862        && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
863 				alignment) >= 0)
864       || (TREE_CODE (base_addr) == SSA_NAME
865 	  && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
866 						      TREE_TYPE (base_addr)))),
867 				   alignment) >= 0)
868       || (get_pointer_alignment (base_addr) >= TYPE_ALIGN (vectype)))
869     base_aligned = true;
870   else
871     base_aligned = false;
872 
873   if (!base_aligned)
874     {
875       /* Do not change the alignment of global variables if
876 	 flag_section_anchors is enabled.  */
877       if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
878 	  || (TREE_STATIC (base) && flag_section_anchors))
879 	{
880 	  if (vect_print_dump_info (REPORT_DETAILS))
881 	    {
882 	      fprintf (vect_dump, "can't force alignment of ref: ");
883 	      print_generic_expr (vect_dump, ref, TDF_SLIM);
884 	    }
885 	  return true;
886 	}
887 
888       /* Force the alignment of the decl.
889 	 NOTE: This is the only change to the code we make during
890 	 the analysis phase, before deciding to vectorize the loop.  */
891       if (vect_print_dump_info (REPORT_DETAILS))
892         {
893           fprintf (vect_dump, "force alignment of ");
894           print_generic_expr (vect_dump, ref, TDF_SLIM);
895         }
896 
897       DECL_ALIGN (base) = TYPE_ALIGN (vectype);
898       DECL_USER_ALIGN (base) = 1;
899     }
900 
901   /* At this point we assume that the base is aligned.  */
902   gcc_assert (base_aligned
903 	      || (TREE_CODE (base) == VAR_DECL
904 		  && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
905 
906   /* If this is a backward running DR then the first access in the larger
907      vectype actually is N-1 elements before the address in the DR.
908      Adjust misalign accordingly.  */
909   if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
910     {
911       tree offset = ssize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
912       /* DR_STEP(dr) is the same as -TYPE_SIZE of the scalar type,
913 	 otherwise we wouldn't be here.  */
914       offset = fold_build2 (MULT_EXPR, ssizetype, offset, DR_STEP (dr));
915       /* PLUS because DR_STEP was negative.  */
916       misalign = size_binop (PLUS_EXPR, misalign, offset);
917     }
918 
919   /* Modulo alignment.  */
920   misalign = size_binop (FLOOR_MOD_EXPR, misalign, alignment);
921 
922   if (!host_integerp (misalign, 1))
923     {
924       /* Negative or overflowed misalignment value.  */
925       if (vect_print_dump_info (REPORT_DETAILS))
926 	fprintf (vect_dump, "unexpected misalign value");
927       return false;
928     }
929 
930   SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));
931 
932   if (vect_print_dump_info (REPORT_DETAILS))
933     {
934       fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
935       print_generic_expr (vect_dump, ref, TDF_SLIM);
936     }
937 
938   return true;
939 }
940 
941 
942 /* Function vect_compute_data_refs_alignment
943 
944    Compute the misalignment of data references in the loop.
945    Return FALSE if a data reference is found that cannot be vectorized.  */
946 
947 static bool
948 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo,
949                                   bb_vec_info bb_vinfo)
950 {
951   VEC (data_reference_p, heap) *datarefs;
952   struct data_reference *dr;
953   unsigned int i;
954 
955   if (loop_vinfo)
956     datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
957   else
958     datarefs = BB_VINFO_DATAREFS (bb_vinfo);
959 
960   FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
961     if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
962         && !vect_compute_data_ref_alignment (dr))
963       {
964         if (bb_vinfo)
965           {
966             /* Mark unsupported statement as unvectorizable.  */
967             STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
968             continue;
969           }
970         else
971           return false;
972       }
973 
974   return true;
975 }
976 
977 
978 /* Function vect_update_misalignment_for_peel
979 
980    DR - the data reference whose misalignment is to be adjusted.
981    DR_PEEL - the data reference whose misalignment is being made
982              zero in the vector loop by the peel.
983    NPEEL - the number of iterations in the peel loop if the misalignment
984            of DR_PEEL is known at compile time.  */
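/* A numeric sketch: assume 4-byte elements, a V4SI vectype (16-byte
   alignment), DR_PEEL with misalignment 8 and DR with misalignment 12.
   Peeling NPEEL = (16 - 8) / 4 = 2 scalar iterations makes DR_PEEL aligned,
   and DR's misalignment becomes (12 + 2 * 4) mod 16 = 4; had DR been in
   DR_PEEL's same-alignment list, its misalignment would simply have been
   set to 0.  */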
985 
986 static void
987 vect_update_misalignment_for_peel (struct data_reference *dr,
988                                    struct data_reference *dr_peel, int npeel)
989 {
990   unsigned int i;
991   VEC(dr_p,heap) *same_align_drs;
992   struct data_reference *current_dr;
993   int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
994   int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
995   stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
996   stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));
997 
998   /* For interleaved data accesses the step in the loop must be multiplied by
999      the size of the interleaving group.  */
1000   if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1001     dr_size *= GROUP_SIZE (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info)));
1002   if (STMT_VINFO_STRIDED_ACCESS (peel_stmt_info))
1003     dr_peel_size *= GROUP_SIZE (peel_stmt_info);
1004 
1005   /* It can be assumed that the data refs with the same alignment as dr_peel
1006      are aligned in the vector loop.  */
1007   same_align_drs
1008     = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
1009   FOR_EACH_VEC_ELT (dr_p, same_align_drs, i, current_dr)
1010     {
1011       if (current_dr != dr)
1012         continue;
1013       gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
1014                   DR_MISALIGNMENT (dr_peel) / dr_peel_size);
1015       SET_DR_MISALIGNMENT (dr, 0);
1016       return;
1017     }
1018 
1019   if (known_alignment_for_access_p (dr)
1020       && known_alignment_for_access_p (dr_peel))
1021     {
1022       bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
1023       int misal = DR_MISALIGNMENT (dr);
1024       tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1025       misal += negative ? -npeel * dr_size : npeel * dr_size;
1026       misal &= (TYPE_ALIGN (vectype) / BITS_PER_UNIT) - 1;
1027       SET_DR_MISALIGNMENT (dr, misal);
1028       return;
1029     }
1030 
1031   if (vect_print_dump_info (REPORT_DETAILS))
1032     fprintf (vect_dump, "Setting misalignment to -1.");
1033   SET_DR_MISALIGNMENT (dr, -1);
1034 }
1035 
1036 
1037 /* Function vect_verify_datarefs_alignment
1038 
1039    Return TRUE if all data references in the loop can be
1040    handled with respect to alignment.  */
1041 
1042 bool
1043 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1044 {
1045   VEC (data_reference_p, heap) *datarefs;
1046   struct data_reference *dr;
1047   enum dr_alignment_support supportable_dr_alignment;
1048   unsigned int i;
1049 
1050   if (loop_vinfo)
1051     datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1052   else
1053     datarefs = BB_VINFO_DATAREFS (bb_vinfo);
1054 
1055   FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1056     {
1057       gimple stmt = DR_STMT (dr);
1058       stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1059 
1060       /* For interleaving, only the alignment of the first access matters.
1061          Skip statements marked as not vectorizable.  */
1062       if ((STMT_VINFO_STRIDED_ACCESS (stmt_info)
1063            && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1064           || !STMT_VINFO_VECTORIZABLE (stmt_info))
1065         continue;
1066 
1067       supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1068       if (!supportable_dr_alignment)
1069         {
1070           if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1071             {
1072               if (DR_IS_READ (dr))
1073                 fprintf (vect_dump,
1074                          "not vectorized: unsupported unaligned load.");
1075               else
1076                 fprintf (vect_dump,
1077                          "not vectorized: unsupported unaligned store.");
1078 
1079               print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1080             }
1081           return false;
1082         }
1083       if (supportable_dr_alignment != dr_aligned
1084           && vect_print_dump_info (REPORT_ALIGNMENT))
1085         fprintf (vect_dump, "Vectorizing an unaligned access.");
1086     }
1087   return true;
1088 }
1089 
1090 
1091 /* Function vector_alignment_reachable_p
1092 
1093    Return true if vector alignment for DR is reachable by peeling
1094    a few loop iterations.  Return false otherwise.  */
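/* For instance, with a V4SI vectype and 4-byte elements, a known
   misalignment of 8 bytes can be fixed by peeling 2 scalar iterations, so
   alignment is reachable; a misalignment of 6 bytes is not a multiple of
   the element size, no number of whole iterations can align the access,
   and the function refuses peeling.  */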
1095 
1096 static bool
1097 vector_alignment_reachable_p (struct data_reference *dr)
1098 {
1099   gimple stmt = DR_STMT (dr);
1100   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1101   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1102 
1103   if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1104     {
1105       /* For interleaved accesses we peel only if the number of iterations in
1106 	 the prolog loop (VF - misalignment) is a multiple of the
1107 	 number of interleaved accesses.  */
1108       int elem_size, mis_in_elements;
1109       int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1110 
1111       /* FORNOW: handle only known alignment.  */
1112       if (!known_alignment_for_access_p (dr))
1113 	return false;
1114 
1115       elem_size = GET_MODE_SIZE (TYPE_MODE (vectype)) / nelements;
1116       mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
1117 
1118       if ((nelements - mis_in_elements) % GROUP_SIZE (stmt_info))
1119 	return false;
1120     }
1121 
1122   /* If misalignment is known at compile time then allow peeling
1123      only if natural alignment is reachable through peeling.  */
1124   if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
1125     {
1126       HOST_WIDE_INT elmsize =
1127 		int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
1128       if (vect_print_dump_info (REPORT_DETAILS))
1129 	{
1130 	  fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
1131 	  fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
1132 	}
1133       if (DR_MISALIGNMENT (dr) % elmsize)
1134 	{
1135 	  if (vect_print_dump_info (REPORT_DETAILS))
1136 	    fprintf (vect_dump, "data size does not divide the misalignment.\n");
1137 	  return false;
1138 	}
1139     }
1140 
1141   if (!known_alignment_for_access_p (dr))
1142     {
1143       tree type = (TREE_TYPE (DR_REF (dr)));
1144       bool is_packed = contains_packed_reference (DR_REF (dr));
1145 
1146       if (compare_tree_int (TYPE_SIZE (type), TYPE_ALIGN (type)) > 0)
1147 	is_packed = true;
1148 
1149       if (vect_print_dump_info (REPORT_DETAILS))
1150 	fprintf (vect_dump, "Unknown misalignment, is_packed = %d",is_packed);
1151       if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
1152 	return true;
1153       else
1154 	return false;
1155     }
1156 
1157   return true;
1158 }
1159 
1160 
1161 /* Calculate the cost of the memory access represented by DR.  */
1162 
1163 static void
1164 vect_get_data_access_cost (struct data_reference *dr,
1165                            unsigned int *inside_cost,
1166                            unsigned int *outside_cost)
1167 {
1168   gimple stmt = DR_STMT (dr);
1169   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1170   int nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
1171   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1172   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1173   int ncopies = vf / nunits;
1174   bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1175 
1176   if (!supportable_dr_alignment)
1177     *inside_cost = VECT_MAX_COST;
1178   else
1179     {
1180       if (DR_IS_READ (dr))
1181         vect_get_load_cost (dr, ncopies, true, inside_cost, outside_cost);
1182       else
1183         vect_get_store_cost (dr, ncopies, inside_cost);
1184     }
1185 
1186   if (vect_print_dump_info (REPORT_COST))
1187     fprintf (vect_dump, "vect_get_data_access_cost: inside_cost = %d, "
1188              "outside_cost = %d.", *inside_cost, *outside_cost);
1189 }
1190 
1191 
1192 static hashval_t
1193 vect_peeling_hash (const void *elem)
1194 {
1195   const struct _vect_peel_info *peel_info;
1196 
1197   peel_info = (const struct _vect_peel_info *) elem;
1198   return (hashval_t) peel_info->npeel;
1199 }
1200 
1201 
1202 static int
1203 vect_peeling_hash_eq (const void *elem1, const void *elem2)
1204 {
1205   const struct _vect_peel_info *a, *b;
1206 
1207   a = (const struct _vect_peel_info *) elem1;
1208   b = (const struct _vect_peel_info *) elem2;
1209   return (a->npeel == b->npeel);
1210 }
1211 
1212 
1213 /* Insert DR into peeling hash table with NPEEL as key.  */
1214 
1215 static void
1216 vect_peeling_hash_insert (loop_vec_info loop_vinfo, struct data_reference *dr,
1217                           int npeel)
1218 {
1219   struct _vect_peel_info elem, *slot;
1220   void **new_slot;
1221   bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1222 
1223   elem.npeel = npeel;
1224   slot = (vect_peel_info) htab_find (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1225                                      &elem);
1226   if (slot)
1227     slot->count++;
1228   else
1229     {
1230       slot = XNEW (struct _vect_peel_info);
1231       slot->npeel = npeel;
1232       slot->dr = dr;
1233       slot->count = 1;
1234       new_slot = htab_find_slot (LOOP_VINFO_PEELING_HTAB (loop_vinfo), slot,
1235                                  INSERT);
1236       *new_slot = slot;
1237     }
1238 
1239   if (!supportable_dr_alignment && !flag_vect_cost_model)
1240     slot->count += VECT_MAX_COST;
1241 }
1242 
1243 
1244 /* Traverse the peeling hash table to find the peeling option that aligns the
1245    maximum number of data accesses.  */
1246 
1247 static int
1248 vect_peeling_hash_get_most_frequent (void **slot, void *data)
1249 {
1250   vect_peel_info elem = (vect_peel_info) *slot;
1251   vect_peel_extended_info max = (vect_peel_extended_info) data;
1252 
1253   if (elem->count > max->peel_info.count
1254       || (elem->count == max->peel_info.count
1255           && max->peel_info.npeel > elem->npeel))
1256     {
1257       max->peel_info.npeel = elem->npeel;
1258       max->peel_info.count = elem->count;
1259       max->peel_info.dr = elem->dr;
1260     }
1261 
1262   return 1;
1263 }
1264 
1265 
1266 /* Traverse the peeling hash table and calculate the cost for each peeling
1267    option.  Find the one with the lowest cost.  */
1268 
1269 static int
1270 vect_peeling_hash_get_lowest_cost (void **slot, void *data)
1271 {
1272   vect_peel_info elem = (vect_peel_info) *slot;
1273   vect_peel_extended_info min = (vect_peel_extended_info) data;
1274   int save_misalignment, dummy;
1275   unsigned int inside_cost = 0, outside_cost = 0, i;
1276   gimple stmt = DR_STMT (elem->dr);
1277   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1278   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1279   VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1280   struct data_reference *dr;
1281 
1282   FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1283     {
1284       stmt = DR_STMT (dr);
1285       stmt_info = vinfo_for_stmt (stmt);
1286       /* For interleaving, only the alignment of the first access
1287          matters.  */
1288       if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1289           && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1290         continue;
1291 
1292       save_misalignment = DR_MISALIGNMENT (dr);
1293       vect_update_misalignment_for_peel (dr, elem->dr, elem->npeel);
1294       vect_get_data_access_cost (dr, &inside_cost, &outside_cost);
1295       SET_DR_MISALIGNMENT (dr, save_misalignment);
1296     }
1297 
1298   outside_cost += vect_get_known_peeling_cost (loop_vinfo, elem->npeel, &dummy,
1299                          vect_get_single_scalar_iteration_cost (loop_vinfo));
1300 
1301   if (inside_cost < min->inside_cost
1302       || (inside_cost == min->inside_cost && outside_cost < min->outside_cost))
1303     {
1304       min->inside_cost = inside_cost;
1305       min->outside_cost = outside_cost;
1306       min->peel_info.dr = elem->dr;
1307       min->peel_info.npeel = elem->npeel;
1308     }
1309 
1310   return 1;
1311 }
1312 
1313 
1314 /* Choose best peeling option by traversing peeling hash table and either
1315    choosing an option with the lowest cost (if cost model is enabled) or the
1316    option that aligns as many accesses as possible.  */
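/* A small illustration of the policy without the cost model: if peeling 2
   iterations would align three data-refs while peeling 5 iterations would
   align only one, the hash-table traversal picks NPEEL = 2; when two options
   align the same number of data-refs, the smaller peeling amount wins.  With
   the cost model enabled the decision is instead based on the estimated
   inside/outside costs of each peeling option.  */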
1317 
1318 static struct data_reference *
1319 vect_peeling_hash_choose_best_peeling (loop_vec_info loop_vinfo,
1320                                        unsigned int *npeel)
1321 {
1322    struct _vect_peel_extended_info res;
1323 
1324    res.peel_info.dr = NULL;
1325 
1326    if (flag_vect_cost_model)
1327      {
1328        res.inside_cost = INT_MAX;
1329        res.outside_cost = INT_MAX;
1330        htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1331                       vect_peeling_hash_get_lowest_cost, &res);
1332      }
1333    else
1334      {
1335        res.peel_info.count = 0;
1336        htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1337                       vect_peeling_hash_get_most_frequent, &res);
1338      }
1339 
1340    *npeel = res.peel_info.npeel;
1341    return res.peel_info.dr;
1342 }
1343 
1344 
1345 /* Function vect_enhance_data_refs_alignment
1346 
1347    This pass will use loop versioning and loop peeling in order to enhance
1348    the alignment of data references in the loop.
1349 
1350    FOR NOW: we assume that whatever versioning/peeling takes place, only the
1351    original loop is to be vectorized.  Any other loops that are created by
1352    the transformations performed in this pass are not supposed to be
1353    vectorized.  This restriction will be relaxed.
1354 
1355    This pass will require a cost model to guide it whether to apply peeling
1356    or versioning or a combination of the two.  For example, the scheme that
1357    intel uses when given a loop with several memory accesses, is as follows:
1358    Intel uses when given a loop with several memory accesses is as follows:
1359    choose one memory access ('p') whose alignment you want to force by doing
1360    other accesses are not necessarily aligned, or (2) use loop versioning to
1361    generate one loop in which all accesses are aligned, and another loop in
1362    which only 'p' is necessarily aligned.
1363 
1364    ("Automatic Intra-Register Vectorization for the Intel Architecture",
1365    Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
1366    Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1367 
1368    Devising a cost model is the most critical aspect of this work.  It will
1369    guide us on which access to peel for, whether to use loop versioning, how
1370    many versions to create, etc.  The cost model will probably consist of
1371    generic considerations as well as target specific considerations (on
1372    powerpc for example, misaligned stores are more painful than misaligned
1373    loads).
1374 
1375    Here are the general steps involved in alignment enhancements:
1376 
1377      -- original loop, before alignment analysis:
1378 	for (i=0; i<N; i++){
1379 	  x = q[i];			# DR_MISALIGNMENT(q) = unknown
1380 	  p[i] = y;			# DR_MISALIGNMENT(p) = unknown
1381 	}
1382 
1383      -- After vect_compute_data_refs_alignment:
1384 	for (i=0; i<N; i++){
1385 	  x = q[i];			# DR_MISALIGNMENT(q) = 3
1386 	  p[i] = y;			# DR_MISALIGNMENT(p) = unknown
1387 	}
1388 
1389      -- Possibility 1: we do loop versioning:
1390      if (p is aligned) {
1391 	for (i=0; i<N; i++){	# loop 1A
1392 	  x = q[i];			# DR_MISALIGNMENT(q) = 3
1393 	  p[i] = y;			# DR_MISALIGNMENT(p) = 0
1394 	}
1395      }
1396      else {
1397 	for (i=0; i<N; i++){	# loop 1B
1398 	  x = q[i];			# DR_MISALIGNMENT(q) = 3
1399 	  p[i] = y;			# DR_MISALIGNMENT(p) = unaligned
1400 	}
1401      }
1402 
1403      -- Possibility 2: we do loop peeling:
1404      for (i = 0; i < 3; i++){	# (scalar loop, not to be vectorized).
1405 	x = q[i];
1406 	p[i] = y;
1407      }
1408      for (i = 3; i < N; i++){	# loop 2A
1409 	x = q[i];			# DR_MISALIGNMENT(q) = 0
1410 	p[i] = y;			# DR_MISALIGNMENT(p) = unknown
1411      }
1412 
1413      -- Possibility 3: combination of loop peeling and versioning:
1414      for (i = 0; i < 3; i++){	# (scalar loop, not to be vectorized).
1415 	x = q[i];
1416 	p[i] = y;
1417      }
1418      if (p is aligned) {
1419 	for (i = 3; i<N; i++){	# loop 3A
1420 	  x = q[i];			# DR_MISALIGNMENT(q) = 0
1421 	  p[i] = y;			# DR_MISALIGNMENT(p) = 0
1422 	}
1423      }
1424      else {
1425 	for (i = 3; i<N; i++){	# loop 3B
1426 	  x = q[i];			# DR_MISALIGNMENT(q) = 0
1427 	  p[i] = y;			# DR_MISALIGNMENT(p) = unaligned
1428 	}
1429      }
1430 
1431      These loops are later passed to loop_transform to be vectorized.  The
1432      vectorizer will use the alignment information to guide the transformation
1433      (whether to generate regular loads/stores, or with special handling for
1434      misalignment).  */
1435 
1436 bool
1437 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1438 {
1439   VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1440   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1441   enum dr_alignment_support supportable_dr_alignment;
1442   struct data_reference *dr0 = NULL, *first_store = NULL;
1443   struct data_reference *dr;
1444   unsigned int i, j;
1445   bool do_peeling = false;
1446   bool do_versioning = false;
1447   bool stat;
1448   gimple stmt;
1449   stmt_vec_info stmt_info;
1450   int vect_versioning_for_alias_required;
1451   unsigned int npeel = 0;
1452   bool all_misalignments_unknown = true;
1453   unsigned int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1454   unsigned possible_npeel_number = 1;
1455   tree vectype;
1456   unsigned int nelements, mis, same_align_drs_max = 0;
1457 
1458   if (vect_print_dump_info (REPORT_DETAILS))
1459     fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");
1460 
1461   /* While cost model enhancements are expected in the future, the high level
1462      view of the code at this time is as follows:
1463 
1464      A) If there is a misaligned access then see if peeling to align
1465         this access can make all data references satisfy
1466         vect_supportable_dr_alignment.  If so, update data structures
1467         as needed and return true.
1468 
1469      B) If peeling wasn't possible and there is a data reference with an
1470         unknown misalignment that does not satisfy vect_supportable_dr_alignment
1471         then see if loop versioning checks can be used to make all data
1472         references satisfy vect_supportable_dr_alignment.  If so, update
1473         data structures as needed and return true.
1474 
1475      C) If neither peeling nor versioning were successful then return false if
1476         any data reference does not satisfy vect_supportable_dr_alignment.
1477 
1478      D) Return true (all data references satisfy vect_supportable_dr_alignment).
1479 
1480      Note, Possibility 3 above (which is peeling and versioning together) is not
1481      being done at this time.  */
1482 
1483   /* (1) Peeling to force alignment.  */
1484 
1485   /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1486      Considerations:
1487      + How many accesses will become aligned due to the peeling
1488      - How many accesses will become unaligned due to the peeling,
1489        and the cost of misaligned accesses.
1490      - The cost of peeling (the extra runtime checks, the increase
1491        in code size).  */
1492 
1493   FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1494     {
1495       stmt = DR_STMT (dr);
1496       stmt_info = vinfo_for_stmt (stmt);
1497 
1498       if (!STMT_VINFO_RELEVANT (stmt_info))
1499 	continue;
1500 
1501       /* For interleaving, only the alignment of the first access
1502          matters.  */
1503       if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1504           && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1505         continue;
1506 
1507       /* For invariant accesses there is nothing to enhance.  */
1508       if (integer_zerop (DR_STEP (dr)))
1509 	continue;
1510 
1511       supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1512       do_peeling = vector_alignment_reachable_p (dr);
1513       if (do_peeling)
1514         {
1515           if (known_alignment_for_access_p (dr))
1516             {
1517               unsigned int npeel_tmp;
1518 	      bool negative = tree_int_cst_compare (DR_STEP (dr),
1519 						    size_zero_node) < 0;
1520 
1521               /* Save info about DR in the hash table.  */
1522               if (!LOOP_VINFO_PEELING_HTAB (loop_vinfo))
1523                 LOOP_VINFO_PEELING_HTAB (loop_vinfo) =
1524                            htab_create (1, vect_peeling_hash,
1525                                         vect_peeling_hash_eq, free);
1526 
1527               vectype = STMT_VINFO_VECTYPE (stmt_info);
1528               nelements = TYPE_VECTOR_SUBPARTS (vectype);
1529               mis = DR_MISALIGNMENT (dr) / GET_MODE_SIZE (TYPE_MODE (
1530                                                 TREE_TYPE (DR_REF (dr))));
1531               npeel_tmp = (negative
1532 			   ? (mis - nelements) : (nelements - mis))
1533 		  & (nelements - 1);
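              /* For example, for a V4SI access (NELEMENTS == 4, 4-byte
                 elements) misaligned by 8 bytes with a positive step,
                 MIS == 2 and NPEEL_TMP == (4 - 2) & 3 == 2 peeled scalar
                 iterations.  */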
1534 
              /* For multiple types, it is possible that the bigger type access
                 will have more than one peeling option.  E.g., a loop with two
                 types: one of size (vector size / 4), and the other one of
                 size (vector size / 8).  The vectorization factor will be 8.
                 If both accesses are misaligned by 3, the first one needs one
                 scalar iteration to be aligned, and the second one needs 5.
                 But the first one will also be aligned by peeling 5 scalar
                 iterations, and in that case both accesses will be aligned.
                 Hence, except for the immediate peeling amount, we also want
                 to try to add a full vector size, as long as we don't exceed
                 the vectorization factor.
                 We do this automatically for the cost model, since we
                 calculate the cost for every peeling option.  */
1548               if (!flag_vect_cost_model)
                possible_npeel_number = vf / nelements;
1550 
1551               /* Handle the aligned case. We may decide to align some other
1552                  access, making DR unaligned.  */
1553               if (DR_MISALIGNMENT (dr) == 0)
1554                 {
1555                   npeel_tmp = 0;
1556                   if (!flag_vect_cost_model)
1557                     possible_npeel_number++;
1558                 }
1559 
1560               for (j = 0; j < possible_npeel_number; j++)
1561                 {
1562                   gcc_assert (npeel_tmp <= vf);
1563                   vect_peeling_hash_insert (loop_vinfo, dr, npeel_tmp);
1564                   npeel_tmp += nelements;
1565                 }
1566 
1567               all_misalignments_unknown = false;
1568               /* Data-ref that was chosen for the case that all the
1569                  misalignments are unknown is not relevant anymore, since we
1570                  have a data-ref with known alignment.  */
1571               dr0 = NULL;
1572             }
1573           else
1574             {
              /* If we don't know all the misalignment values, we prefer
                 peeling for the data-ref that has the maximum number of
                 data-refs with the same alignment, unless the target prefers
                 to align stores over loads.  */
1579               if (all_misalignments_unknown)
1580                 {
                  if (same_align_drs_max < VEC_length (dr_p,
1582                                        STMT_VINFO_SAME_ALIGN_REFS (stmt_info))
1583                       || !dr0)
1584                     {
1585                       same_align_drs_max = VEC_length (dr_p,
1586                                        STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
1587                       dr0 = dr;
1588                     }
1589 
1590                   if (!first_store && DR_IS_WRITE (dr))
1591                     first_store = dr;
1592                 }
1593 
              /* If there are both known and unknown misaligned accesses in the
                 loop, we choose the peeling amount according to the known
                 accesses.  */

1599               if (!supportable_dr_alignment)
1600                 {
1601                   dr0 = dr;
1602                   if (!first_store && DR_IS_WRITE (dr))
1603                     first_store = dr;
1604                 }
1605             }
1606         }
1607       else
1608         {
1609           if (!aligned_access_p (dr))
1610             {
1611               if (vect_print_dump_info (REPORT_DETAILS))
1612                 fprintf (vect_dump, "vector alignment may not be reachable");
1613 
1614               break;
1615             }
1616         }
1617     }
1618 
1619   vect_versioning_for_alias_required
1620     = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
1621 
1622   /* Temporarily, if versioning for alias is required, we disable peeling
1623      until we support peeling and versioning.  Often peeling for alignment
1624      will require peeling for loop-bound, which in turn requires that we
1625      know how to adjust the loop ivs after the loop.  */
1626   if (vect_versioning_for_alias_required
1627       || !vect_can_advance_ivs_p (loop_vinfo)
1628       || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1629     do_peeling = false;
1630 
1631   if (do_peeling && all_misalignments_unknown
1632       && vect_supportable_dr_alignment (dr0, false))
1633     {
1634 
      /* Check whether the target prefers to align stores over loads, i.e.,
         whether misaligned stores are more expensive than misaligned loads
         (taking drs with the same alignment into account).  */
1638       if (first_store && DR_IS_READ (dr0))
1639         {
1640           unsigned int load_inside_cost = 0, load_outside_cost = 0;
1641           unsigned int store_inside_cost = 0, store_outside_cost = 0;
1642           unsigned int load_inside_penalty = 0, load_outside_penalty = 0;
1643           unsigned int store_inside_penalty = 0, store_outside_penalty = 0;
1644 
1645           vect_get_data_access_cost (dr0, &load_inside_cost,
1646                                      &load_outside_cost);
1647           vect_get_data_access_cost (first_store, &store_inside_cost,
1648                                      &store_outside_cost);
1649 
1650           /* Calculate the penalty for leaving FIRST_STORE unaligned (by
1651              aligning the load DR0).  */
1652           load_inside_penalty = store_inside_cost;
1653           load_outside_penalty = store_outside_cost;
1654           for (i = 0; VEC_iterate (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1655                                    (vinfo_for_stmt (DR_STMT (first_store))),
1656                                    i, dr);
1657                i++)
1658             if (DR_IS_READ (dr))
1659               {
1660                 load_inside_penalty += load_inside_cost;
1661                 load_outside_penalty += load_outside_cost;
1662               }
1663             else
1664               {
1665                 load_inside_penalty += store_inside_cost;
1666                 load_outside_penalty += store_outside_cost;
1667               }
1668 
1669           /* Calculate the penalty for leaving DR0 unaligned (by
1670              aligning the FIRST_STORE).  */
1671           store_inside_penalty = load_inside_cost;
1672           store_outside_penalty = load_outside_cost;
1673           for (i = 0; VEC_iterate (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1674                                    (vinfo_for_stmt (DR_STMT (dr0))),
1675                                    i, dr);
1676                i++)
1677             if (DR_IS_READ (dr))
1678               {
1679                 store_inside_penalty += load_inside_cost;
1680                 store_outside_penalty += load_outside_cost;
1681               }
1682             else
1683               {
1684                 store_inside_penalty += store_inside_cost;
1685                 store_outside_penalty += store_outside_cost;
1686               }
1687 
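          /* Peel for FIRST_STORE instead of the load DR0 if leaving the
             store unaligned (i.e. aligning the load) would cost more than
             leaving the load unaligned; inside-loop costs are compared
             first, with outside-loop costs breaking ties.  */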
1688           if (load_inside_penalty > store_inside_penalty
1689               || (load_inside_penalty == store_inside_penalty
1690                   && load_outside_penalty > store_outside_penalty))
1691             dr0 = first_store;
1692         }
1693 
1694       /* In case there are only loads with different unknown misalignments, use
1695          peeling only if it may help to align other accesses in the loop.  */
1696       if (!first_store && !VEC_length (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1697                                             (vinfo_for_stmt (DR_STMT (dr0))))
1698           && vect_supportable_dr_alignment (dr0, false)
1699               != dr_unaligned_supported)
1700         do_peeling = false;
1701     }
1702 
1703   if (do_peeling && !dr0)
1704     {
      /* Peeling is possible, but no data access strictly requires alignment
         to be vectorizable.  So we try to choose the best possible
         peeling anyway.  */
1707 
1708       /* We should get here only if there are drs with known misalignment.  */
1709       gcc_assert (!all_misalignments_unknown);
1710 
1711       /* Choose the best peeling from the hash table.  */
1712       dr0 = vect_peeling_hash_choose_best_peeling (loop_vinfo, &npeel);
1713       if (!dr0 || !npeel)
1714         do_peeling = false;
1715     }
1716 
1717   if (do_peeling)
1718     {
1719       stmt = DR_STMT (dr0);
1720       stmt_info = vinfo_for_stmt (stmt);
1721       vectype = STMT_VINFO_VECTYPE (stmt_info);
1722       nelements = TYPE_VECTOR_SUBPARTS (vectype);
1723 
1724       if (known_alignment_for_access_p (dr0))
1725         {
1726 	  bool negative = tree_int_cst_compare (DR_STEP (dr0),
1727 						size_zero_node) < 0;
1728           if (!npeel)
1729             {
1730               /* Since it's known at compile time, compute the number of
1731                  iterations in the peeled loop (the peeling factor) for use in
1732                  updating DR_MISALIGNMENT values.  The peeling factor is the
1733                  vectorization factor minus the misalignment as an element
1734                  count.  */
1735               mis = DR_MISALIGNMENT (dr0);
1736               mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1737               npeel = ((negative ? mis - nelements : nelements - mis)
1738 		       & (nelements - 1));
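              /* The mask (NELEMENTS - 1) also covers the already-aligned
                 case: with MIS == 0 the result wraps to 0 and no iterations
                 are peeled.  */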
1739             }
1740 
1741 	  /* For interleaved data access every iteration accesses all the
1742 	     members of the group, therefore we divide the number of iterations
1743 	     by the group size.  */
1744 	  stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1745 	  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1746 	    npeel /= GROUP_SIZE (stmt_info);
1747 
1748           if (vect_print_dump_info (REPORT_DETAILS))
1749             fprintf (vect_dump, "Try peeling by %d", npeel);
1750         }
1751 
1752       /* Ensure that all data refs can be vectorized after the peel.  */
1753       FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1754         {
1755           int save_misalignment;
1756 
1757 	  if (dr == dr0)
1758 	    continue;
1759 
1760 	  stmt = DR_STMT (dr);
1761 	  stmt_info = vinfo_for_stmt (stmt);
	  /* For interleaving, only the alignment of the first access
	     matters.  */
1764 	  if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1765 	      && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1766 	    continue;
1767 
1768 	  save_misalignment = DR_MISALIGNMENT (dr);
1769 	  vect_update_misalignment_for_peel (dr, dr0, npeel);
1770 	  supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1771 	  SET_DR_MISALIGNMENT (dr, save_misalignment);
1772 
1773 	  if (!supportable_dr_alignment)
1774 	    {
1775 	      do_peeling = false;
1776 	      break;
1777 	    }
1778 	}
1779 
1780       if (do_peeling && known_alignment_for_access_p (dr0) && npeel == 0)
1781         {
1782           stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1783           if (!stat)
1784             do_peeling = false;
1785           else
1786             return stat;
1787         }
1788 
1789       if (do_peeling)
1790         {
1791           /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1792              If the misalignment of DR_i is identical to that of dr0 then set
1793              DR_MISALIGNMENT (DR_i) to zero.  If the misalignment of DR_i and
1794              dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1795              by the peeling factor times the element size of DR_i (MOD the
1796              vectorization factor times the size).  Otherwise, the
1797              misalignment of DR_i must be set to unknown.  */
1798 	  FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1799 	    if (dr != dr0)
1800 	      vect_update_misalignment_for_peel (dr, dr0, npeel);
1801 
1802           LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
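          /* Record the number of peeled iterations.  When the misalignment
             of DR0 is not known at compile time, NPEEL is 0 and we store
             DR_MISALIGNMENT (dr0) (negative when the alignment is unknown),
             signalling that the peel count must be computed at run time.  */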
1803           if (npeel)
1804             LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel;
1805           else
1806             LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1807 	  SET_DR_MISALIGNMENT (dr0, 0);
1808 	  if (vect_print_dump_info (REPORT_ALIGNMENT))
1809             fprintf (vect_dump, "Alignment of access forced using peeling.");
1810 
1811           if (vect_print_dump_info (REPORT_DETAILS))
1812             fprintf (vect_dump, "Peeling for alignment will be applied.");
1813 
1814 	  stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1815 	  gcc_assert (stat);
1816           return stat;
1817         }
1818     }
1819 
1820 
1821   /* (2) Versioning to force alignment.  */
1822 
  /* Try versioning if:
     1) flag_tree_vect_loop_version is TRUE,
     2) the loop is optimized for speed,
     3) there is at least one unsupported misaligned data ref with an unknown
        misalignment, and
     4) all misaligned data refs with a known misalignment are supported, and
     5) the number of runtime alignment checks is within reason.  */
1830 
1831   do_versioning =
1832 	flag_tree_vect_loop_version
1833 	&& optimize_loop_nest_for_speed_p (loop)
1834 	&& (!loop->inner); /* FORNOW */
1835 
1836   if (do_versioning)
1837     {
1838       FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1839         {
1840 	  stmt = DR_STMT (dr);
1841 	  stmt_info = vinfo_for_stmt (stmt);
1842 
1843 	  /* For interleaving, only the alignment of the first access
1844 	     matters.  */
1845 	  if (aligned_access_p (dr)
1846 	      || (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1847 		  && GROUP_FIRST_ELEMENT (stmt_info) != stmt))
1848 	    continue;
1849 
1850 	  supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1851 
1852           if (!supportable_dr_alignment)
1853             {
1854               gimple stmt;
1855               int mask;
1856               tree vectype;
1857 
1858               if (known_alignment_for_access_p (dr)
1859                   || VEC_length (gimple,
1860                                  LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
1861                      >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
1862                 {
1863                   do_versioning = false;
1864                   break;
1865                 }
1866 
1867               stmt = DR_STMT (dr);
1868               vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1869               gcc_assert (vectype);
1870 
1871               /* The rightmost bits of an aligned address must be zeros.
1872                  Construct the mask needed for this test.  For example,
1873                  GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1874                  mask must be 15 = 0xf. */
1875               mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
1876 
1877               /* FORNOW: use the same mask to test all potentially unaligned
1878                  references in the loop.  The vectorizer currently supports
1879                  a single vector size, see the reference to
1880                  GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
1881                  vectorization factor is computed.  */
1882               gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
1883                           || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
1884               LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
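              /* All recorded statements share this single mask: the runtime
                 check for the versioned loop ORs their addresses together
                 and tests that the result ANDed with MASK is zero.  */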
1885               VEC_safe_push (gimple, heap,
1886                              LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
1887                              DR_STMT (dr));
1888             }
1889         }
1890 
1891       /* Versioning requires at least one misaligned data reference.  */
1892       if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
1893         do_versioning = false;
1894       else if (!do_versioning)
1895         VEC_truncate (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
1896     }
1897 
1898   if (do_versioning)
1899     {
1900       VEC(gimple,heap) *may_misalign_stmts
1901         = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1902       gimple stmt;
1903 
1904       /* It can now be assumed that the data references in the statements
1905          in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
1906          of the loop being vectorized.  */
1907       FOR_EACH_VEC_ELT (gimple, may_misalign_stmts, i, stmt)
1908         {
1909           stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1910           dr = STMT_VINFO_DATA_REF (stmt_info);
1911 	  SET_DR_MISALIGNMENT (dr, 0);
1912 	  if (vect_print_dump_info (REPORT_ALIGNMENT))
1913             fprintf (vect_dump, "Alignment of access forced using versioning.");
1914         }
1915 
1916       if (vect_print_dump_info (REPORT_DETAILS))
1917         fprintf (vect_dump, "Versioning for alignment will be applied.");
1918 
1919       /* Peeling and versioning can't be done together at this time.  */
1920       gcc_assert (! (do_peeling && do_versioning));
1921 
1922       stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1923       gcc_assert (stat);
1924       return stat;
1925     }
1926 
1927   /* This point is reached if neither peeling nor versioning is being done.  */
1928   gcc_assert (! (do_peeling || do_versioning));
1929 
1930   stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1931   return stat;
1932 }
1933 
1934 
1935 /* Function vect_find_same_alignment_drs.
1936 
1937    Update group and alignment relations according to the chosen
1938    vectorization factor.  */
1939 
1940 static void
1941 vect_find_same_alignment_drs (struct data_dependence_relation *ddr,
1942 			      loop_vec_info loop_vinfo)
1943 {
1944   unsigned int i;
1945   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1946   int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1947   struct data_reference *dra = DDR_A (ddr);
1948   struct data_reference *drb = DDR_B (ddr);
1949   stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
1950   stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
1951   int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
1952   int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
1953   lambda_vector dist_v;
1954   unsigned int loop_depth;
1955 
1956   if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
1957     return;
1958 
1959   if (dra == drb)
1960     return;
1961 
1962   if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1963     return;
1964 
1965   /* Loop-based vectorization and known data dependence.  */
1966   if (DDR_NUM_DIST_VECTS (ddr) == 0)
1967     return;
1968 
  /* Data-dependence analysis reports a distance vector of zero
     for data-references that overlap only in the first iteration
     but whose steps have different signs (see PR45764).
     So as a sanity check require equal DR_STEP.  */
1973   if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0))
1974     return;
1975 
1976   loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
1977   FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
1978     {
1979       int dist = dist_v[loop_depth];
1980 
1981       if (vect_print_dump_info (REPORT_DR_DETAILS))
1982 	fprintf (vect_dump, "dependence distance  = %d.", dist);
1983 
1984       /* Same loop iteration.  */
1985       if (dist == 0
1986 	  || (dist % vectorization_factor == 0 && dra_size == drb_size))
1987 	{
	  /* Two references with distance zero, or with a distance that is a
	     multiple of the vectorization factor (and equal element size),
	     have the same alignment.  */
1989 	  VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
1990 	  VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
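	  /* E.g., with VF == 4 and 4-byte elements, accesses to a[i] and
	     a[i+4] have distance 4 and their addresses differ by 16 bytes,
	     so they have the same misalignment w.r.t. a 16-byte vector.  */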
1991 	  if (vect_print_dump_info (REPORT_ALIGNMENT))
1992 	    fprintf (vect_dump, "accesses have the same alignment.");
1993 	  if (vect_print_dump_info (REPORT_DR_DETAILS))
1994 	    {
1995 	      fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
1996 	      print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1997 	      fprintf (vect_dump, " and ");
1998 	      print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1999 	    }
2000 	}
2001     }
2002 }
2003 
2004 
2005 /* Function vect_analyze_data_refs_alignment
2006 
2007    Analyze the alignment of the data-references in the loop.
2008    Return FALSE if a data reference is found that cannot be vectorized.  */
2009 
2010 bool
2011 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo,
2012                                   bb_vec_info bb_vinfo)
2013 {
2014   if (vect_print_dump_info (REPORT_DETAILS))
2015     fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
2016 
  /* Mark groups of data references with the same alignment using
     data dependence information.  */
2019   if (loop_vinfo)
2020     {
2021       VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
2022       struct data_dependence_relation *ddr;
2023       unsigned int i;
2024 
2025       FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
2026 	vect_find_same_alignment_drs (ddr, loop_vinfo);
2027     }
2028 
2029   if (!vect_compute_data_refs_alignment (loop_vinfo, bb_vinfo))
2030     {
2031       if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2032 	fprintf (vect_dump,
2033 		 "not vectorized: can't calculate alignment for data ref.");
2034       return false;
2035     }
2036 
2037   return true;
2038 }
2039 
2040 
2041 /* Analyze groups of strided accesses: check that DR belongs to a group of
2042    strided accesses of legal size, step, etc.  Detect gaps, single element
2043    interleaving, and other special cases. Set strided access info.
2044    Collect groups of strided stores for further use in SLP analysis.  */
2045 
2046 static bool
2047 vect_analyze_group_access (struct data_reference *dr)
2048 {
2049   tree step = DR_STEP (dr);
2050   tree scalar_type = TREE_TYPE (DR_REF (dr));
2051   HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
2052   gimple stmt = DR_STMT (dr);
2053   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2054   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2055   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2056   HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2057   HOST_WIDE_INT stride, last_accessed_element = 1;
2058   bool slp_impossible = false;
2059   struct loop *loop = NULL;
2060 
2061   if (loop_vinfo)
2062     loop = LOOP_VINFO_LOOP (loop_vinfo);
2063 
2064   /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
2065      interleaving group (including gaps).  */
2066   stride = dr_step / type_size;
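  /* E.g., a step of 16 bytes with 4-byte elements gives a group size
     (STRIDE) of 4.  */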
2067 
  /* A non-consecutive access is possible only if it is part of an
     interleaving group.  */
2069   if (!GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
2070     {
      /* Check whether this DR is part of an interleaving group and is the
	 single element of the group that is accessed in the loop.  */
2073 
2074       /* Gaps are supported only for loads. STEP must be a multiple of the type
2075 	 size.  The size of the group must be a power of 2.  */
2076       if (DR_IS_READ (dr)
2077 	  && (dr_step % type_size) == 0
2078 	  && stride > 0
2079 	  && exact_log2 (stride) != -1)
2080 	{
2081 	  GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = stmt;
2082 	  GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2083 	  if (vect_print_dump_info (REPORT_DR_DETAILS))
2084 	    {
2085 	      fprintf (vect_dump, "Detected single element interleaving ");
2086 	      print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2087 	      fprintf (vect_dump, " step ");
2088 	      print_generic_expr (vect_dump, step, TDF_SLIM);
2089 	    }
2090 
2091 	  if (loop_vinfo)
2092 	    {
2093 	      if (vect_print_dump_info (REPORT_DETAILS))
2094 		fprintf (vect_dump, "Data access with gaps requires scalar "
2095 				    "epilogue loop");
2096               if (loop->inner)
2097                 {
2098                   if (vect_print_dump_info (REPORT_DETAILS))
2099                     fprintf (vect_dump, "Peeling for outer loop is not"
2100                                         " supported");
2101                   return false;
2102                 }
2103 
2104               LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2105 	    }
2106 
2107 	  return true;
2108 	}
2109 
2110       if (vect_print_dump_info (REPORT_DETAILS))
2111         {
2112  	  fprintf (vect_dump, "not consecutive access ");
2113           print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2114         }
2115 
2116       if (bb_vinfo)
2117         {
2118           /* Mark the statement as unvectorizable.  */
2119           STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2120           return true;
2121         }
2122 
2123       return false;
2124     }
2125 
2126   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt)
2127     {
2128       /* First stmt in the interleaving chain. Check the chain.  */
2129       gimple next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
2130       struct data_reference *data_ref = dr;
2131       unsigned int count = 1;
2132       tree next_step;
2133       tree prev_init = DR_INIT (data_ref);
2134       gimple prev = stmt;
2135       HOST_WIDE_INT diff, count_in_bytes, gaps = 0;
2136 
2137       while (next)
2138         {
          /* Skip same data-refs.  If two or more stmts share a data-ref
             (supported only for loads), we vectorize only the first stmt,
             and the rest get their vectorized loads from the first
             one.  */
2143           if (!tree_int_cst_compare (DR_INIT (data_ref),
2144                                      DR_INIT (STMT_VINFO_DATA_REF (
2145 						   vinfo_for_stmt (next)))))
2146             {
2147               if (DR_IS_WRITE (data_ref))
2148                 {
2149                   if (vect_print_dump_info (REPORT_DETAILS))
2150                     fprintf (vect_dump, "Two store stmts share the same dr.");
2151                   return false;
2152                 }
2153 
              /* Check that there are no load-store dependencies for these
                 loads, to prevent a load-store-load to the same location.  */
2156               if (GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
2157                   || GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
2158                 {
2159                   if (vect_print_dump_info (REPORT_DETAILS))
2160                     fprintf (vect_dump,
2161                              "READ_WRITE dependence in interleaving.");
2162                   return false;
2163                 }
2164 
              /* For loads, use the same data-ref load.  */
2166               GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
2167 
2168               prev = next;
2169               next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
2170               continue;
2171             }
2172 
2173           prev = next;
2174 
2175           /* Check that all the accesses have the same STEP.  */
2176           next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
2177           if (tree_int_cst_compare (step, next_step))
2178             {
2179               if (vect_print_dump_info (REPORT_DETAILS))
2180                 fprintf (vect_dump, "not consecutive access in interleaving");
2181               return false;
2182             }
2183 
2184           data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
2185           /* Check that the distance between two accesses is equal to the type
2186              size. Otherwise, we have gaps.  */
2187           diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
2188                   - TREE_INT_CST_LOW (prev_init)) / type_size;
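          /* E.g., with 4-byte elements, DR_INITs of 0 and 12 for consecutive
             group members give DIFF == 3, i.e. a gap of 2 elements.  */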
2189 	  if (diff != 1)
2190 	    {
2191 	      /* FORNOW: SLP of accesses with gaps is not supported.  */
2192 	      slp_impossible = true;
2193 	      if (DR_IS_WRITE (data_ref))
2194 		{
2195 		  if (vect_print_dump_info (REPORT_DETAILS))
2196 		    fprintf (vect_dump, "interleaved store with gaps");
2197 		  return false;
2198 		}
2199 
2200               gaps += diff - 1;
2201 	    }
2202 
2203 	  last_accessed_element += diff;
2204 
2205           /* Store the gap from the previous member of the group. If there is no
2206              gap in the access, GROUP_GAP is always 1.  */
2207           GROUP_GAP (vinfo_for_stmt (next)) = diff;
2208 
2209           prev_init = DR_INIT (data_ref);
2210           next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
2211           /* Count the number of data-refs in the chain.  */
2212           count++;
2213         }
2214 
      /* COUNT is the number of accesses found; we multiply it by the size of
         the type to get COUNT_IN_BYTES.  */
2217       count_in_bytes = type_size * count;
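      /* E.g., 3 accesses of 4-byte elements give COUNT_IN_BYTES == 12; with
         one single-element gap the group occupies 16 bytes, which must not
         exceed the step (checked below).  */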
2218 
2219       /* Check that the size of the interleaving (including gaps) is not
2220          greater than STEP.  */
2221       if (dr_step && dr_step < count_in_bytes + gaps * type_size)
2222         {
2223           if (vect_print_dump_info (REPORT_DETAILS))
2224             {
2225               fprintf (vect_dump, "interleaving size is greater than step for ");
2226               print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2227             }
2228           return false;
2229         }
2230 
2231       /* Check that the size of the interleaving is equal to STEP for stores,
2232          i.e., that there are no gaps.  */
2233       if (dr_step && dr_step != count_in_bytes)
2234         {
2235           if (DR_IS_READ (dr))
2236             {
2237               slp_impossible = true;
              /* There is a gap after the last load in the group.  This gap is
                 the difference between the stride and the number of elements;
                 when there is no gap, this difference is 0.  */
2241               GROUP_GAP (vinfo_for_stmt (stmt)) = stride - count;
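              /* E.g., with a 16-byte step, 4-byte elements (STRIDE == 4) and
                 only 3 loads in the group, GROUP_GAP of the first stmt
                 is 1.  */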
2242             }
2243           else
2244             {
2245               if (vect_print_dump_info (REPORT_DETAILS))
2246                 fprintf (vect_dump, "interleaved store with gaps");
2247               return false;
2248             }
2249         }
2250 
2251       /* Check that STEP is a multiple of type size.  */
2252       if (dr_step && (dr_step % type_size) != 0)
2253         {
2254           if (vect_print_dump_info (REPORT_DETAILS))
2255             {
2256               fprintf (vect_dump, "step is not a multiple of type size: step ");
2257               print_generic_expr (vect_dump, step, TDF_SLIM);
2258               fprintf (vect_dump, " size ");
2259               print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
2260                                   TDF_SLIM);
2261             }
2262           return false;
2263         }
2264 
2265       if (stride == 0)
2266         stride = count;
2267 
2268       GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2269       if (vect_print_dump_info (REPORT_DETAILS))
2270         fprintf (vect_dump, "Detected interleaving of size %d", (int)stride);
2271 
      /* SLP: create an SLP data structure for every interleaving group of
	 stores for further analysis in vect_analyze_slp.  */
2274       if (DR_IS_WRITE (dr) && !slp_impossible)
2275         {
2276           if (loop_vinfo)
2277             VEC_safe_push (gimple, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo),
2278                            stmt);
2279           if (bb_vinfo)
2280             VEC_safe_push (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo),
2281                            stmt);
2282         }
2283 
      /* There is a gap at the end of the group.  */
2285       if (stride - last_accessed_element > 0 && loop_vinfo)
2286 	{
2287 	  if (vect_print_dump_info (REPORT_DETAILS))
2288 	    fprintf (vect_dump, "Data access with gaps requires scalar "
2289 				"epilogue loop");
2290           if (loop->inner)
2291             {
2292               if (vect_print_dump_info (REPORT_DETAILS))
2293                 fprintf (vect_dump, "Peeling for outer loop is not supported");
2294               return false;
2295             }
2296 
2297           LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2298 	}
2299     }
2300 
2301   return true;
2302 }
2303 
2304 
2305 /* Analyze the access pattern of the data-reference DR.
2306    In case of non-consecutive accesses call vect_analyze_group_access() to
2307    analyze groups of strided accesses.  */
2308 
2309 static bool
2310 vect_analyze_data_ref_access (struct data_reference *dr)
2311 {
2312   tree step = DR_STEP (dr);
2313   tree scalar_type = TREE_TYPE (DR_REF (dr));
2314   gimple stmt = DR_STMT (dr);
2315   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2316   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2317   struct loop *loop = NULL;
2318   HOST_WIDE_INT dr_step;
2319 
2320   if (loop_vinfo)
2321     loop = LOOP_VINFO_LOOP (loop_vinfo);
2322 
2323   if (loop_vinfo && !step)
2324     {
2325       if (vect_print_dump_info (REPORT_DETAILS))
2326 	fprintf (vect_dump, "bad data-ref access in loop");
2327       return false;
2328     }
2329 
2330   /* Allow invariant loads in loops.  */
2331   dr_step = TREE_INT_CST_LOW (step);
2332   if (loop_vinfo && dr_step == 0)
2333     {
2334       GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
2335       if (nested_in_vect_loop_p (loop, stmt))
2336 	{
2337 	  if (vect_print_dump_info (REPORT_DETAILS))
2338 	    fprintf (vect_dump, "zero step in inner loop of nest");
2339 	  return false;
2340 	}
2341       return DR_IS_READ (dr);
2342     }
2343 
2344   if (loop && nested_in_vect_loop_p (loop, stmt))
2345     {
2346       /* Interleaved accesses are not yet supported within outer-loop
2347         vectorization for references in the inner-loop.  */
2348       GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
2349 
2350       /* For the rest of the analysis we use the outer-loop step.  */
2351       step = STMT_VINFO_DR_STEP (stmt_info);
2352       dr_step = TREE_INT_CST_LOW (step);
2353 
2354       if (dr_step == 0)
2355 	{
2356 	  if (vect_print_dump_info (REPORT_ALIGNMENT))
2357 	    fprintf (vect_dump, "zero step in outer loop.");
	  return DR_IS_READ (dr);
2362 	}
2363     }
2364 
2365   /* Consecutive?  */
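  /* The access is consecutive when the absolute value of STEP equals the
     scalar type size, e.g. a step of 4 (or -4) bytes for an 'int' access.  */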
2366   if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))
2367       || (dr_step < 0
2368 	  && !compare_tree_int (TYPE_SIZE_UNIT (scalar_type), -dr_step)))
2369     {
2370       /* Mark that it is not interleaving.  */
2371       GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
2372       return true;
2373     }
2374 
2375   if (loop && nested_in_vect_loop_p (loop, stmt))
2376     {
2377       if (vect_print_dump_info (REPORT_ALIGNMENT))
2378 	fprintf (vect_dump, "strided access in outer loop.");
2379       return false;
2380     }
2381 
  /* Not a consecutive access - check whether it is part of an interleaving
     group.  */
2383   return vect_analyze_group_access (dr);
2384 }
2385 
2386 
2387 /* Function vect_analyze_data_ref_accesses.
2388 
2389    Analyze the access pattern of all the data references in the loop.
2390 
2391    FORNOW: the only access pattern that is considered vectorizable is a
2392 	   simple step 1 (consecutive) access.
2393 
2394    FORNOW: handle only arrays and pointer accesses.  */
2395 
2396 bool
2397 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2398 {
2399   unsigned int i;
2400   VEC (data_reference_p, heap) *datarefs;
2401   struct data_reference *dr;
2402 
2403   if (vect_print_dump_info (REPORT_DETAILS))
2404     fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
2405 
2406   if (loop_vinfo)
2407     datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2408   else
2409     datarefs = BB_VINFO_DATAREFS (bb_vinfo);
2410 
2411   FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
2412     if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
2413         && !vect_analyze_data_ref_access (dr))
2414       {
2415 	if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2416 	  fprintf (vect_dump, "not vectorized: complicated access pattern.");
2417 
2418         if (bb_vinfo)
2419           {
2420             /* Mark the statement as not vectorizable.  */
2421             STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2422             continue;
2423           }
2424         else
2425           return false;
2426       }
2427 
2428   return true;
2429 }
2430 
2431 /* Function vect_prune_runtime_alias_test_list.
2432 
2433    Prune a list of ddrs to be tested at run-time by versioning for alias.
   Return FALSE if the resulting list of ddrs is longer than allowed by
   PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE.  */
2436 
2437 bool
2438 vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
2439 {
2440   VEC (ddr_p, heap) * ddrs =
2441     LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
2442   unsigned i, j;
2443 
2444   if (vect_print_dump_info (REPORT_DETAILS))
2445     fprintf (vect_dump, "=== vect_prune_runtime_alias_test_list ===");
2446 
2447   for (i = 0; i < VEC_length (ddr_p, ddrs); )
2448     {
2449       bool found;
2450       ddr_p ddr_i;
2451 
2452       ddr_i = VEC_index (ddr_p, ddrs, i);
2453       found = false;
2454 
2455       for (j = 0; j < i; j++)
2456         {
2457 	  ddr_p ddr_j = VEC_index (ddr_p, ddrs, j);
2458 
2459 	  if (vect_vfa_range_equal (ddr_i, ddr_j))
2460 	    {
2461 	      if (vect_print_dump_info (REPORT_DR_DETAILS))
2462 		{
2463 		  fprintf (vect_dump, "found equal ranges ");
2464 		  print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_i)), TDF_SLIM);
2465 		  fprintf (vect_dump, ", ");
2466 		  print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_i)), TDF_SLIM);
2467 		  fprintf (vect_dump, " and ");
2468 		  print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_j)), TDF_SLIM);
2469 		  fprintf (vect_dump, ", ");
2470 		  print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_j)), TDF_SLIM);
2471 		}
2472 	      found = true;
2473 	      break;
2474 	    }
2475 	}
2476 
      if (found)
	{
	  VEC_ordered_remove (ddr_p, ddrs, i);
	  continue;
	}
2482       i++;
2483     }
2484 
2485   if (VEC_length (ddr_p, ddrs) >
2486        (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
2487     {
2488       if (vect_print_dump_info (REPORT_DR_DETAILS))
2489 	{
2490 	  fprintf (vect_dump,
2491 		   "disable versioning for alias - max number of generated "
2492 		   "checks exceeded.");
2493 	}
2494 
2495       VEC_truncate (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), 0);
2496 
2497       return false;
2498     }
2499 
2500   return true;
2501 }
2502 
/* Check whether a non-affine read in STMT is suitable for a gather load
   and if so, return a builtin decl for that operation.  */
2505 
2506 tree
2507 vect_check_gather (gimple stmt, loop_vec_info loop_vinfo, tree *basep,
2508 		   tree *offp, int *scalep)
2509 {
2510   HOST_WIDE_INT scale = 1, pbitpos, pbitsize;
2511   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2512   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2513   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2514   tree offtype = NULL_TREE;
2515   tree decl, base, off;
2516   enum machine_mode pmode;
2517   int punsignedp, pvolatilep;
2518 
2519   /* The gather builtins need address of the form
2520      loop_invariant + vector * {1, 2, 4, 8}
2521      or
2522      loop_invariant + sign_extend (vector) * { 1, 2, 4, 8 }.
2523      Unfortunately DR_BASE_ADDRESS/DR_OFFSET can be a mixture
2524      of loop invariants/SSA_NAMEs defined in the loop, with casts,
2525      multiplications and additions in it.  To get a vector, we need
2526      a single SSA_NAME that will be defined in the loop and will
2527      contain everything that is not loop invariant and that can be
     vectorized.  The following code attempts to find such a preexisting
2529      SSA_NAME OFF and put the loop invariants into a tree BASE
2530      that can be gimplified before the loop.  */
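  /* For example, for a gathered read a[b[i]] with loop-invariant 'a', this
     typically yields BASE == &a, OFF == the SSA_NAME holding b[i] (possibly
     sign-extended), and SCALE == the element size of 'a'.  */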
2531   base = get_inner_reference (DR_REF (dr), &pbitsize, &pbitpos, &off,
2532 			      &pmode, &punsignedp, &pvolatilep, false);
2533   gcc_assert (base != NULL_TREE && (pbitpos % BITS_PER_UNIT) == 0);
2534 
2535   if (TREE_CODE (base) == MEM_REF)
2536     {
2537       if (!integer_zerop (TREE_OPERAND (base, 1)))
2538 	{
2539 	  if (off == NULL_TREE)
2540 	    {
2541 	      double_int moff = mem_ref_offset (base);
2542 	      off = double_int_to_tree (sizetype, moff);
2543 	    }
2544 	  else
2545 	    off = size_binop (PLUS_EXPR, off,
2546 			      fold_convert (sizetype, TREE_OPERAND (base, 1)));
2547 	}
2548       base = TREE_OPERAND (base, 0);
2549     }
2550   else
2551     base = build_fold_addr_expr (base);
2552 
2553   if (off == NULL_TREE)
2554     off = size_zero_node;
2555 
  /* If BASE is not loop invariant, then if OFF is 0 we start with just the
     constant offset in the loop invariant BASE and continue with BASE as
     OFF; otherwise we give up.
     We could handle that case by gimplifying the addition of base + off
     into some SSA_NAME and using that as off, but for now punt.  */
2561   if (!expr_invariant_in_loop_p (loop, base))
2562     {
2563       if (!integer_zerop (off))
2564 	return NULL_TREE;
2565       off = base;
2566       base = size_int (pbitpos / BITS_PER_UNIT);
2567     }
2568   /* Otherwise put base + constant offset into the loop invariant BASE
2569      and continue with OFF.  */
2570   else
2571     {
2572       base = fold_convert (sizetype, base);
2573       base = size_binop (PLUS_EXPR, base, size_int (pbitpos / BITS_PER_UNIT));
2574     }
2575 
2576   /* OFF at this point may be either a SSA_NAME or some tree expression
2577      from get_inner_reference.  Try to peel off loop invariants from it
2578      into BASE as long as possible.  */
2579   STRIP_NOPS (off);
2580   while (offtype == NULL_TREE)
2581     {
2582       enum tree_code code;
2583       tree op0, op1, add = NULL_TREE;
2584 
2585       if (TREE_CODE (off) == SSA_NAME)
2586 	{
2587 	  gimple def_stmt = SSA_NAME_DEF_STMT (off);
2588 
2589 	  if (expr_invariant_in_loop_p (loop, off))
2590 	    return NULL_TREE;
2591 
2592 	  if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
2593 	    break;
2594 
2595 	  op0 = gimple_assign_rhs1 (def_stmt);
2596 	  code = gimple_assign_rhs_code (def_stmt);
2597 	  op1 = gimple_assign_rhs2 (def_stmt);
2598 	}
2599       else
2600 	{
2601 	  if (get_gimple_rhs_class (TREE_CODE (off)) == GIMPLE_TERNARY_RHS)
2602 	    return NULL_TREE;
2603 	  code = TREE_CODE (off);
2604 	  extract_ops_from_tree (off, &code, &op0, &op1);
2605 	}
2606       switch (code)
2607 	{
2608 	case POINTER_PLUS_EXPR:
2609 	case PLUS_EXPR:
2610 	  if (expr_invariant_in_loop_p (loop, op0))
2611 	    {
2612 	      add = op0;
2613 	      off = op1;
2614 	    do_add:
2615 	      add = fold_convert (sizetype, add);
2616 	      if (scale != 1)
2617 		add = size_binop (MULT_EXPR, add, size_int (scale));
2618 	      base = size_binop (PLUS_EXPR, base, add);
2619 	      continue;
2620 	    }
2621 	  if (expr_invariant_in_loop_p (loop, op1))
2622 	    {
2623 	      add = op1;
2624 	      off = op0;
2625 	      goto do_add;
2626 	    }
2627 	  break;
2628 	case MINUS_EXPR:
2629 	  if (expr_invariant_in_loop_p (loop, op1))
2630 	    {
2631 	      add = fold_convert (sizetype, op1);
2632 	      add = size_binop (MINUS_EXPR, size_zero_node, add);
2633 	      off = op0;
2634 	      goto do_add;
2635 	    }
2636 	  break;
2637 	case MULT_EXPR:
2638 	  if (scale == 1 && host_integerp (op1, 0))
2639 	    {
2640 	      scale = tree_low_cst (op1, 0);
2641 	      off = op0;
2642 	      continue;
2643 	    }
2644 	  break;
2645 	case SSA_NAME:
2646 	  off = op0;
2647 	  continue;
2648 	CASE_CONVERT:
2649 	  if (!POINTER_TYPE_P (TREE_TYPE (op0))
2650 	      && !INTEGRAL_TYPE_P (TREE_TYPE (op0)))
2651 	    break;
2652 	  if (TYPE_PRECISION (TREE_TYPE (op0))
2653 	      == TYPE_PRECISION (TREE_TYPE (off)))
2654 	    {
2655 	      off = op0;
2656 	      continue;
2657 	    }
2658 	  if (TYPE_PRECISION (TREE_TYPE (op0))
2659 	      < TYPE_PRECISION (TREE_TYPE (off)))
2660 	    {
2661 	      off = op0;
2662 	      offtype = TREE_TYPE (off);
2663 	      STRIP_NOPS (off);
2664 	      continue;
2665 	    }
2666 	  break;
2667 	default:
2668 	  break;
2669 	}
2670       break;
2671     }
2672 
2673   /* If at the end OFF still isn't a SSA_NAME or isn't
2674      defined in the loop, punt.  */
2675   if (TREE_CODE (off) != SSA_NAME
2676       || expr_invariant_in_loop_p (loop, off))
2677     return NULL_TREE;
2678 
2679   if (offtype == NULL_TREE)
2680     offtype = TREE_TYPE (off);
2681 
2682   decl = targetm.vectorize.builtin_gather (STMT_VINFO_VECTYPE (stmt_info),
2683 					   offtype, scale);
2684   if (decl == NULL_TREE)
2685     return NULL_TREE;
2686 
2687   if (basep)
2688     *basep = base;
2689   if (offp)
2690     *offp = off;
2691   if (scalep)
2692     *scalep = scale;
2693   return decl;
2694 }
2695 
2696 
2697 /* Function vect_analyze_data_refs.
2698 
   Find all the data references in the loop or basic block.
2700 
2701    The general structure of the analysis of data refs in the vectorizer is as
2702    follows:
2703    1- vect_analyze_data_refs(loop/bb): call
2704       compute_data_dependences_for_loop/bb to find and analyze all data-refs
2705       in the loop/bb and their dependences.
2706    2- vect_analyze_dependences(): apply dependence testing using ddrs.
   3- vect_analyze_data_refs_alignment(): check that ref_stmt.alignment is ok.
   4- vect_analyze_data_ref_accesses(): check that ref_stmt.step is ok.  */
2711 
2712 bool
2713 vect_analyze_data_refs (loop_vec_info loop_vinfo,
2714 			bb_vec_info bb_vinfo,
2715 			int *min_vf)
2716 {
2717   struct loop *loop = NULL;
2718   basic_block bb = NULL;
2719   unsigned int i;
2720   VEC (data_reference_p, heap) *datarefs;
2721   struct data_reference *dr;
2722   tree scalar_type;
2723   bool res, stop_bb_analysis = false;
2724 
2725   if (vect_print_dump_info (REPORT_DETAILS))
2726     fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
2727 
2728   if (loop_vinfo)
2729     {
2730       loop = LOOP_VINFO_LOOP (loop_vinfo);
2731       res = compute_data_dependences_for_loop
2732 	(loop, true,
2733 	 &LOOP_VINFO_LOOP_NEST (loop_vinfo),
2734 	 &LOOP_VINFO_DATAREFS (loop_vinfo),
2735 	 &LOOP_VINFO_DDRS (loop_vinfo));
2736 
2737       if (!res)
2738 	{
2739 	  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2740 	    fprintf (vect_dump, "not vectorized: loop contains function calls"
2741 		     " or data references that cannot be analyzed");
2742 	  return false;
2743 	}
2744 
2745       datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2746     }
2747   else
2748     {
2749       bb = BB_VINFO_BB (bb_vinfo);
2750       res = compute_data_dependences_for_bb (bb, true,
2751 					     &BB_VINFO_DATAREFS (bb_vinfo),
2752 					     &BB_VINFO_DDRS (bb_vinfo));
2753       if (!res)
2754 	{
2755 	  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2756 	    fprintf (vect_dump, "not vectorized: basic block contains function"
2757 		     " calls or data references that cannot be analyzed");
2758 	  return false;
2759 	}
2760 
2761       datarefs = BB_VINFO_DATAREFS (bb_vinfo);
2762     }
2763 
  /* Go through the data-refs and check that the analysis succeeded.  Update
     the pointer from the stmt_vec_info struct to DR and vectype.  */
2766 
2767   FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
2768     {
2769       gimple stmt;
2770       stmt_vec_info stmt_info;
2771       tree base, offset, init;
2772       bool gather = false;
2773       int vf;
2774 
2775       if (!dr || !DR_REF (dr))
2776         {
2777           if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2778 	    fprintf (vect_dump, "not vectorized: unhandled data-ref ");
2779 
2780           return false;
2781         }
2782 
2783       stmt = DR_STMT (dr);
2784       stmt_info = vinfo_for_stmt (stmt);
2785 
2786       if (stop_bb_analysis)
2787         {
2788           STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2789           continue;
2790         }
2791 
2792       /* Check that analysis of the data-ref succeeded.  */
2793       if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
2794 	  || !DR_STEP (dr))
2795         {
	  /* If the target supports vector gather loads, check whether this
	     load can use them.  */
2798 	  if (loop_vinfo
2799 	      && DR_IS_READ (dr)
2800 	      && !TREE_THIS_VOLATILE (DR_REF (dr))
2801 	      && targetm.vectorize.builtin_gather != NULL
2802 	      && !nested_in_vect_loop_p (loop, stmt))
2803 	    {
2804 	      struct data_reference *newdr
2805 		= create_data_ref (NULL, loop_containing_stmt (stmt),
2806 				   DR_REF (dr), stmt, true);
2807 	      gcc_assert (newdr != NULL && DR_REF (newdr));
2808 	      if (DR_BASE_ADDRESS (newdr)
2809 		  && DR_OFFSET (newdr)
2810 		  && DR_INIT (newdr)
2811 		  && DR_STEP (newdr)
2812 		  && integer_zerop (DR_STEP (newdr)))
2813 		{
2814 		  dr = newdr;
2815 		  gather = true;
2816 		}
2817 	      else
2818 		free_data_ref (newdr);
2819 	    }
2820 
2821 	  if (!gather)
2822 	    {
2823 	      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2824 		{
2825 		  fprintf (vect_dump, "not vectorized: data ref analysis "
2826 				      "failed ");
2827 		  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2828 		}
2829 
2830 	      if (bb_vinfo)
2831 		{
2832 		  STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2833 		  stop_bb_analysis = true;
2834 		  continue;
2835 		}
2836 
2837 	      return false;
2838 	    }
2839         }
2840 
2841       if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
2842         {
2843           if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2844             fprintf (vect_dump, "not vectorized: base addr of dr is a "
2845                      "constant");
2846 
2847           if (bb_vinfo)
2848             {
2849               STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2850               stop_bb_analysis = true;
2851               continue;
2852             }
2853 
2854 	  if (gather)
2855 	    free_data_ref (dr);
2856 	  return false;
2857         }
2858 
2859       if (TREE_THIS_VOLATILE (DR_REF (dr)))
2860         {
2861           if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2862             {
2863               fprintf (vect_dump, "not vectorized: volatile type ");
2864               print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2865             }
2866 
2867           if (bb_vinfo)
2868             {
2869               STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2870               stop_bb_analysis = true;
2871               continue;
2872             }
2873 
2874           return false;
2875         }
2876 
2877       if (stmt_can_throw_internal (stmt))
2878         {
2879           if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2880             {
2881               fprintf (vect_dump, "not vectorized: statement can throw an "
2882                        "exception ");
2883               print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2884             }
2885 
2886           if (bb_vinfo)
2887             {
2888               STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2889               stop_bb_analysis = true;
2890               continue;
2891             }
2892 
2893 	  if (gather)
2894 	    free_data_ref (dr);
2895           return false;
2896         }
2897 
2898       if (TREE_CODE (DR_REF (dr)) == COMPONENT_REF
2899 	  && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr), 1)))
2900 	{
2901           if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2902             {
2903               fprintf (vect_dump, "not vectorized: statement is bitfield "
2904                        "access ");
2905               print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2906             }
2907 
2908           if (bb_vinfo)
2909             {
2910               STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2911               stop_bb_analysis = true;
2912               continue;
2913             }
2914 
2915 	  if (gather)
2916 	    free_data_ref (dr);
2917           return false;
2918 	}
2919 
2920       base = unshare_expr (DR_BASE_ADDRESS (dr));
2921       offset = unshare_expr (DR_OFFSET (dr));
2922       init = unshare_expr (DR_INIT (dr));
2923 
2924       if (is_gimple_call (stmt))
2925 	{
2926 	  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2927 	    {
2928 	      fprintf (vect_dump, "not vectorized: dr in a call ");
2929 	      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2930 	    }
2931 
2932 	  if (bb_vinfo)
2933 	    {
2934 	      STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2935 	      stop_bb_analysis = true;
2936 	      continue;
2937 	    }
2938 
2939 	  if (gather)
2940 	    free_data_ref (dr);
2941 	  return false;
2942 	}
2943 
2944       /* Update DR field in stmt_vec_info struct.  */
2945 
      /* If the dataref is in an inner-loop of the loop that is considered for
	 vectorization, we also want to analyze the access relative to
	 the outer-loop (DR contains information only relative to the
	 inner-most enclosing loop).  We do that by building a reference to the
	 first location accessed by the inner-loop, and analyzing it relative
	 to the outer-loop.  */
2952       if (loop && nested_in_vect_loop_p (loop, stmt))
2953 	{
2954 	  tree outer_step, outer_base, outer_init;
2955 	  HOST_WIDE_INT pbitsize, pbitpos;
2956 	  tree poffset;
2957 	  enum machine_mode pmode;
2958 	  int punsignedp, pvolatilep;
2959 	  affine_iv base_iv, offset_iv;
2960 	  tree dinit;
2961 
2962 	  /* Build a reference to the first location accessed by the
2963 	     inner-loop: *(BASE+INIT).  (The first location is actually
2964 	     BASE+INIT+OFFSET, but we add OFFSET separately later).  */
2965           tree inner_base = build_fold_indirect_ref
2966                                 (fold_build_pointer_plus (base, init));
2967 
2968 	  if (vect_print_dump_info (REPORT_DETAILS))
2969 	    {
2970 	      fprintf (vect_dump, "analyze in outer-loop: ");
2971 	      print_generic_expr (vect_dump, inner_base, TDF_SLIM);
2972 	    }
2973 
2974 	  outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
2975 		          &poffset, &pmode, &punsignedp, &pvolatilep, false);
2976 	  gcc_assert (outer_base != NULL_TREE);
2977 
2978 	  if (pbitpos % BITS_PER_UNIT != 0)
2979 	    {
2980 	      if (vect_print_dump_info (REPORT_DETAILS))
2981 		fprintf (vect_dump, "failed: bit offset alignment.\n");
2982 	      return false;
2983 	    }
2984 
2985 	  outer_base = build_fold_addr_expr (outer_base);
2986 	  if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base,
2987                           &base_iv, false))
2988 	    {
2989 	      if (vect_print_dump_info (REPORT_DETAILS))
2990 		fprintf (vect_dump, "failed: evolution of base is not affine.\n");
2991 	      return false;
2992 	    }
2993 
2994 	  if (offset)
2995 	    {
2996 	      if (poffset)
2997 		poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset,
2998                                        poffset);
2999 	      else
3000 		poffset = offset;
3001 	    }
3002 
3003 	  if (!poffset)
3004 	    {
3005 	      offset_iv.base = ssize_int (0);
3006 	      offset_iv.step = ssize_int (0);
3007 	    }
3008 	  else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset,
3009                                &offset_iv, false))
3010 	    {
3011 	      if (vect_print_dump_info (REPORT_DETAILS))
3012 	        fprintf (vect_dump, "evolution of offset is not affine.\n");
3013 	      return false;
3014 	    }
3015 
3016 	  outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
3017 	  split_constant_offset (base_iv.base, &base_iv.base, &dinit);
	  outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
	  split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
	  outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
3021 
3022 	  outer_step = size_binop (PLUS_EXPR,
3023 				fold_convert (ssizetype, base_iv.step),
3024 				fold_convert (ssizetype, offset_iv.step));
3025 
3026 	  STMT_VINFO_DR_STEP (stmt_info) = outer_step;
3027 	  /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
3028 	  STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
3029 	  STMT_VINFO_DR_INIT (stmt_info) = outer_init;
3030 	  STMT_VINFO_DR_OFFSET (stmt_info) =
3031 				fold_convert (ssizetype, offset_iv.base);
3032 	  STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
3033 				size_int (highest_pow2_factor (offset_iv.base));
3034 
3035 	  if (vect_print_dump_info (REPORT_DETAILS))
3036 	    {
3037 	      fprintf (vect_dump, "\touter base_address: ");
3038 	      print_generic_expr (vect_dump, STMT_VINFO_DR_BASE_ADDRESS (stmt_info), TDF_SLIM);
3039 	      fprintf (vect_dump, "\n\touter offset from base address: ");
3040 	      print_generic_expr (vect_dump, STMT_VINFO_DR_OFFSET (stmt_info), TDF_SLIM);
3041 	      fprintf (vect_dump, "\n\touter constant offset from base address: ");
3042 	      print_generic_expr (vect_dump, STMT_VINFO_DR_INIT (stmt_info), TDF_SLIM);
3043 	      fprintf (vect_dump, "\n\touter step: ");
3044 	      print_generic_expr (vect_dump, STMT_VINFO_DR_STEP (stmt_info), TDF_SLIM);
3045 	      fprintf (vect_dump, "\n\touter aligned to: ");
3046 	      print_generic_expr (vect_dump, STMT_VINFO_DR_ALIGNED_TO (stmt_info), TDF_SLIM);
3047 	    }
3048 	}
3049 
3050       if (STMT_VINFO_DATA_REF (stmt_info))
3051         {
3052           if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3053             {
3054               fprintf (vect_dump,
3055                        "not vectorized: more than one data ref in stmt: ");
3056               print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3057             }
3058 
3059           if (bb_vinfo)
3060             {
3061               STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3062               stop_bb_analysis = true;
3063               continue;
3064             }
3065 
3066 	  if (gather)
3067 	    free_data_ref (dr);
3068           return false;
3069         }
3070 
3071       STMT_VINFO_DATA_REF (stmt_info) = dr;
3072 
3073       /* Set vectype for STMT.  */
3074       scalar_type = TREE_TYPE (DR_REF (dr));
3075       STMT_VINFO_VECTYPE (stmt_info) =
3076                 get_vectype_for_scalar_type (scalar_type);
3077       if (!STMT_VINFO_VECTYPE (stmt_info))
3078         {
3079           if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3080             {
3081               fprintf (vect_dump,
3082                        "not vectorized: no vectype for stmt: ");
3083               print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3084               fprintf (vect_dump, " scalar_type: ");
3085               print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
3086             }
3087 
3088           if (bb_vinfo)
3089             {
3090               /* Mark the statement as not vectorizable.  */
3091               STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3092               stop_bb_analysis = true;
3093               continue;
3094             }
3095 
3096 	  if (gather)
3097 	    {
3098 	      STMT_VINFO_DATA_REF (stmt_info) = NULL;
3099 	      free_data_ref (dr);
3100 	    }
3101 	  return false;
3102         }
3103 
3104       /* Adjust the minimal vectorization factor according to the
3105 	 vector type.  */
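      /* For illustration (assuming 128-bit vectors): if the loop contains
	 both a char access vectorized as V16QI (16 subparts) and an int
	 access vectorized as V4SI (4 subparts), *MIN_VF ends up as 16:
	 the narrowest element type dictates the smallest usable VF.  */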
3106       vf = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
3107       if (vf > *min_vf)
3108 	*min_vf = vf;
3109 
3110       if (gather)
3111 	{
3112 	  unsigned int j, k, n;
3113 	  struct data_reference *olddr
3114 	    = VEC_index (data_reference_p, datarefs, i);
3115 	  VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
3116 	  struct data_dependence_relation *ddr, *newddr;
3117 	  bool bad = false;
3118 	  tree off;
3119 	  VEC (loop_p, heap) *nest = LOOP_VINFO_LOOP_NEST (loop_vinfo);
3120 
3121 	  if (!vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL)
3122 	      || get_vectype_for_scalar_type (TREE_TYPE (off)) == NULL_TREE)
3123 	    {
3124 	      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3125 		{
3126 		  fprintf (vect_dump,
3127 			   "not vectorized: not suitable for gather ");
3128 		  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3129 		}
3130 	      return false;
3131 	    }
3132 
3133 	  n = VEC_length (data_reference_p, datarefs) - 1;
3134 	  for (j = 0, k = i - 1; j < i; j++)
3135 	    {
3136 	      ddr = VEC_index (ddr_p, ddrs, k);
3137 	      gcc_assert (DDR_B (ddr) == olddr);
3138 	      newddr = initialize_data_dependence_relation (DDR_A (ddr), dr,
3139 							    nest);
3140 	      VEC_replace (ddr_p, ddrs, k, newddr);
3141 	      free_dependence_relation (ddr);
3142 	      if (!bad
3143 		  && DR_IS_WRITE (DDR_A (newddr))
3144 		  && DDR_ARE_DEPENDENT (newddr) != chrec_known)
3145 		bad = true;
3146 	      k += --n;
3147 	    }
3148 
3149 	  k++;
3150 	  n = k + VEC_length (data_reference_p, datarefs) - i - 1;
3151 	  for (; k < n; k++)
3152 	    {
3153 	      ddr = VEC_index (ddr_p, ddrs, k);
3154 	      gcc_assert (DDR_A (ddr) == olddr);
3155 	      newddr = initialize_data_dependence_relation (dr, DDR_B (ddr),
3156 							    nest);
3157 	      VEC_replace (ddr_p, ddrs, k, newddr);
3158 	      free_dependence_relation (ddr);
3159 	      if (!bad
3160 		  && DR_IS_WRITE (DDR_B (newddr))
3161 		  && DDR_ARE_DEPENDENT (newddr) != chrec_known)
3162 		bad = true;
3163 	    }
3164 
3165 	  k = VEC_length (ddr_p, ddrs)
3166 	      - VEC_length (data_reference_p, datarefs) + i;
3167 	  ddr = VEC_index (ddr_p, ddrs, k);
3168 	  gcc_assert (DDR_A (ddr) == olddr && DDR_B (ddr) == olddr);
3169 	  newddr = initialize_data_dependence_relation (dr, dr, nest);
3170 	  VEC_replace (ddr_p, ddrs, k, newddr);
3171 	  free_dependence_relation (ddr);
3172 	  VEC_replace (data_reference_p, datarefs, i, dr);
3173 
3174 	  if (bad)
3175 	    {
3176 	      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3177 		{
3178 		  fprintf (vect_dump,
3179 			   "not vectorized: data dependence conflict"
3180 			   " prevents gather");
3181 		  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3182 		}
3183 	      return false;
3184 	    }
3185 
3186 	  STMT_VINFO_GATHER_P (stmt_info) = true;
3187 	}
3188     }
3189 
3190   return true;
3191 }
3192 
3193 
3194 /* Function vect_get_new_vect_var.
3195 
   Returns a new variable of type TYPE.  The current naming scheme prepends
   the prefix "vect_", "stmp_" or "vect_p" (depending on the value of
   VAR_KIND) to NAME, if NAME is provided; otherwise the prefix alone is
   used to name the variable.  */
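
/* For example (illustrative only): calling this function with VAR_KIND ==
   vect_pointer_var and NAME == "a" creates a temporary named from "vect_pa",
   while a NULL NAME yields a temporary named from the prefix alone.  */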
3200 
3201 tree
3202 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
3203 {
3204   const char *prefix;
3205   tree new_vect_var;
3206 
3207   switch (var_kind)
3208   {
3209   case vect_simple_var:
3210     prefix = "vect_";
3211     break;
3212   case vect_scalar_var:
3213     prefix = "stmp_";
3214     break;
3215   case vect_pointer_var:
3216     prefix = "vect_p";
3217     break;
3218   default:
3219     gcc_unreachable ();
3220   }
3221 
3222   if (name)
3223     {
3224       char* tmp = concat (prefix, name, NULL);
3225       new_vect_var = create_tmp_var (type, tmp);
3226       free (tmp);
3227     }
3228   else
3229     new_vect_var = create_tmp_var (type, prefix);
3230 
3231   /* Mark vector typed variable as a gimple register variable.  */
3232   if (TREE_CODE (type) == VECTOR_TYPE)
3233     DECL_GIMPLE_REG_P (new_vect_var) = true;
3234 
3235   return new_vect_var;
3236 }
3237 
3238 
3239 /* Function vect_create_addr_base_for_vector_ref.
3240 
3241    Create an expression that computes the address of the first memory location
3242    that will be accessed for a data reference.
3243 
3244    Input:
3245    STMT: The statement containing the data reference.
3246    NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
   OFFSET: Optional.  If supplied, it is added to the initial address.
   LOOP:    Specifies relative to which loop-nest the address should be computed.
3249             For example, when the dataref is in an inner-loop nested in an
3250 	    outer-loop that is now being vectorized, LOOP can be either the
3251 	    outer-loop, or the inner-loop.  The first memory location accessed
3252 	    by the following dataref ('in' points to short):
3253 
3254 		for (i=0; i<N; i++)
3255 		   for (j=0; j<M; j++)
3256 		     s += in[i+j]
3257 
3258 	    is as follows:
3259 	    if LOOP=i_loop:	&in		(relative to i_loop)
3260 	    if LOOP=j_loop: 	&in+i*2B	(relative to j_loop)
3261 
3262    Output:
3263    1. Return an SSA_NAME whose value is the address of the memory location of
3264       the first vector of the data reference.
   2. If NEW_STMT_LIST is not NULL_TREE on return, the caller must insert the
      statement(s) it contains, which define the returned SSA_NAME.
3267 
3268    FORNOW: We are only handling array accesses with step 1.  */
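
/* For illustration, a sketch (with hypothetical SSA names) of the statements
   this function may add to NEW_STMT_LIST for a loop data reference with base
   address B, offset O and constant init I, where STEP is the scalar element
   size:

	base_off_1 = (sizetype) O + (sizetype) I;
	base_off_2 = base_off_1 + OFFSET * STEP;    <-- only if OFFSET is given
	addr_1 = B + base_off_2;
	addr_2 = (vectype *) addr_1;                <-- the returned SSA_NAME

   The exact statements depend on what force_gimple_operand has to
   gimplify.  */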
3269 
3270 tree
3271 vect_create_addr_base_for_vector_ref (gimple stmt,
3272 				      gimple_seq *new_stmt_list,
3273 				      tree offset,
3274 				      struct loop *loop)
3275 {
3276   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3277   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3278   tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
3279   tree base_name;
3280   tree data_ref_base_var;
3281   tree vec_stmt;
3282   tree addr_base, addr_expr;
3283   tree dest;
3284   gimple_seq seq = NULL;
3285   tree base_offset = unshare_expr (DR_OFFSET (dr));
3286   tree init = unshare_expr (DR_INIT (dr));
3287   tree vect_ptr_type;
3288   tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
3289   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3290   tree base;
3291 
3292   if (loop_vinfo && loop && loop != (gimple_bb (stmt))->loop_father)
3293     {
3294       struct loop *outer_loop = LOOP_VINFO_LOOP (loop_vinfo);
3295 
3296       gcc_assert (nested_in_vect_loop_p (outer_loop, stmt));
3297 
3298       data_ref_base = unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
3299       base_offset = unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info));
3300       init = unshare_expr (STMT_VINFO_DR_INIT (stmt_info));
3301     }
3302 
3303   if (loop_vinfo)
3304     base_name = build_fold_indirect_ref (data_ref_base);
3305   else
3306     {
3307       base_offset = ssize_int (0);
3308       init = ssize_int (0);
3309       base_name = build_fold_indirect_ref (unshare_expr (DR_REF (dr)));
3310     }
3311 
3312   data_ref_base_var = create_tmp_var (TREE_TYPE (data_ref_base), "batmp");
3313   add_referenced_var (data_ref_base_var);
3314   data_ref_base = force_gimple_operand (data_ref_base, &seq, true,
3315 					data_ref_base_var);
3316   gimple_seq_add_seq (new_stmt_list, seq);
3317 
3318   /* Create base_offset */
3319   base_offset = size_binop (PLUS_EXPR,
3320 			    fold_convert (sizetype, base_offset),
3321 			    fold_convert (sizetype, init));
3322   dest = create_tmp_var (sizetype, "base_off");
3323   add_referenced_var (dest);
3324   base_offset = force_gimple_operand (base_offset, &seq, true, dest);
3325   gimple_seq_add_seq (new_stmt_list, seq);
3326 
3327   if (offset)
3328     {
3329       tree tmp = create_tmp_var (sizetype, "offset");
3330 
3331       add_referenced_var (tmp);
3332       offset = fold_build2 (MULT_EXPR, sizetype,
3333 			    fold_convert (sizetype, offset), step);
3334       base_offset = fold_build2 (PLUS_EXPR, sizetype,
3335 				 base_offset, offset);
3336       base_offset = force_gimple_operand (base_offset, &seq, false, tmp);
3337       gimple_seq_add_seq (new_stmt_list, seq);
3338     }
3339 
3340   /* base + base_offset */
3341   if (loop_vinfo)
3342     addr_base = fold_build_pointer_plus (data_ref_base, base_offset);
3343   else
3344     {
3345       addr_base = build1 (ADDR_EXPR,
3346 			  build_pointer_type (TREE_TYPE (DR_REF (dr))),
3347 			  unshare_expr (DR_REF (dr)));
3348     }
3349 
3350   vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
3351   base = get_base_address (DR_REF (dr));
3352   if (base
3353       && TREE_CODE (base) == MEM_REF)
3354     vect_ptr_type
3355       = build_qualified_type (vect_ptr_type,
3356 			      TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0))));
3357 
3358   vec_stmt = fold_convert (vect_ptr_type, addr_base);
3359   addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
3360                                      get_name (base_name));
3361   add_referenced_var (addr_expr);
3362   vec_stmt = force_gimple_operand (vec_stmt, &seq, false, addr_expr);
3363   gimple_seq_add_seq (new_stmt_list, seq);
3364 
3365   if (DR_PTR_INFO (dr)
3366       && TREE_CODE (vec_stmt) == SSA_NAME)
3367     {
3368       duplicate_ssa_name_ptr_info (vec_stmt, DR_PTR_INFO (dr));
3369       if (offset)
3370 	{
3371 	  SSA_NAME_PTR_INFO (vec_stmt)->align = 1;
3372 	  SSA_NAME_PTR_INFO (vec_stmt)->misalign = 0;
3373 	}
3374     }
3375 
3376   if (vect_print_dump_info (REPORT_DETAILS))
3377     {
3378       fprintf (vect_dump, "created ");
3379       print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
3380     }
3381 
3382   return vec_stmt;
3383 }
3384 
3385 
3386 /* Function vect_create_data_ref_ptr.
3387 
3388    Create a new pointer-to-AGGR_TYPE variable (ap), that points to the first
3389    location accessed in the loop by STMT, along with the def-use update
3390    chain to appropriately advance the pointer through the loop iterations.
3391    Also set aliasing information for the pointer.  This pointer is used by
3392    the callers to this function to create a memory reference expression for
3393    vector load/store access.
3394 
3395    Input:
3396    1. STMT: a stmt that references memory. Expected to be of the form
3397          GIMPLE_ASSIGN <name, data-ref> or
3398 	 GIMPLE_ASSIGN <data-ref, name>.
3399    2. AGGR_TYPE: the type of the reference, which should be either a vector
3400         or an array.
3401    3. AT_LOOP: the loop where the vector memref is to be created.
3402    4. OFFSET (optional): an offset to be added to the initial address accessed
3403         by the data-ref in STMT.
   5. BSI: location where the new stmts are to be placed if there is no loop.
   6. ONLY_INIT: indicates whether ap is to be updated in the loop, or remain
        pointing to the initial address.
3407 
3408    Output:
3409    1. Declare a new ptr to vector_type, and have it point to the base of the
      data reference (the initial address accessed by the data reference).
3411       For example, for vector of type V8HI, the following code is generated:
3412 
3413       v8hi *ap;
3414       ap = (v8hi *)initial_address;
3415 
3416       if OFFSET is not supplied:
3417          initial_address = &a[init];
3418       if OFFSET is supplied:
3419          initial_address = &a[init + OFFSET];
3420 
3421       Return the initial_address in INITIAL_ADDRESS.
3422 
3423    2. If ONLY_INIT is true, just return the initial pointer.  Otherwise, also
3424       update the pointer in each iteration of the loop.
3425 
3426       Return the increment stmt that updates the pointer in PTR_INCR.
3427 
3428    3. Set INV_P to true if the access pattern of the data reference in the
3429       vectorized loop is invariant.  Set it to false otherwise.
3430 
3431    4. Return the pointer.  */
3432 
3433 tree
3434 vect_create_data_ref_ptr (gimple stmt, tree aggr_type, struct loop *at_loop,
3435 			  tree offset, tree *initial_address,
3436 			  gimple_stmt_iterator *gsi, gimple *ptr_incr,
3437 			  bool only_init, bool *inv_p)
3438 {
3439   tree base_name;
3440   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3441   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3442   struct loop *loop = NULL;
3443   bool nested_in_vect_loop = false;
3444   struct loop *containing_loop = NULL;
3445   tree aggr_ptr_type;
3446   tree aggr_ptr;
3447   tree new_temp;
3448   gimple vec_stmt;
3449   gimple_seq new_stmt_list = NULL;
3450   edge pe = NULL;
3451   basic_block new_bb;
3452   tree aggr_ptr_init;
3453   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3454   tree aptr;
3455   gimple_stmt_iterator incr_gsi;
3456   bool insert_after;
3457   bool negative;
3458   tree indx_before_incr, indx_after_incr;
3459   gimple incr;
3460   tree step;
3461   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3462   tree base;
3463 
3464   gcc_assert (TREE_CODE (aggr_type) == ARRAY_TYPE
3465 	      || TREE_CODE (aggr_type) == VECTOR_TYPE);
3466 
3467   if (loop_vinfo)
3468     {
3469       loop = LOOP_VINFO_LOOP (loop_vinfo);
3470       nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
3471       containing_loop = (gimple_bb (stmt))->loop_father;
3472       pe = loop_preheader_edge (loop);
3473     }
3474   else
3475     {
3476       gcc_assert (bb_vinfo);
3477       only_init = true;
3478       *ptr_incr = NULL;
3479     }
3480 
3481   /* Check the step (evolution) of the load in LOOP, and record
3482      whether it's invariant.  */
3483   if (nested_in_vect_loop)
3484     step = STMT_VINFO_DR_STEP (stmt_info);
3485   else
3486     step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
3487 
3488   if (tree_int_cst_compare (step, size_zero_node) == 0)
3489     *inv_p = true;
3490   else
3491     *inv_p = false;
3492   negative = tree_int_cst_compare (step, size_zero_node) < 0;
3493 
3494   /* Create an expression for the first address accessed by this load
3495      in LOOP.  */
3496   base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));
3497 
3498   if (vect_print_dump_info (REPORT_DETAILS))
3499     {
3500       tree data_ref_base = base_name;
3501       fprintf (vect_dump, "create %s-pointer variable to type: ",
3502 	       tree_code_name[(int) TREE_CODE (aggr_type)]);
3503       print_generic_expr (vect_dump, aggr_type, TDF_SLIM);
3504       if (TREE_CODE (data_ref_base) == VAR_DECL
3505           || TREE_CODE (data_ref_base) == ARRAY_REF)
3506         fprintf (vect_dump, "  vectorizing an array ref: ");
3507       else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
3508         fprintf (vect_dump, "  vectorizing a record based array ref: ");
3509       else if (TREE_CODE (data_ref_base) == SSA_NAME)
3510         fprintf (vect_dump, "  vectorizing a pointer ref: ");
3511       print_generic_expr (vect_dump, base_name, TDF_SLIM);
3512     }
3513 
3514   /* (1) Create the new aggregate-pointer variable.  */
3515   aggr_ptr_type = build_pointer_type (aggr_type);
3516   base = get_base_address (DR_REF (dr));
3517   if (base
3518       && TREE_CODE (base) == MEM_REF)
3519     aggr_ptr_type
3520       = build_qualified_type (aggr_ptr_type,
3521 			      TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0))));
3522   aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var,
3523                                     get_name (base_name));
3524 
3525   /* Vector and array types inherit the alias set of their component
     type by default, so we need to use a ref-all pointer if the data
     reference does not conflict with the created aggregate data
     reference because it is not addressable.  */
3529   if (!alias_sets_conflict_p (get_deref_alias_set (aggr_ptr),
3530 			      get_alias_set (DR_REF (dr))))
3531     {
3532       aggr_ptr_type
3533 	= build_pointer_type_for_mode (aggr_type,
3534 				       TYPE_MODE (aggr_ptr_type), true);
3535       aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var,
3536 					get_name (base_name));
3537     }
3538 
3539   /* Likewise for any of the data references in the stmt group.  */
3540   else if (STMT_VINFO_GROUP_SIZE (stmt_info) > 1)
3541     {
3542       gimple orig_stmt = STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info);
3543       do
3544 	{
3545 	  tree lhs = gimple_assign_lhs (orig_stmt);
3546 	  if (!alias_sets_conflict_p (get_deref_alias_set (aggr_ptr),
3547 				      get_alias_set (lhs)))
3548 	    {
3549 	      aggr_ptr_type
3550 		= build_pointer_type_for_mode (aggr_type,
3551 					       TYPE_MODE (aggr_ptr_type), true);
3552 	      aggr_ptr
3553 		= vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var,
3554 					 get_name (base_name));
3555 	      break;
3556 	    }
3557 
3558 	  orig_stmt = STMT_VINFO_GROUP_NEXT_ELEMENT (vinfo_for_stmt (orig_stmt));
3559 	}
3560       while (orig_stmt);
3561     }
3562 
3563   add_referenced_var (aggr_ptr);
3564 
3565   /* Note: If the dataref is in an inner-loop nested in LOOP, and we are
3566      vectorizing LOOP (i.e., outer-loop vectorization), we need to create two
3567      def-use update cycles for the pointer: one relative to the outer-loop
3568      (LOOP), which is what steps (3) and (4) below do.  The other is relative
3569      to the inner-loop (which is the inner-most loop containing the dataref),
     and this is done by step (5) below.
3571 
3572      When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
3573      inner-most loop, and so steps (3),(4) work the same, and step (5) is
3574      redundant.  Steps (3),(4) create the following:
3575 
3576 	vp0 = &base_addr;
3577 	LOOP:	vp1 = phi(vp0,vp2)
3578 		...
3579 		...
3580 		vp2 = vp1 + step
3581 		goto LOOP
3582 
     If there is an inner-loop nested in LOOP, then step (5) will also be
3584      applied, and an additional update in the inner-loop will be created:
3585 
3586 	vp0 = &base_addr;
3587 	LOOP:   vp1 = phi(vp0,vp2)
3588 		...
3589         inner:     vp3 = phi(vp1,vp4)
3590 	           vp4 = vp3 + inner_step
3591 	           if () goto inner
3592 		...
3593 		vp2 = vp1 + step
3594 		if () goto LOOP   */
3595 
3596   /* (2) Calculate the initial address of the aggregate-pointer, and set
3597      the aggregate-pointer to point to it before the loop.  */
3598 
  /* Create: &(base[init_val+offset]) in the loop preheader.  */
3600 
3601   new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
3602                                                    offset, loop);
3603   if (new_stmt_list)
3604     {
3605       if (pe)
3606         {
3607           new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
3608           gcc_assert (!new_bb);
3609         }
3610       else
3611         gsi_insert_seq_before (gsi, new_stmt_list, GSI_SAME_STMT);
3612     }
3613 
3614   *initial_address = new_temp;
3615 
3616   /* Create: p = (aggr_type *) initial_base  */
3617   if (TREE_CODE (new_temp) != SSA_NAME
3618       || !useless_type_conversion_p (aggr_ptr_type, TREE_TYPE (new_temp)))
3619     {
3620       vec_stmt = gimple_build_assign (aggr_ptr,
3621 				      fold_convert (aggr_ptr_type, new_temp));
3622       aggr_ptr_init = make_ssa_name (aggr_ptr, vec_stmt);
3623       /* Copy the points-to information if it exists. */
3624       if (DR_PTR_INFO (dr))
3625 	duplicate_ssa_name_ptr_info (aggr_ptr_init, DR_PTR_INFO (dr));
3626       gimple_assign_set_lhs (vec_stmt, aggr_ptr_init);
3627       if (pe)
3628 	{
3629 	  new_bb = gsi_insert_on_edge_immediate (pe, vec_stmt);
3630 	  gcc_assert (!new_bb);
3631 	}
3632       else
3633 	gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
3634     }
3635   else
3636     aggr_ptr_init = new_temp;
3637 
3638   /* (3) Handle the updating of the aggregate-pointer inside the loop.
3639      This is needed when ONLY_INIT is false, and also when AT_LOOP is the
3640      inner-loop nested in LOOP (during outer-loop vectorization).  */
3641 
3642   /* No update in loop is required.  */
3643   if (only_init && (!loop_vinfo || at_loop == loop))
3644     aptr = aggr_ptr_init;
3645   else
3646     {
3647       /* The step of the aggregate pointer is the type size.  */
3648       tree step = TYPE_SIZE_UNIT (aggr_type);
3649       /* One exception to the above is when the scalar step of the load in
3650 	 LOOP is zero. In this case the step here is also zero.  */
3651       if (*inv_p)
3652 	step = size_zero_node;
3653       else if (negative)
3654 	step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3655 
3656       standard_iv_increment_position (loop, &incr_gsi, &insert_after);
3657 
3658       create_iv (aggr_ptr_init,
3659 		 fold_convert (aggr_ptr_type, step),
3660 		 aggr_ptr, loop, &incr_gsi, insert_after,
3661 		 &indx_before_incr, &indx_after_incr);
3662       incr = gsi_stmt (incr_gsi);
3663       set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
3664 
3665       /* Copy the points-to information if it exists. */
3666       if (DR_PTR_INFO (dr))
3667 	{
3668 	  duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
3669 	  duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
3670 	}
3671       if (ptr_incr)
3672 	*ptr_incr = incr;
3673 
3674       aptr = indx_before_incr;
3675     }
3676 
3677   if (!nested_in_vect_loop || only_init)
3678     return aptr;
3679 
3680 
3681   /* (4) Handle the updating of the aggregate-pointer inside the inner-loop
     nested in LOOP, if it exists.  */
3683 
3684   gcc_assert (nested_in_vect_loop);
3685   if (!only_init)
3686     {
3687       standard_iv_increment_position (containing_loop, &incr_gsi,
3688 				      &insert_after);
3689       create_iv (aptr, fold_convert (aggr_ptr_type, DR_STEP (dr)), aggr_ptr,
3690 		 containing_loop, &incr_gsi, insert_after, &indx_before_incr,
3691 		 &indx_after_incr);
3692       incr = gsi_stmt (incr_gsi);
3693       set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
3694 
3695       /* Copy the points-to information if it exists. */
3696       if (DR_PTR_INFO (dr))
3697 	{
3698 	  duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
3699 	  duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
3700 	}
3701       if (ptr_incr)
3702 	*ptr_incr = incr;
3703 
3704       return indx_before_incr;
3705     }
3706   else
3707     gcc_unreachable ();
3708 }
3709 
3710 
/* Function bump_vector_ptr.
3712 
3713    Increment a pointer (to a vector type) by vector-size. If requested,
3714    i.e. if PTR-INCR is given, then also connect the new increment stmt
3715    to the existing def-use update-chain of the pointer, by modifying
3716    the PTR_INCR as illustrated below:
3717 
3718    The pointer def-use update-chain before this function:
3719                         DATAREF_PTR = phi (p_0, p_2)
3720                         ....
3721         PTR_INCR:       p_2 = DATAREF_PTR + step
3722 
3723    The pointer def-use update-chain after this function:
3724                         DATAREF_PTR = phi (p_0, p_2)
3725                         ....
3726                         NEW_DATAREF_PTR = DATAREF_PTR + BUMP
3727                         ....
3728         PTR_INCR:       p_2 = NEW_DATAREF_PTR + step
3729 
3730    Input:
3731    DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
3732                  in the loop.
3733    PTR_INCR - optional. The stmt that updates the pointer in each iteration of
3734 	      the loop.  The increment amount across iterations is expected
3735 	      to be vector_size.
3736    BSI - location where the new update stmt is to be placed.
3737    STMT - the original scalar memory-access stmt that is being vectorized.
3738    BUMP - optional. The offset by which to bump the pointer. If not given,
3739 	  the offset is assumed to be vector_size.
3740 
   Output: Return NEW_DATAREF_PTR as illustrated above.  */
3744 
3745 tree
3746 bump_vector_ptr (tree dataref_ptr, gimple ptr_incr, gimple_stmt_iterator *gsi,
3747 		 gimple stmt, tree bump)
3748 {
3749   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3750   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3751   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3752   tree ptr_var = SSA_NAME_VAR (dataref_ptr);
3753   tree update = TYPE_SIZE_UNIT (vectype);
3754   gimple incr_stmt;
3755   ssa_op_iter iter;
3756   use_operand_p use_p;
3757   tree new_dataref_ptr;
3758 
3759   if (bump)
3760     update = bump;
3761 
3762   incr_stmt = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, ptr_var,
3763 					    dataref_ptr, update);
3764   new_dataref_ptr = make_ssa_name (ptr_var, incr_stmt);
3765   gimple_assign_set_lhs (incr_stmt, new_dataref_ptr);
3766   vect_finish_stmt_generation (stmt, incr_stmt, gsi);
3767 
3768   /* Copy the points-to information if it exists. */
3769   if (DR_PTR_INFO (dr))
3770     {
3771       duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
3772       SSA_NAME_PTR_INFO (new_dataref_ptr)->align = 1;
3773       SSA_NAME_PTR_INFO (new_dataref_ptr)->misalign = 0;
3774     }
3775 
3776   if (!ptr_incr)
3777     return new_dataref_ptr;
3778 
3779   /* Update the vector-pointer's cross-iteration increment.  */
3780   FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
3781     {
3782       tree use = USE_FROM_PTR (use_p);
3783 
3784       if (use == dataref_ptr)
3785         SET_USE (use_p, new_dataref_ptr);
3786       else
3787         gcc_assert (tree_int_cst_compare (use, update) == 0);
3788     }
3789 
3790   return new_dataref_ptr;
3791 }
3792 
3793 
3794 /* Function vect_create_destination_var.
3795 
3796    Create a new temporary of type VECTYPE.  */
3797 
3798 tree
3799 vect_create_destination_var (tree scalar_dest, tree vectype)
3800 {
3801   tree vec_dest;
3802   const char *new_name;
3803   tree type;
3804   enum vect_var_kind kind;
3805 
3806   kind = vectype ? vect_simple_var : vect_scalar_var;
3807   type = vectype ? vectype : TREE_TYPE (scalar_dest);
3808 
3809   gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
3810 
3811   new_name = get_name (scalar_dest);
3812   if (!new_name)
3813     new_name = "var_";
3814   vec_dest = vect_get_new_vect_var (type, kind, new_name);
3815   add_referenced_var (vec_dest);
3816 
3817   return vec_dest;
3818 }
3819 
3820 /* Function vect_strided_store_supported.
3821 
3822    Returns TRUE if interleave high and interleave low permutations
3823    are supported, and FALSE otherwise.  */
3824 
3825 bool
3826 vect_strided_store_supported (tree vectype, unsigned HOST_WIDE_INT count)
3827 {
3828   enum machine_mode mode = TYPE_MODE (vectype);
3829 
3830   /* vect_permute_store_chain requires the group size to be a power of two.  */
3831   if (exact_log2 (count) == -1)
3832     {
3833       if (vect_print_dump_info (REPORT_DETAILS))
3834 	fprintf (vect_dump, "the size of the group of strided accesses"
3835 		 " is not a power of 2");
3836       return false;
3837     }
3838 
3839   /* Check that the permutation is supported.  */
3840   if (VECTOR_MODE_P (mode))
3841     {
3842       unsigned int i, nelt = GET_MODE_NUNITS (mode);
3843       unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
3844       for (i = 0; i < nelt / 2; i++)
3845 	{
3846 	  sel[i * 2] = i;
3847 	  sel[i * 2 + 1] = i + nelt;
3848 	}
3849       if (can_vec_perm_p (mode, false, sel))
3850 	{
3851 	  for (i = 0; i < nelt; i++)
3852 	    sel[i] += nelt / 2;
3853 	  if (can_vec_perm_p (mode, false, sel))
3854 	    return true;
3855 	}
3856     }
3857 
3858   if (vect_print_dump_info (REPORT_DETAILS))
3859     fprintf (vect_dump, "interleave op not supported by target.");
3860   return false;
3861 }
3862 
3863 
3864 /* Return TRUE if vec_store_lanes is available for COUNT vectors of
3865    type VECTYPE.  */
3866 
3867 bool
3868 vect_store_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count)
3869 {
3870   return vect_lanes_optab_supported_p ("vec_store_lanes",
3871 				       vec_store_lanes_optab,
3872 				       vectype, count);
3873 }
3874 
3875 
3876 /* Function vect_permute_store_chain.
3877 
3878    Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
3879    a power of 2, generate interleave_high/low stmts to reorder the data
3880    correctly for the stores.  Return the final references for stores in
3881    RESULT_CHAIN.
3882 
3883    E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
3884    The input is 4 vectors each containing 8 elements.  We assign a number to
3885    each element, the input sequence is:
3886 
3887    1st vec:   0  1  2  3  4  5  6  7
3888    2nd vec:   8  9 10 11 12 13 14 15
3889    3rd vec:  16 17 18 19 20 21 22 23
3890    4th vec:  24 25 26 27 28 29 30 31
3891 
3892    The output sequence should be:
3893 
3894    1st vec:  0  8 16 24  1  9 17 25
3895    2nd vec:  2 10 18 26  3 11 19 27
   3rd vec:  4 12 20 28  5 13 21 29
3897    4th vec:  6 14 22 30  7 15 23 31
3898 
3899    i.e., we interleave the contents of the four vectors in their order.
3900 
3901    We use interleave_high/low instructions to create such output.  The input of
3902    each interleave_high/low operation is two vectors:
3903    1st vec    2nd vec
3904    0 1 2 3    4 5 6 7
3905    the even elements of the result vector are obtained left-to-right from the
3906    high/low elements of the first vector.  The odd elements of the result are
3907    obtained left-to-right from the high/low elements of the second vector.
3908    The output of interleave_high will be:   0 4 1 5
3909    and of interleave_low:                   2 6 3 7
3910 
3911 
3912    The permutation is done in log LENGTH stages.  In each stage interleave_high
3913    and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
3914    where the first argument is taken from the first half of DR_CHAIN and the
   second argument from its second half.
3916    In our example,
3917 
3918    I1: interleave_high (1st vec, 3rd vec)
3919    I2: interleave_low (1st vec, 3rd vec)
3920    I3: interleave_high (2nd vec, 4th vec)
3921    I4: interleave_low (2nd vec, 4th vec)
3922 
3923    The output for the first stage is:
3924 
3925    I1:  0 16  1 17  2 18  3 19
3926    I2:  4 20  5 21  6 22  7 23
3927    I3:  8 24  9 25 10 26 11 27
3928    I4: 12 28 13 29 14 30 15 31
3929 
3930    The output of the second stage, i.e. the final result is:
3931 
3932    I1:  0  8 16 24  1  9 17 25
3933    I2:  2 10 18 26  3 11 19 27
   I3:  4 12 20 28  5 13 21 29
3935    I4:  6 14 22 30  7 15 23 31.  */
3936 
3937 void
3938 vect_permute_store_chain (VEC(tree,heap) *dr_chain,
3939 			  unsigned int length,
3940 			  gimple stmt,
3941 			  gimple_stmt_iterator *gsi,
3942 			  VEC(tree,heap) **result_chain)
3943 {
3944   tree perm_dest, vect1, vect2, high, low;
3945   gimple perm_stmt;
3946   tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
3947   tree perm_mask_low, perm_mask_high;
3948   unsigned int i, n;
3949   unsigned int j, nelt = TYPE_VECTOR_SUBPARTS (vectype);
3950   unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
3951 
3952   *result_chain = VEC_copy (tree, heap, dr_chain);
3953 
3954   for (i = 0, n = nelt / 2; i < n; i++)
3955     {
3956       sel[i * 2] = i;
3957       sel[i * 2 + 1] = i + nelt;
3958     }
3959   perm_mask_high = vect_gen_perm_mask (vectype, sel);
3960   gcc_assert (perm_mask_high != NULL);
3961 
3962   for (i = 0; i < nelt; i++)
3963     sel[i] += nelt / 2;
3964   perm_mask_low = vect_gen_perm_mask (vectype, sel);
3965   gcc_assert (perm_mask_low != NULL);
3966 
3967   for (i = 0, n = exact_log2 (length); i < n; i++)
3968     {
3969       for (j = 0; j < length/2; j++)
3970 	{
3971 	  vect1 = VEC_index (tree, dr_chain, j);
3972 	  vect2 = VEC_index (tree, dr_chain, j+length/2);
3973 
3974 	  /* Create interleaving stmt:
3975 	     high = VEC_PERM_EXPR <vect1, vect2, {0, nelt, 1, nelt+1, ...}>  */
3976 	  perm_dest = create_tmp_var (vectype, "vect_inter_high");
3977 	  DECL_GIMPLE_REG_P (perm_dest) = 1;
3978 	  add_referenced_var (perm_dest);
3979 	  high = make_ssa_name (perm_dest, NULL);
3980 	  perm_stmt
3981 	    = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, high,
3982 					     vect1, vect2, perm_mask_high);
3983 	  vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3984 	  VEC_replace (tree, *result_chain, 2*j, high);
3985 
3986 	  /* Create interleaving stmt:
3987 	     low = VEC_PERM_EXPR <vect1, vect2, {nelt/2, nelt*3/2, nelt/2+1,
3988 						 nelt*3/2+1, ...}>  */
3989 	  perm_dest = create_tmp_var (vectype, "vect_inter_low");
3990 	  DECL_GIMPLE_REG_P (perm_dest) = 1;
3991 	  add_referenced_var (perm_dest);
3992 	  low = make_ssa_name (perm_dest, NULL);
3993 	  perm_stmt
3994 	    = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, low,
3995 					     vect1, vect2, perm_mask_low);
3996 	  vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3997 	  VEC_replace (tree, *result_chain, 2*j+1, low);
3998 	}
3999       dr_chain = VEC_copy (tree, heap, *result_chain);
4000     }
4001 }
4002 
/* Function vect_setup_realignment.
4004 
4005    This function is called when vectorizing an unaligned load using
4006    the dr_explicit_realign[_optimized] scheme.
4007    This function generates the following code at the loop prolog:
4008 
4009       p = initial_addr;
4010    x  msq_init = *(floor(p));   # prolog load
4011       realignment_token = call target_builtin;
4012     loop:
4013    x  msq = phi (msq_init, ---)
4014 
4015    The stmts marked with x are generated only for the case of
4016    dr_explicit_realign_optimized.
4017 
4018    The code above sets up a new (vector) pointer, pointing to the first
4019    location accessed by STMT, and a "floor-aligned" load using that pointer.
4020    It also generates code to compute the "realignment-token" (if the relevant
4021    target hook was defined), and creates a phi-node at the loop-header bb
4022    whose arguments are the result of the prolog-load (created by this
4023    function) and the result of a load that takes place in the loop (to be
4024    created by the caller to this function).
4025 
4026    For the case of dr_explicit_realign_optimized:
4027    The caller to this function uses the phi-result (msq) to create the
4028    realignment code inside the loop, and sets up the missing phi argument,
4029    as follows:
4030     loop:
4031       msq = phi (msq_init, lsq)
4032       lsq = *(floor(p'));        # load in loop
4033       result = realign_load (msq, lsq, realignment_token);
4034 
4035    For the case of dr_explicit_realign:
4036     loop:
4037       msq = *(floor(p)); 	# load in loop
4038       p' = p + (VS-1);
4039       lsq = *(floor(p'));	# load in loop
4040       result = realign_load (msq, lsq, realignment_token);
4041 
4042    Input:
4043    STMT - (scalar) load stmt to be vectorized. This load accesses
4044           a memory location that may be unaligned.
4045    BSI - place where new code is to be inserted.
4046    ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
4047 			      is used.
4048 
4049    Output:
4050    REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
4051                        target hook, if defined.
4052    Return value - the result of the loop-header phi node.  */
4053 
4054 tree
4055 vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
4056                         tree *realignment_token,
4057 			enum dr_alignment_support alignment_support_scheme,
4058 			tree init_addr,
4059 			struct loop **at_loop)
4060 {
4061   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4062   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4063   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4064   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4065   struct loop *loop = NULL;
4066   edge pe = NULL;
4067   tree scalar_dest = gimple_assign_lhs (stmt);
4068   tree vec_dest;
4069   gimple inc;
4070   tree ptr;
4071   tree data_ref;
4072   gimple new_stmt;
4073   basic_block new_bb;
4074   tree msq_init = NULL_TREE;
4075   tree new_temp;
4076   gimple phi_stmt;
4077   tree msq = NULL_TREE;
4078   gimple_seq stmts = NULL;
4079   bool inv_p;
4080   bool compute_in_loop = false;
4081   bool nested_in_vect_loop = false;
4082   struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
4083   struct loop *loop_for_initial_load = NULL;
4084 
4085   if (loop_vinfo)
4086     {
4087       loop = LOOP_VINFO_LOOP (loop_vinfo);
4088       nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
4089     }
4090 
4091   gcc_assert (alignment_support_scheme == dr_explicit_realign
4092 	      || alignment_support_scheme == dr_explicit_realign_optimized);
4093 
4094   /* We need to generate three things:
4095      1. the misalignment computation
4096      2. the extra vector load (for the optimized realignment scheme).
4097      3. the phi node for the two vectors from which the realignment is
4098       done (for the optimized realignment scheme).  */
4099 
4100   /* 1. Determine where to generate the misalignment computation.
4101 
4102      If INIT_ADDR is NULL_TREE, this indicates that the misalignment
4103      calculation will be generated by this function, outside the loop (in the
4104      preheader).  Otherwise, INIT_ADDR had already been computed for us by the
4105      caller, inside the loop.
4106 
4107      Background: If the misalignment remains fixed throughout the iterations of
4108      the loop, then both realignment schemes are applicable, and also the
4109      misalignment computation can be done outside LOOP.  This is because we are
4110      vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
4111      are a multiple of VS (the Vector Size), and therefore the misalignment in
4112      different vectorized LOOP iterations is always the same.
4113      The problem arises only if the memory access is in an inner-loop nested
4114      inside LOOP, which is now being vectorized using outer-loop vectorization.
4115      This is the only case when the misalignment of the memory access may not
4116      remain fixed throughout the iterations of the inner-loop (as explained in
4117      detail in vect_supportable_dr_alignment).  In this case, not only is the
4118      optimized realignment scheme not applicable, but also the misalignment
4119      computation (and generation of the realignment token that is passed to
4120      REALIGN_LOAD) have to be done inside the loop.
4121 
4122      In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
4123      or not, which in turn determines if the misalignment is computed inside
4124      the inner-loop, or outside LOOP.  */
4125 
4126   if (init_addr != NULL_TREE || !loop_vinfo)
4127     {
4128       compute_in_loop = true;
4129       gcc_assert (alignment_support_scheme == dr_explicit_realign);
4130     }
4131 
4132 
4133   /* 2. Determine where to generate the extra vector load.
4134 
4135      For the optimized realignment scheme, instead of generating two vector
4136      loads in each iteration, we generate a single extra vector load in the
4137      preheader of the loop, and in each iteration reuse the result of the
4138      vector load from the previous iteration.  In case the memory access is in
4139      an inner-loop nested inside LOOP, which is now being vectorized using
4140      outer-loop vectorization, we need to determine whether this initial vector
4141      load should be generated at the preheader of the inner-loop, or can be
4142      generated at the preheader of LOOP.  If the memory access has no evolution
4143      in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
4144      to be generated inside LOOP (in the preheader of the inner-loop).  */
4145 
4146   if (nested_in_vect_loop)
4147     {
4148       tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
4149       bool invariant_in_outerloop =
4150             (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
4151       loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
4152     }
4153   else
4154     loop_for_initial_load = loop;
4155   if (at_loop)
4156     *at_loop = loop_for_initial_load;
4157 
4158   if (loop_for_initial_load)
4159     pe = loop_preheader_edge (loop_for_initial_load);
4160 
4161   /* 3. For the case of the optimized realignment, create the first vector
4162       load at the loop preheader.  */
4163 
4164   if (alignment_support_scheme == dr_explicit_realign_optimized)
4165     {
4166       /* Create msq_init = *(floor(p1)) in the loop preheader  */
4167 
4168       gcc_assert (!compute_in_loop);
4169       vec_dest = vect_create_destination_var (scalar_dest, vectype);
4170       ptr = vect_create_data_ref_ptr (stmt, vectype, loop_for_initial_load,
4171 				      NULL_TREE, &init_addr, NULL, &inc,
4172 				      true, &inv_p);
4173       new_stmt = gimple_build_assign_with_ops
4174 		   (BIT_AND_EXPR, NULL_TREE, ptr,
4175 		    build_int_cst (TREE_TYPE (ptr),
4176 				   -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4177       new_temp = make_ssa_name (SSA_NAME_VAR (ptr), new_stmt);
4178       gimple_assign_set_lhs (new_stmt, new_temp);
4179       new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
4180       gcc_assert (!new_bb);
4181       data_ref
4182 	= build2 (MEM_REF, TREE_TYPE (vec_dest), new_temp,
4183 		  build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0));
4184       new_stmt = gimple_build_assign (vec_dest, data_ref);
4185       new_temp = make_ssa_name (vec_dest, new_stmt);
4186       gimple_assign_set_lhs (new_stmt, new_temp);
4187       mark_symbols_for_renaming (new_stmt);
4188       if (pe)
4189         {
4190           new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
4191           gcc_assert (!new_bb);
4192         }
4193       else
4194          gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
4195 
4196       msq_init = gimple_assign_lhs (new_stmt);
4197     }
4198 
4199   /* 4. Create realignment token using a target builtin, if available.
4200       It is done either inside the containing loop, or before LOOP (as
4201       determined above).  */
4202 
4203   if (targetm.vectorize.builtin_mask_for_load)
4204     {
4205       tree builtin_decl;
4206 
      /* Compute INIT_ADDR - the initial address accessed by this memref.  */
4208       if (!init_addr)
4209 	{
4210 	  /* Generate the INIT_ADDR computation outside LOOP.  */
4211 	  init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts,
4212 							NULL_TREE, loop);
4213           if (loop)
4214             {
4215    	      pe = loop_preheader_edge (loop);
4216 	      new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4217 	      gcc_assert (!new_bb);
4218             }
4219           else
4220              gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
4221 	}
4222 
4223       builtin_decl = targetm.vectorize.builtin_mask_for_load ();
4224       new_stmt = gimple_build_call (builtin_decl, 1, init_addr);
4225       vec_dest =
4226 	vect_create_destination_var (scalar_dest,
4227 				     gimple_call_return_type (new_stmt));
4228       new_temp = make_ssa_name (vec_dest, new_stmt);
4229       gimple_call_set_lhs (new_stmt, new_temp);
4230 
4231       if (compute_in_loop)
4232 	gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
4233       else
4234 	{
4235 	  /* Generate the misalignment computation outside LOOP.  */
4236 	  pe = loop_preheader_edge (loop);
4237 	  new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
4238 	  gcc_assert (!new_bb);
4239 	}
4240 
4241       *realignment_token = gimple_call_lhs (new_stmt);
4242 
4243       /* The result of the CALL_EXPR to this builtin is determined from
4244          the value of the parameter and no global variables are touched
4245          which makes the builtin a "const" function.  Requiring the
4246          builtin to have the "const" attribute makes it unnecessary
4247          to call mark_call_clobbered.  */
4248       gcc_assert (TREE_READONLY (builtin_decl));
4249     }
4250 
4251   if (alignment_support_scheme == dr_explicit_realign)
4252     return msq;
4253 
4254   gcc_assert (!compute_in_loop);
4255   gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);
4256 
4257 
4258   /* 5. Create msq = phi <msq_init, lsq> in loop  */
4259 
4260   pe = loop_preheader_edge (containing_loop);
4261   vec_dest = vect_create_destination_var (scalar_dest, vectype);
4262   msq = make_ssa_name (vec_dest, NULL);
4263   phi_stmt = create_phi_node (msq, containing_loop->header);
4264   SSA_NAME_DEF_STMT (msq) = phi_stmt;
4265   add_phi_arg (phi_stmt, msq_init, pe, UNKNOWN_LOCATION);
4266 
4267   return msq;
4268 }
4269 
4270 
4271 /* Function vect_strided_load_supported.
4272 
4273    Returns TRUE if even and odd permutations are supported,
4274    and FALSE otherwise.  */
4275 
4276 bool
4277 vect_strided_load_supported (tree vectype, unsigned HOST_WIDE_INT count)
4278 {
4279   enum machine_mode mode = TYPE_MODE (vectype);
4280 
4281   /* vect_permute_load_chain requires the group size to be a power of two.  */
4282   if (exact_log2 (count) == -1)
4283     {
4284       if (vect_print_dump_info (REPORT_DETAILS))
4285 	fprintf (vect_dump, "the size of the group of strided accesses"
4286 		 " is not a power of 2");
4287       return false;
4288     }
4289 
4290   /* Check that the permutation is supported.  */
4291   if (VECTOR_MODE_P (mode))
4292     {
4293       unsigned int i, nelt = GET_MODE_NUNITS (mode);
4294       unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
4295 
4296       for (i = 0; i < nelt; i++)
4297 	sel[i] = i * 2;
4298       if (can_vec_perm_p (mode, false, sel))
4299 	{
4300 	  for (i = 0; i < nelt; i++)
4301 	    sel[i] = i * 2 + 1;
4302 	  if (can_vec_perm_p (mode, false, sel))
4303 	    return true;
4304 	}
4305     }
4306 
4307   if (vect_print_dump_info (REPORT_DETAILS))
4308     fprintf (vect_dump, "extract even/odd not supported by target");
4309   return false;
4310 }
4311 
4312 /* Return TRUE if vec_load_lanes is available for COUNT vectors of
4313    type VECTYPE.  */
4314 
4315 bool
4316 vect_load_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count)
4317 {
4318   return vect_lanes_optab_supported_p ("vec_load_lanes",
4319 				       vec_load_lanes_optab,
4320 				       vectype, count);
4321 }
4322 
4323 /* Function vect_permute_load_chain.
4324 
4325    Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
4326    a power of 2, generate extract_even/odd stmts to reorder the input data
4327    correctly.  Return the final references for loads in RESULT_CHAIN.
4328 
4329    E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
4330    The input is 4 vectors each containing 8 elements. We assign a number to each
4331    element, the input sequence is:
4332 
4333    1st vec:   0  1  2  3  4  5  6  7
4334    2nd vec:   8  9 10 11 12 13 14 15
4335    3rd vec:  16 17 18 19 20 21 22 23
4336    4th vec:  24 25 26 27 28 29 30 31
4337 
4338    The output sequence should be:
4339 
4340    1st vec:  0 4  8 12 16 20 24 28
4341    2nd vec:  1 5  9 13 17 21 25 29
4342    3rd vec:  2 6 10 14 18 22 26 30
4343    4th vec:  3 7 11 15 19 23 27 31
4344 
4345    i.e., the first output vector should contain the first elements of each
4346    interleaving group, etc.
4347 
4348    We use extract_even/odd instructions to create such output.  The input of
4349    each extract_even/odd operation is two vectors
4350    1st vec    2nd vec
4351    0 1 2 3    4 5 6 7
4352 
4353    and the output is the vector of extracted even/odd elements.  The output of
4354    extract_even will be:   0 2 4 6
4355    and of extract_odd:     1 3 5 7
4356 
4357 
4358    The permutation is done in log LENGTH stages.  In each stage extract_even
4359    and extract_odd stmts are created for each pair of vectors in DR_CHAIN in
4360    their order.  In our example,
4361 
4362    E1: extract_even (1st vec, 2nd vec)
4363    E2: extract_odd (1st vec, 2nd vec)
4364    E3: extract_even (3rd vec, 4th vec)
4365    E4: extract_odd (3rd vec, 4th vec)
4366 
4367    The output for the first stage will be:
4368 
4369    E1:  0  2  4  6  8 10 12 14
4370    E2:  1  3  5  7  9 11 13 15
4371    E3: 16 18 20 22 24 26 28 30
4372    E4: 17 19 21 23 25 27 29 31
4373 
4374    In order to proceed and create the correct sequence for the next stage (or
4375    for the correct output, if the second stage is the last one, as in our
4376    example), we first put the output of extract_even operation and then the
4377    output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
4378    The input for the second stage is:
4379 
4380    1st vec (E1):  0  2  4  6  8 10 12 14
4381    2nd vec (E3): 16 18 20 22 24 26 28 30
4382    3rd vec (E2):  1  3  5  7  9 11 13 15
4383    4th vec (E4): 17 19 21 23 25 27 29 31
4384 
4385    The output of the second stage:
4386 
4387    E1: 0 4  8 12 16 20 24 28
4388    E2: 2 6 10 14 18 22 26 30
4389    E3: 1 5  9 13 17 21 25 29
4390    E4: 3 7 11 15 19 23 27 31
4391 
4392    And RESULT_CHAIN after reordering:
4393 
4394    1st vec (E1):  0 4  8 12 16 20 24 28
4395    2nd vec (E3):  1 5  9 13 17 21 25 29
4396    3rd vec (E2):  2 6 10 14 18 22 26 30
4397    4th vec (E4):  3 7 11 15 19 23 27 31.  */
4398 
4399 static void
4400 vect_permute_load_chain (VEC(tree,heap) *dr_chain,
4401 			 unsigned int length,
4402 			 gimple stmt,
4403 			 gimple_stmt_iterator *gsi,
4404 			 VEC(tree,heap) **result_chain)
4405 {
4406   tree perm_dest, data_ref, first_vect, second_vect;
4407   tree perm_mask_even, perm_mask_odd;
4408   gimple perm_stmt;
4409   tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
4410   unsigned int i, j, log_length = exact_log2 (length);
4411   unsigned nelt = TYPE_VECTOR_SUBPARTS (vectype);
4412   unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
4413 
4414   *result_chain = VEC_copy (tree, heap, dr_chain);
4415 
4416   for (i = 0; i < nelt; ++i)
4417     sel[i] = i * 2;
4418   perm_mask_even = vect_gen_perm_mask (vectype, sel);
4419   gcc_assert (perm_mask_even != NULL);
4420 
4421   for (i = 0; i < nelt; ++i)
4422     sel[i] = i * 2 + 1;
4423   perm_mask_odd = vect_gen_perm_mask (vectype, sel);
4424   gcc_assert (perm_mask_odd != NULL);
4425 
4426   for (i = 0; i < log_length; i++)
4427     {
4428       for (j = 0; j < length; j += 2)
4429 	{
4430 	  first_vect = VEC_index (tree, dr_chain, j);
4431 	  second_vect = VEC_index (tree, dr_chain, j+1);
4432 
4433 	  /* data_ref = permute_even (first_data_ref, second_data_ref);  */
4434 	  perm_dest = create_tmp_var (vectype, "vect_perm_even");
4435 	  DECL_GIMPLE_REG_P (perm_dest) = 1;
4436 	  add_referenced_var (perm_dest);
4437 
4438 	  perm_stmt = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, perm_dest,
4439 						     first_vect, second_vect,
4440 						     perm_mask_even);
4441 
4442 	  data_ref = make_ssa_name (perm_dest, perm_stmt);
4443 	  gimple_assign_set_lhs (perm_stmt, data_ref);
4444 	  vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4445 	  mark_symbols_for_renaming (perm_stmt);
4446 
4447 	  VEC_replace (tree, *result_chain, j/2, data_ref);
4448 
4449 	  /* data_ref = permute_odd (first_data_ref, second_data_ref);  */
4450 	  perm_dest = create_tmp_var (vectype, "vect_perm_odd");
4451 	  DECL_GIMPLE_REG_P (perm_dest) = 1;
4452 	  add_referenced_var (perm_dest);
4453 
4454 	  perm_stmt = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, perm_dest,
4455 						     first_vect, second_vect,
4456 						     perm_mask_odd);
4457 
4458 	  data_ref = make_ssa_name (perm_dest, perm_stmt);
4459 	  gimple_assign_set_lhs (perm_stmt, data_ref);
4460 	  vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4461 	  mark_symbols_for_renaming (perm_stmt);
4462 
4463 	  VEC_replace (tree, *result_chain, j/2+length/2, data_ref);
4464 	}
4465       dr_chain = VEC_copy (tree, heap, *result_chain);
4466     }
4467 }
4468 
4469 
4470 /* Function vect_transform_strided_load.
4471 
   Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
   to perform their permutation and ascribe the resulting vectorized statements
   to the scalar statements.  */
4476 
4477 void
4478 vect_transform_strided_load (gimple stmt, VEC(tree,heap) *dr_chain, int size,
4479 			     gimple_stmt_iterator *gsi)
4480 {
4481   VEC(tree,heap) *result_chain = NULL;
4482 
  /* DR_CHAIN contains input data-refs that are a part of the interleaving.
     RESULT_CHAIN is the output of vect_permute_load_chain; it contains the
     permuted vectors, which are ready for vector computation.  */
4486   result_chain = VEC_alloc (tree, heap, size);
4487   vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain);
4488   vect_record_strided_load_vectors (stmt, result_chain);
4489   VEC_free (tree, heap, result_chain);
4490 }
4491 
4492 /* RESULT_CHAIN contains the output of a group of strided loads that were
4493    generated as part of the vectorization of STMT.  Assign the statement
4494    for each vector to the associated scalar statement.  */
4495 
4496 void
4497 vect_record_strided_load_vectors (gimple stmt, VEC(tree,heap) *result_chain)
4498 {
4499   gimple first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
4500   gimple next_stmt, new_stmt;
4501   unsigned int i, gap_count;
4502   tree tmp_data_ref;
4503 
  /* Put a permuted data-ref in the VECTORIZED_STMT field of each scalar
     statement in the group.  Since we scan the chain starting from its first
     node, the order of the statements corresponds to the order of data-refs
     in RESULT_CHAIN.  */
4507   next_stmt = first_stmt;
4508   gap_count = 1;
4509   FOR_EACH_VEC_ELT (tree, result_chain, i, tmp_data_ref)
4510     {
4511       if (!next_stmt)
4512 	break;
4513 
      /* Skip the gaps.  Loads created for the gaps will be removed by the
         dead code elimination pass later.  No need to check for the first
         stmt in the group, since it always exists.
         GROUP_GAP is the number of steps in elements from the previous
         access (if there is no gap GROUP_GAP is 1).  We skip loads that
         correspond to the gaps.  */
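      /* Illustrative example (a hypothetical access pattern, not taken from
         the surrounding code): if the group loads elements 0 and 2 of every
         3 consecutive elements, GROUP_GAP of the second statement is 2, so
         exactly one entry of RESULT_CHAIN (the permuted vector for the unused
         element 1) is skipped here; the dead load feeding it is cleaned up
         later.  */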
4520       if (next_stmt != first_stmt
4521           && gap_count < GROUP_GAP (vinfo_for_stmt (next_stmt)))
4522       {
4523         gap_count++;
4524         continue;
4525       }
4526 
4527       while (next_stmt)
4528         {
4529 	  new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
4530 	  /* We assume that if VEC_STMT is not NULL, this is a case of multiple
4531 	     copies, and we put the new vector statement in the first available
4532 	     RELATED_STMT.  */
4533 	  if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
4534 	    STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
4535 	  else
4536             {
4537               if (!GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
4538                 {
4539  	          gimple prev_stmt =
4540 		    STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
4541 	          gimple rel_stmt =
4542 		    STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt));
4543 	          while (rel_stmt)
4544 		    {
4545 		      prev_stmt = rel_stmt;
4546 		      rel_stmt =
4547                         STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
4548 		    }
4549 
4550   	          STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) =
4551                     new_stmt;
4552                 }
4553             }
4554 
4555 	  next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
4556 	  gap_count = 1;
4557 	  /* If NEXT_STMT accesses the same DR as the previous statement,
4558 	     put the same TMP_DATA_REF as its vectorized statement; otherwise
4559 	     get the next data-ref from RESULT_CHAIN.  */
4560 	  if (!next_stmt || !GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
4561 	    break;
4562         }
4563     }
4564 }
4565 
/* Function vect_can_force_dr_alignment_p.

   Return true if the alignment of DECL can be forced to an ALIGNMENT-bit
   boundary.  */
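
/* For illustration only: a caller that goes on to raise the alignment after a
   successful check would typically do something along these lines.  This is a
   sketch; BASE and VECTYPE stand in for the caller's base decl and the vector
   type whose alignment is wanted:

     if (vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
       {
         DECL_ALIGN (base) = TYPE_ALIGN (vectype);
         DECL_USER_ALIGN (base) = 1;
       }  */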
4570 
4571 bool
4572 vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment)
4573 {
4574   if (TREE_CODE (decl) != VAR_DECL)
4575     return false;
4576 
4577   if (DECL_EXTERNAL (decl))
4578     return false;
4579 
4580   if (TREE_ASM_WRITTEN (decl))
4581     return false;
4582 
4583   /* Do not override explicit alignment set by the user when an explicit
4584      section name is also used.  This is a common idiom used by many
4585      software projects.  */
4586   if (DECL_SECTION_NAME (decl) != NULL_TREE
4587       && !DECL_HAS_IMPLICIT_SECTION_NAME_P (decl))
4588     return false;
4589 
4590   if (TREE_STATIC (decl))
4591     return (alignment <= MAX_OFILE_ALIGNMENT);
4592   else
4593     return (alignment <= MAX_STACK_ALIGNMENT);
4594 }
4595 
4596 
/* Return whether the data reference DR is supported with respect to its
   alignment.
   If CHECK_ALIGNED_ACCESSES is TRUE, check if the access is supported even
   if it is aligned, i.e., check if it is possible to vectorize it with a
   different alignment.  */
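
/* As an illustration of how the result is meant to be consumed, a caller
   would typically dispatch on the returned value roughly as follows.  This
   is a sketch only; the elided actions stand for the caller's own code
   generation:

     switch (vect_supportable_dr_alignment (dr, false))
       {
       case dr_aligned:
       case dr_unaligned_supported:
         ... emit an ordinary, possibly misaligned, vector access ...
         break;
       case dr_explicit_realign:
       case dr_explicit_realign_optimized:
         ... loads only: emit an explicit realignment sequence ...
         break;
       case dr_unaligned_unsupported:
         ... the access cannot be vectorized with its current alignment ...
         break;
       }  */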
4602 
4603 enum dr_alignment_support
4604 vect_supportable_dr_alignment (struct data_reference *dr,
4605                                bool check_aligned_accesses)
4606 {
4607   gimple stmt = DR_STMT (dr);
4608   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4609   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4610   enum machine_mode mode = TYPE_MODE (vectype);
4611   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4612   struct loop *vect_loop = NULL;
4613   bool nested_in_vect_loop = false;
4614 
4615   if (aligned_access_p (dr) && !check_aligned_accesses)
4616     return dr_aligned;
4617 
4618   if (loop_vinfo)
4619     {
4620       vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
4621       nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt);
4622     }
4623 
4624   /* Possibly unaligned access.  */
4625 
4626   /* We can choose between using the implicit realignment scheme (generating
4627      a misaligned_move stmt) and the explicit realignment scheme (generating
4628      aligned loads with a REALIGN_LOAD).  There are two variants to the
4629      explicit realignment scheme: optimized, and unoptimized.
4630      We can optimize the realignment only if the step between consecutive
4631      vector loads is equal to the vector size.  Since the vector memory
4632      accesses advance in steps of VS (Vector Size) in the vectorized loop, it
4633      is guaranteed that the misalignment amount remains the same throughout the
4634      execution of the vectorized loop.  Therefore, we can create the
4635      "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
4636      at the loop preheader.
4637 
4638      However, in the case of outer-loop vectorization, when vectorizing a
4639      memory access in the inner-loop nested within the LOOP that is now being
4640      vectorized, while it is guaranteed that the misalignment of the
4641      vectorized memory access will remain the same in different outer-loop
     iterations, it is *not* guaranteed that it will remain the same throughout
4643      the execution of the inner-loop.  This is because the inner-loop advances
4644      with the original scalar step (and not in steps of VS).  If the inner-loop
4645      step happens to be a multiple of VS, then the misalignment remains fixed
4646      and we can use the optimized realignment scheme.  For example:
4647 
4648       for (i=0; i<N; i++)
4649         for (j=0; j<M; j++)
4650           s += a[i+j];
4651 
4652      When vectorizing the i-loop in the above example, the step between
4653      consecutive vector loads is 1, and so the misalignment does not remain
4654      fixed across the execution of the inner-loop, and the realignment cannot
4655      be optimized (as illustrated in the following pseudo vectorized loop):
4656 
4657       for (i=0; i<N; i+=4)
4658         for (j=0; j<M; j++){
4659           vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
4660                          // when j is {0,1,2,3,4,5,6,7,...} respectively.
4661                          // (assuming that we start from an aligned address).
4662           }
4663 
4664      We therefore have to use the unoptimized realignment scheme:
4665 
      for (i=0; i<N; i+=4)
        for (j=k; j<M; j+=4)
          vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
                         // that the misalignment of the initial address is
                         // 0).
4671 
4672      The loop can then be vectorized as follows:
4673 
4674       for (k=0; k<4; k++){
4675         rt = get_realignment_token (&vp[k]);
4676         for (i=0; i<N; i+=4){
4677           v1 = vp[i+k];
4678           for (j=k; j<M; j+=4){
4679             v2 = vp[i+j+VS-1];
4680             va = REALIGN_LOAD <v1,v2,rt>;
4681             vs += va;
4682             v1 = v2;
4683           }
4684         }
4685     } */
4686 
4687   if (DR_IS_READ (dr))
4688     {
4689       bool is_packed = false;
4690       tree type = (TREE_TYPE (DR_REF (dr)));
4691 
4692       if (optab_handler (vec_realign_load_optab, mode) != CODE_FOR_nothing
4693 	  && (!targetm.vectorize.builtin_mask_for_load
4694 	      || targetm.vectorize.builtin_mask_for_load ()))
4695 	{
4696 	  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4697 	  if ((nested_in_vect_loop
4698 	       && (TREE_INT_CST_LOW (DR_STEP (dr))
4699 	 	   != GET_MODE_SIZE (TYPE_MODE (vectype))))
4700               || !loop_vinfo)
4701 	    return dr_explicit_realign;
4702 	  else
4703 	    return dr_explicit_realign_optimized;
4704 	}
4705       if (!known_alignment_for_access_p (dr))
4706 	is_packed = contains_packed_reference (DR_REF (dr));
4707 
4708       if (targetm.vectorize.
4709 	  support_vector_misalignment (mode, type,
4710 				       DR_MISALIGNMENT (dr), is_packed))
4711 	/* Can't software pipeline the loads, but can at least do them.  */
4712 	return dr_unaligned_supported;
4713     }
4714   else
4715     {
4716       bool is_packed = false;
4717       tree type = (TREE_TYPE (DR_REF (dr)));
4718 
4719       if (!known_alignment_for_access_p (dr))
4720 	is_packed = contains_packed_reference (DR_REF (dr));
4721 
      if (targetm.vectorize.
          support_vector_misalignment (mode, type,
                                       DR_MISALIGNMENT (dr), is_packed))
        return dr_unaligned_supported;
4726     }
4727 
4728   /* Unsupported.  */
4729   return dr_unaligned_unsupported;
4730 }
4731