/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "basic-block.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "expr.h"
#include "optabs.h"
#include "params.h"
#include "tree-chrec.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"

/* Main analysis functions.  */
static loop_vec_info vect_analyze_loop_form (struct loop *);
static bool vect_analyze_data_refs (loop_vec_info);
static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
static void vect_analyze_scalar_cycles (loop_vec_info);
static bool vect_analyze_data_ref_accesses (loop_vec_info);
static bool vect_analyze_data_ref_dependences (loop_vec_info);
static bool vect_analyze_data_refs_alignment (loop_vec_info);
static bool vect_compute_data_refs_alignment (loop_vec_info);
static bool vect_enhance_data_refs_alignment (loop_vec_info);
static bool vect_analyze_operations (loop_vec_info);
static bool vect_determine_vectorization_factor (loop_vec_info);

/* Utility functions for the analyses.  */
static bool exist_non_indexing_operands_for_use_p (tree, tree);
static void vect_mark_relevant (VEC(tree,heap) **, tree, bool, bool);
static bool vect_stmt_relevant_p (tree, loop_vec_info, bool *, bool *);
static tree vect_get_loop_niters (struct loop *, tree *);
static bool vect_analyze_data_ref_dependence
  (struct data_dependence_relation *, loop_vec_info);
static bool vect_compute_data_ref_alignment (struct data_reference *);
static bool vect_analyze_data_ref_access (struct data_reference *);
static bool vect_can_advance_ivs_p (loop_vec_info);
static void vect_update_misalignment_for_peel
  (struct data_reference *, struct data_reference *, int npeel);


/* Function vect_determine_vectorization_factor

   Determine the vectorization factor (VF). VF is the number of data elements
   that are operated upon in parallel in a single iteration of the vectorized
   loop. For example, when vectorizing a loop that operates on 4-byte elements,
   on a target with a vector size (VS) of 16 bytes, the VF is set to 4, since
   4 elements can fit in a single vector register.

   We currently support vectorization of loops in which all types operated upon
   are of the same size. Therefore this function currently sets VF according to
   the size of the types operated upon, and fails if there are multiple sizes
   in the loop.

   VF is also the factor by which the loop iterations are strip-mined, e.g.:
   original loop:
        for (i=0; i<N; i++){
          a[i] = b[i] + c[i];
        }

   vectorized loop:
        for (i=0; i<N; i+=VF){
          a[i:VF] = b[i:VF] + c[i:VF];
        }
*/
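
/* For example, a loop like the following (an illustrative sketch) would be
   rejected FORNOW, because it operates on two different data sizes (2-byte
   and 4-byte elements), which would require two different vectorization
   factors:

        short *a;  int *b;
        for (i=0; i<N; i++){
          a[i] = (short) b[i];
        }
*/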

static bool
vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  unsigned int vectorization_factor = 0;
  int i;
  tree scalar_type;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
        {
          tree stmt = bsi_stmt (si);
          unsigned int nunits;
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
          tree vectype;

          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "==> examining statement: ");
              print_generic_expr (vect_dump, stmt, TDF_SLIM);
            }

          gcc_assert (stmt_info);
          /* Skip stmts which do not need to be vectorized.  */
          if (!STMT_VINFO_RELEVANT_P (stmt_info)
              && !STMT_VINFO_LIVE_P (stmt_info))
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "skip.");
              continue;
            }

          if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                {
                  fprintf (vect_dump, "not vectorized: vector stmt in loop:");
                  print_generic_expr (vect_dump, stmt, TDF_SLIM);
                }
              return false;
            }

          if (STMT_VINFO_DATA_REF (stmt_info))
            scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
          else if (TREE_CODE (stmt) == MODIFY_EXPR)
            scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0));
          else
            scalar_type = TREE_TYPE (stmt);

          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "get vectype for scalar type:  ");
              print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
            }

          vectype = get_vectype_for_scalar_type (scalar_type);
          if (!vectype)
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                {
                  fprintf (vect_dump, "not vectorized: unsupported data-type ");
                  print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
                }
              return false;
            }
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "vectype: ");
              print_generic_expr (vect_dump, vectype, TDF_SLIM);
            }
          STMT_VINFO_VECTYPE (stmt_info) = vectype;

          nunits = TYPE_VECTOR_SUBPARTS (vectype);
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "nunits = %d", nunits);

          if (vectorization_factor)
            {
              /* FORNOW: don't allow mixed units.
                 This restriction will be relaxed in the future.  */
              if (nunits != vectorization_factor)
                {
                  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                    fprintf (vect_dump, "not vectorized: mixed data-types");
                  return false;
                }
            }
          else
            vectorization_factor = nunits;

          gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type))
                        * vectorization_factor == UNITS_PER_SIMD_WORD);
        }
    }

  /* TODO: Analyze cost. Decide if worthwhile to vectorize.  */

  if (vectorization_factor <= 1)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump, "not vectorized: unsupported data-type");
      return false;
    }
  LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;

  return true;
}


/* Function vect_analyze_operations.

   Scan the loop stmts and make sure they are all vectorizable.  */

static bool
vect_analyze_operations (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  unsigned int vectorization_factor = 0;
  int i;
  bool ok;
  tree phi;
  stmt_vec_info stmt_info;
  bool need_to_vectorize = false;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_operations ===");

  gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
  vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
        {
          stmt_info = vinfo_for_stmt (phi);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "examining phi: ");
              print_generic_expr (vect_dump, phi, TDF_SLIM);
            }

          gcc_assert (stmt_info);

          if (STMT_VINFO_LIVE_P (stmt_info))
            {
              /* FORNOW: not yet supported.  */
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                fprintf (vect_dump, "not vectorized: value used after loop.");
              return false;
            }

          if (STMT_VINFO_RELEVANT_P (stmt_info))
            {
              /* Most likely a reduction-like computation that is used
                 in the loop.  */
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                fprintf (vect_dump, "not vectorized: unsupported pattern.");
              return false;
            }
        }

      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
        {
          tree stmt = bsi_stmt (si);
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "==> examining statement: ");
              print_generic_expr (vect_dump, stmt, TDF_SLIM);
            }

          gcc_assert (stmt_info);

          /* Skip stmts which do not need to be vectorized.
             This is expected to include:
             - the COND_EXPR which is the loop exit condition
             - any LABEL_EXPRs in the loop
             - computations that are used only for array indexing or loop
               control  */

          if (!STMT_VINFO_RELEVANT_P (stmt_info)
              && !STMT_VINFO_LIVE_P (stmt_info))
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "irrelevant.");
              continue;
            }

          if (STMT_VINFO_RELEVANT_P (stmt_info))
            {
              gcc_assert (!VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))));
              gcc_assert (STMT_VINFO_VECTYPE (stmt_info));

              ok = (vectorizable_operation (stmt, NULL, NULL)
                    || vectorizable_assignment (stmt, NULL, NULL)
                    || vectorizable_load (stmt, NULL, NULL)
                    || vectorizable_store (stmt, NULL, NULL)
                    || vectorizable_condition (stmt, NULL, NULL));

              if (!ok)
                {
                  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                    {
                      fprintf (vect_dump,
                               "not vectorized: relevant stmt not supported: ");
                      print_generic_expr (vect_dump, stmt, TDF_SLIM);
                    }
                  return false;
                }
              need_to_vectorize = true;
            }

          if (STMT_VINFO_LIVE_P (stmt_info))
            {
              ok = vectorizable_reduction (stmt, NULL, NULL);

              if (ok)
                need_to_vectorize = true;
              else
                ok = vectorizable_live_operation (stmt, NULL, NULL);

              if (!ok)
                {
                  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                    {
                      fprintf (vect_dump,
                               "not vectorized: live stmt not supported: ");
                      print_generic_expr (vect_dump, stmt, TDF_SLIM);
                    }
                  return false;
                }
            }
        } /* stmts in bb */
    } /* bbs */

  /* TODO: Analyze cost. Decide if worthwhile to vectorize.  */

  /* All operations in the loop are either irrelevant (deal with loop
     control, or dead), or only used outside the loop and can be moved
     out of the loop (e.g. invariants, inductions).  The loop can be
     optimized away by scalar optimizations.  We're better off not
     touching this loop.  */
  if (!need_to_vectorize)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump,
                 "All the computation can be taken out of the loop.");
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump,
                 "not vectorized: redundant loop. no profit to vectorize.");
      return false;
    }

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump,
        "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
        vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump, "not vectorized: iteration count too small.");
      return false;
    }

  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0
      || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "epilog loop required.");
      if (!vect_can_advance_ivs_p (loop_vinfo))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            fprintf (vect_dump,
                     "not vectorized: can't create epilog loop 1.");
          return false;
        }
      if (!slpeel_can_duplicate_loop_p (loop, loop->single_exit))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            fprintf (vect_dump,
                     "not vectorized: can't create epilog loop 2.");
          return false;
        }
    }

  return true;
}


/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT. Check if USE is
   used in STMT for anything other than indexing an array.  */
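
/* For example (an illustrative sketch): in the stmt 'a[i] = x', the use 'i'
   only serves to index the array 'a', while the use 'x' is a non-indexing
   operand; for USE == 'i' this function returns false, whereas for
   USE == 'x' it returns true.  */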

static bool
exist_non_indexing_operands_for_use_p (tree use, tree stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT. If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref. FORNOW this means that it is in one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME)
    return false;

  operand = TREE_OPERAND (stmt, 1);

  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}


/* Function vect_analyze_scalar_cycles.

   Examine the cross iteration def-use cycles of scalar variables, by
   analyzing the loop (scalar) PHIs; classify each cycle as one of the
   following: invariant, induction, reduction, unknown.

   Some forms of scalar cycles are not yet supported.

   Example1: reduction (not yet supported):

              loop1:
              for (i=0; i<N; i++)
                 sum += a[i];

   Example2: induction (not yet supported):

              loop2:
              for (i=0; i<N; i++)
                 a[i] = i;

   Note: the following loop *is* vectorizable:

              loop3:
              for (i=0; i<N; i++)
                 a[i] = b[i];

         even though it has a def-use cycle caused by the induction variable i:

              loop: i_2 = PHI (i_0, i_1)
                    a[i_2] = ...;
                    i_1 = i_2 + 1;
                    GOTO loop;

         because the def-use cycle in loop3 is considered "not relevant" - i.e.,
         it does not need to be vectorized because it is only used for array
         indexing (see 'mark_stmts_to_be_vectorized'). The def-use cycle in
         loop2 on the other hand is relevant (it is being written to memory).
*/

static void
vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
{
  tree phi;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block bb = loop->header;
  tree dummy;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");

  for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
    {
      tree access_fn = NULL;
      tree def = PHI_RESULT (phi);
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
      tree reduc_stmt;

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Analyze phi: ");
          print_generic_expr (vect_dump, phi, TDF_SLIM);
        }

      /* Skip virtual PHIs. The data dependences that are associated with
         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */

      if (!is_gimple_reg (SSA_NAME_VAR (def)))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "virtual phi. skip.");
          continue;
        }

      STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;

      /* Analyze the evolution function.  */

      access_fn = analyze_scalar_evolution (loop, def);

      if (!access_fn)
        continue;

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Access function of PHI: ");
          print_generic_expr (vect_dump, access_fn, TDF_SLIM);
        }

      if (vect_is_simple_iv_evolution (loop->num, access_fn, &dummy, &dummy))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "Detected induction.");
          STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
          continue;
        }

      /* TODO: handle invariant phis.  */

      reduc_stmt = vect_is_simple_reduction (loop, phi);
      if (reduc_stmt)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "Detected reduction.");
          STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
          STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
                                                        vect_reduction_def;
        }
      else if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Unknown def-use cycle pattern.");
    }

  return;
}


/* Function vect_analyze_data_ref_dependence.

   Return TRUE if there (might) exist a dependence between a memory-reference
   DRA and a memory-reference DRB.  */
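
/* For example (an illustrative sketch, assuming VF == 4): the loop

        for (i=4; i<N; i++)
          a[i] = a[i-4] + 1;

   has dependence distance 4, a multiple of VF, so each vector iteration
   only reads elements written by earlier vector iterations, whereas

        for (i=1; i<N; i++)
          a[i] = a[i-1] + 1;

   has distance 1 < VF: a single vector iteration would read elements that
   it is itself supposed to write, so the loop is not vectorized.  */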

static bool
vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
                                  loop_vec_info loop_vinfo)
{
  unsigned int i;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  unsigned int loop_depth = 0;
  struct loop *loop_nest = loop;
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));

  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    return false;

  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        {
          fprintf (vect_dump,
                   "not vectorized: can't determine dependence between ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }
      return true;
    }

  if (DDR_NUM_DIST_VECTS (ddr) == 0)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        {
          fprintf (vect_dump, "not vectorized: bad dist vector for ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }
      return true;
    }

  /* Find loop depth.  */
  while (loop_nest && loop_nest->outer && loop_nest->outer->outer)
    {
      loop_nest = loop_nest->outer;
      loop_depth++;
    }

  for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
    {
      int dist = DDR_DIST_VECT (ddr, i)[loop_depth];

      if (vect_print_dump_info (REPORT_DR_DETAILS))
        fprintf (vect_dump, "dependence distance = %d.", dist);

      /* The distance is a multiple of the vectorization factor (including
         zero, i.e. the same loop iteration).  */
      if (dist % vectorization_factor == 0)
        {
          /* Two references whose distance is a multiple of the vectorization
             factor have the same alignment.  */
          VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
          VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "accesses have the same alignment.");
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }
          continue;
        }

      if (abs (dist) >= vectorization_factor)
        {
          /* The dependence distance is large enough that, as far as
             vectorization is concerned, it does not create a dependence.  */
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            fprintf (vect_dump, "dependence distance >= VF.");
          continue;
        }

      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        {
          fprintf (vect_dump,
                   "not vectorized: possible dependence between data-refs ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }

      return true;
    }

  return false;
}


/* Function vect_analyze_data_ref_dependences.

   Examine all the data references in the loop, and make sure there do not
   exist any data dependences between them.  */

static bool
vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
{
  unsigned int i;
  varray_type ddrs = LOOP_VINFO_DDRS (loop_vinfo);

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_data_ref_dependences ===");

  for (i = 0; i < VARRAY_ACTIVE_SIZE (ddrs); i++)
    {
      struct data_dependence_relation *ddr = VARRAY_GENERIC_PTR (ddrs, i);

      if (vect_analyze_data_ref_dependence (ddr, loop_vinfo))
        return false;
    }

  return true;
}


/* Function vect_compute_data_ref_alignment

   Compute the misalignment of the data reference DR.

   Output:
   1. If during the misalignment computation it is found that the data
      reference cannot be vectorized then false is returned.
   2. DR_MISALIGNMENT (DR) is defined.

   FORNOW: No analysis is actually performed. Misalignment is calculated
   only for trivial cases. TODO.  */
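
/* For example (an illustrative sketch, for a 16-byte vector type): an
   access whose address is known to be a 16-byte aligned base plus 20
   bytes gets DR_MISALIGNMENT == 20 % 16 == 4; an access through a pointer
   about which nothing is known keeps DR_MISALIGNMENT == -1 (unknown).  */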

static bool
vect_compute_data_ref_alignment (struct data_reference *dr)
{
  tree stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree ref = DR_REF (dr);
  tree vectype;
  tree base, base_addr;
  bool base_aligned;
  tree misalign;
  tree aligned_to, alignment;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_compute_data_ref_alignment:");

  /* Initialize misalignment to unknown.  */
  DR_MISALIGNMENT (dr) = -1;

  misalign = DR_OFFSET_MISALIGNMENT (dr);
  aligned_to = DR_ALIGNED_TO (dr);
  base_addr = DR_BASE_ADDRESS (dr);
  base = build_fold_indirect_ref (base_addr);
  vectype = STMT_VINFO_VECTYPE (stmt_info);
  alignment = ssize_int (TYPE_ALIGN (vectype) / BITS_PER_UNIT);

  if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
      || !misalign)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Unknown alignment for access: ");
          print_generic_expr (vect_dump, base, TDF_SLIM);
        }
      return true;
    }

  if ((DECL_P (base)
       && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
                                alignment) >= 0)
      || (TREE_CODE (base_addr) == SSA_NAME
          && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
                                                      TREE_TYPE (base_addr)))),
                                   alignment) >= 0))
    base_aligned = true;
  else
    base_aligned = false;

  if (!base_aligned)
    {
      if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "can't force alignment of ref: ");
              print_generic_expr (vect_dump, ref, TDF_SLIM);
            }
          return true;
        }

      /* Force the alignment of the decl.
         NOTE: This is the only change to the code we make during
         the analysis phase, before deciding to vectorize the loop.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "force alignment");
      DECL_ALIGN (base) = TYPE_ALIGN (vectype);
      DECL_USER_ALIGN (base) = 1;
    }

  /* At this point we assume that the base is aligned.  */
  gcc_assert (base_aligned
              || (TREE_CODE (base) == VAR_DECL
                  && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));

  /* Modulo alignment.  */
  misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment);

  if (!host_integerp (misalign, 1))
    {
      /* Negative or overflowed misalignment value.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "unexpected misalign value");
      return false;
    }

  DR_MISALIGNMENT (dr) = TREE_INT_CST_LOW (misalign);

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
      print_generic_expr (vect_dump, ref, TDF_SLIM);
    }

  return true;
}


/* Function vect_compute_data_refs_alignment

   Compute the misalignment of data references in the loop.
   Return FALSE if a data reference is found that cannot be vectorized.  */

static bool
vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
{
  varray_type datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  unsigned int i;

  for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
    {
      struct data_reference *dr = VARRAY_GENERIC_PTR (datarefs, i);
      if (!vect_compute_data_ref_alignment (dr))
        return false;
    }

  return true;
}


/* Function vect_update_misalignment_for_peel

   DR - the data reference whose misalignment is to be adjusted.
   DR_PEEL - the data reference whose misalignment is being made
             zero in the vector loop by the peel.
   NPEEL - the number of iterations in the peel loop if the misalignment
           of DR_PEEL is known at compile time.  */
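
/* For example (an illustrative sketch): if DR accesses 4-byte elements,
   NPEEL == 2, and UNITS_PER_SIMD_WORD == 16, a known misalignment of
   12 bytes becomes (12 + 2*4) % 16 == 4 bytes after the peel.  If the
   misalignment of DR or DR_PEEL is unknown, DR's misalignment must be
   set to unknown (-1) as well.  */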

static void
vect_update_misalignment_for_peel (struct data_reference *dr,
                                   struct data_reference *dr_peel, int npeel)
{
  unsigned int i;
  int drsize;
  VEC(dr_p,heap) *same_align_drs;
  struct data_reference *current_dr;

  if (known_alignment_for_access_p (dr)
      && DR_MISALIGNMENT (dr) == DR_MISALIGNMENT (dr_peel))
    {
      DR_MISALIGNMENT (dr) = 0;
      return;
    }

  /* It can be assumed that the data refs with the same alignment as dr_peel
     are aligned in the vector loop.  */
  same_align_drs
    = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
  for (i = 0; VEC_iterate (dr_p, same_align_drs, i, current_dr); i++)
    {
      if (current_dr != dr)
        continue;
      gcc_assert (DR_MISALIGNMENT (dr) == DR_MISALIGNMENT (dr_peel));
      DR_MISALIGNMENT (dr) = 0;
      return;
    }

  if (known_alignment_for_access_p (dr)
      && known_alignment_for_access_p (dr_peel))
    {
      drsize = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
      DR_MISALIGNMENT (dr) += npeel * drsize;
      DR_MISALIGNMENT (dr) %= UNITS_PER_SIMD_WORD;
      return;
    }

  DR_MISALIGNMENT (dr) = -1;
}


/* Function vect_verify_datarefs_alignment

   Return TRUE if all data references in the loop can be
   handled with respect to alignment.  */

static bool
vect_verify_datarefs_alignment (loop_vec_info loop_vinfo)
{
  varray_type datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  enum dr_alignment_support supportable_dr_alignment;
  unsigned int i;

  for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
    {
      struct data_reference *dr = VARRAY_GENERIC_PTR (datarefs, i);
      supportable_dr_alignment = vect_supportable_dr_alignment (dr);
      if (!supportable_dr_alignment)
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            {
              if (DR_IS_READ (dr))
                fprintf (vect_dump,
                         "not vectorized: unsupported unaligned load.");
              else
                fprintf (vect_dump,
                         "not vectorized: unsupported unaligned store.");
            }
          return false;
        }
      if (supportable_dr_alignment != dr_aligned
          && vect_print_dump_info (REPORT_ALIGNMENT))
        fprintf (vect_dump, "Vectorizing an unaligned access.");
    }
  return true;
}


/* Function vect_enhance_data_refs_alignment

   This pass will use loop versioning and loop peeling in order to enhance
   the alignment of data references in the loop.

   FORNOW: we assume that whatever versioning/peeling takes place, only the
   original loop is to be vectorized; any other loops that are created by
   the transformations performed in this pass are not supposed to be
   vectorized. This restriction will be relaxed.

   This pass will require a cost model to guide it whether to apply peeling
   or versioning or a combination of the two. For example, the scheme that
   Intel uses when given a loop with several memory accesses is as follows:
   choose one memory access ('p') whose alignment you want to force by
   peeling. Then, either (1) generate a loop in which 'p' is aligned and all
   other accesses are not necessarily aligned, or (2) use loop versioning to
   generate one loop in which all accesses are aligned, and another loop in
   which only 'p' is necessarily aligned.

   ("Automatic Intra-Register Vectorization for the Intel Architecture",
   Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
   Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)

   Devising a cost model is the most critical aspect of this work. It will
   guide us on which access to peel for, whether to use loop versioning, how
   many versions to create, etc. The cost model will probably consist of
   generic considerations as well as target specific considerations (on
   powerpc for example, misaligned stores are more painful than misaligned
   loads).

   Here are the general steps involved in alignment enhancements:

     -- original loop, before alignment analysis:
	for (i=0; i<N; i++){
	  x = q[i];			# DR_MISALIGNMENT(q) = unknown
	  p[i] = y;			# DR_MISALIGNMENT(p) = unknown
	}

     -- After vect_compute_data_refs_alignment:
	for (i=0; i<N; i++){
	  x = q[i];			# DR_MISALIGNMENT(q) = 3
	  p[i] = y;			# DR_MISALIGNMENT(p) = unknown
	}

     -- Possibility 1: we do loop versioning:
     if (p is aligned) {
	for (i=0; i<N; i++){	# loop 1A
	  x = q[i];			# DR_MISALIGNMENT(q) = 3
	  p[i] = y;			# DR_MISALIGNMENT(p) = 0
	}
     }
     else {
	for (i=0; i<N; i++){	# loop 1B
	  x = q[i];			# DR_MISALIGNMENT(q) = 3
	  p[i] = y;			# DR_MISALIGNMENT(p) = unaligned
	}
     }

     -- Possibility 2: we do loop peeling:
     for (i = 0; i < 3; i++){	# (scalar loop, not to be vectorized).
	x = q[i];
	p[i] = y;
     }
     for (i = 3; i < N; i++){	# loop 2A
	x = q[i];			# DR_MISALIGNMENT(q) = 0
	p[i] = y;			# DR_MISALIGNMENT(p) = unknown
     }

     -- Possibility 3: combination of loop peeling and versioning:
     for (i = 0; i < 3; i++){	# (scalar loop, not to be vectorized).
	x = q[i];
	p[i] = y;
     }
     if (p is aligned) {
	for (i = 3; i<N; i++){	# loop 3A
	  x = q[i];			# DR_MISALIGNMENT(q) = 0
	  p[i] = y;			# DR_MISALIGNMENT(p) = 0
	}
     }
     else {
	for (i = 3; i<N; i++){	# loop 3B
	  x = q[i];			# DR_MISALIGNMENT(q) = 0
	  p[i] = y;			# DR_MISALIGNMENT(p) = unaligned
	}
     }

     These loops are later passed to loop_transform to be vectorized. The
     vectorizer will use the alignment information to guide the transformation
     (whether to generate regular loads/stores, or with special handling for
     misalignment).  */

static bool
vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
{
  varray_type datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  enum dr_alignment_support supportable_dr_alignment;
  struct data_reference *dr0 = NULL;
  struct data_reference *dr;
  unsigned int i;
  bool do_peeling = false;
  bool do_versioning = false;
  bool stat;

  /* While cost model enhancements are expected in the future, the high level
     view of the code at this time is as follows:

     A) If there is a misaligned write then see if peeling to align this write
        can make all data references satisfy vect_supportable_dr_alignment.
        If so, update data structures as needed and return true.  Note that
        at this time vect_supportable_dr_alignment is known to return false
        for a misaligned write.

     B) If peeling wasn't possible and there is a data reference with an
        unknown misalignment that does not satisfy vect_supportable_dr_alignment
        then see if loop versioning checks can be used to make all data
        references satisfy vect_supportable_dr_alignment.  If so, update
        data structures as needed and return true.

     C) If neither peeling nor versioning were successful then return false if
        any data reference does not satisfy vect_supportable_dr_alignment.

     D) Return true (all data references satisfy vect_supportable_dr_alignment).

     Note, Possibility 3 above (which is peeling and versioning together) is not
     being done at this time.  */

  /* (1) Peeling to force alignment.  */

  /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
     Considerations:
     + How many accesses will become aligned due to the peeling
     - How many accesses will become unaligned due to the peeling,
       and the cost of misaligned accesses.
     - The cost of peeling (the extra runtime checks, the increase
       in code size).

     The scheme we use FORNOW: peel to force the alignment of the first
     misaligned store in the loop.
     Rationale: misaligned stores are not yet supported.

     TODO: Use a cost model.  */

  for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
    {
      dr = VARRAY_GENERIC_PTR (datarefs, i);
      if (!DR_IS_READ (dr) && !aligned_access_p (dr))
        {
          dr0 = dr;
          do_peeling = true;
          break;
        }
    }

  /* Often peeling for alignment will require peeling for loop-bound, which in
     turn requires that we know how to adjust the loop ivs after the loop.  */
  if (!vect_can_advance_ivs_p (loop_vinfo))
    do_peeling = false;

  if (do_peeling)
    {
      int mis;
      int npeel = 0;

      if (known_alignment_for_access_p (dr0))
        {
          /* Since it's known at compile time, compute the number of iterations
             in the peeled loop (the peeling factor) for use in updating
             DR_MISALIGNMENT values.  The peeling factor is the vectorization
             factor minus the misalignment as an element count.  */
          mis = DR_MISALIGNMENT (dr0);
          mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
          npeel = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - mis;
        }
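
      /* For example (an illustrative sketch): with a vectorization factor
         of 4 and 4-byte elements, DR_MISALIGNMENT (dr0) == 8 bytes gives
         mis == 8/4 == 2 elements, so npeel == 4 - 2 == 2 scalar iterations
         must be peeled for dr0 to become aligned.  */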

      /* Ensure that all data refs can be vectorized after the peel.  */
      for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
        {
          int save_misalignment;

          dr = VARRAY_GENERIC_PTR (datarefs, i);
          if (dr == dr0)
            continue;
          save_misalignment = DR_MISALIGNMENT (dr);
          vect_update_misalignment_for_peel (dr, dr0, npeel);
          supportable_dr_alignment = vect_supportable_dr_alignment (dr);
          DR_MISALIGNMENT (dr) = save_misalignment;

          if (!supportable_dr_alignment)
            {
              do_peeling = false;
              break;
            }
        }

      if (do_peeling)
        {
          /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
             If the misalignment of DR_i is identical to that of dr0 then set
             DR_MISALIGNMENT (DR_i) to zero.  If the misalignment of DR_i and
             dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
             by the peeling factor times the element size of DR_i (MOD the
             vectorization factor times the size).  Otherwise, the
             misalignment of DR_i must be set to unknown.  */
          for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
            {
              dr = VARRAY_GENERIC_PTR (datarefs, i);
              if (dr == dr0)
                continue;
              vect_update_misalignment_for_peel (dr, dr0, npeel);
            }

          LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
          LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
          DR_MISALIGNMENT (dr0) = 0;
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "Alignment of access forced using peeling.");

          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "Peeling for alignment will be applied.");

          stat = vect_verify_datarefs_alignment (loop_vinfo);
          gcc_assert (stat);
          return stat;
        }
    }


  /* (2) Versioning to force alignment.  */

  /* Try versioning if:
     1) flag_tree_vect_loop_version is TRUE
     2) optimize_size is FALSE
     3) there is at least one unsupported misaligned data ref with an unknown
        misalignment, and
     4) all misaligned data refs with a known misalignment are supported, and
     5) the number of runtime alignment checks is within reason.  */
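
  /* For example (an illustrative sketch, for 16-byte vectors): the runtime
     alignment check ORs together the addresses of the potentially misaligned
     data refs and tests them against LOOP_VINFO_PTR_MASK, roughly:

       if ((((unsigned long) p | (unsigned long) q) & 15) == 0)
         vectorized_loop;
       else
         scalar_loop;
  */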

  do_versioning = flag_tree_vect_loop_version && (!optimize_size);

  if (do_versioning)
    {
      for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
        {
          dr = VARRAY_GENERIC_PTR (datarefs, i);

          if (aligned_access_p (dr))
            continue;

          supportable_dr_alignment = vect_supportable_dr_alignment (dr);

          if (!supportable_dr_alignment)
            {
              tree stmt;
              int mask;
              tree vectype;

              if (known_alignment_for_access_p (dr)
                  || VEC_length (tree,
                                 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
                     >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_CHECKS))
                {
                  do_versioning = false;
                  break;
                }

              stmt = DR_STMT (dr);
              vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
              gcc_assert (vectype);

              /* The rightmost bits of an aligned address must be zeros.
                 Construct the mask needed for this test.  For example,
                 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
                 mask must be 15 = 0xf.  */
              mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;

              /* FORNOW: use the same mask to test all potentially unaligned
                 references in the loop.  The vectorizer currently supports
                 a single vector size, see the reference to
                 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
                 vectorization factor is computed.  */
              gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
                          || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
              LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
              VEC_safe_push (tree, heap,
                             LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
                             DR_STMT (dr));
            }
        }

      /* Versioning requires at least one misaligned data reference.  */
      if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)) == 0)
        do_versioning = false;
      else if (!do_versioning)
        VEC_truncate (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
    }

  if (do_versioning)
    {
      VEC(tree,heap) *may_misalign_stmts
        = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
      tree stmt;

      /* It can now be assumed that the data references in the statements
         in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
         of the loop being vectorized.  */
      for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, stmt); i++)
        {
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
          dr = STMT_VINFO_DATA_REF (stmt_info);
          DR_MISALIGNMENT (dr) = 0;
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "Alignment of access forced using versioning.");
        }

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Versioning for alignment will be applied.");

      /* Peeling and versioning can't be done together at this time.  */
      gcc_assert (! (do_peeling && do_versioning));

      stat = vect_verify_datarefs_alignment (loop_vinfo);
      gcc_assert (stat);
      return stat;
    }

  /* This point is reached if neither peeling nor versioning is being done.  */
  gcc_assert (! (do_peeling || do_versioning));

  stat = vect_verify_datarefs_alignment (loop_vinfo);
  return stat;
}


/* Function vect_analyze_data_refs_alignment

   Analyze the alignment of the data-references in the loop.
   Return FALSE if a data reference is found that cannot be vectorized.  */

static bool
vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
{
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");

  if (!vect_compute_data_refs_alignment (loop_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump,
                 "not vectorized: can't calculate alignment for data ref.");
      return false;
    }

  return true;
}


/* Function vect_analyze_data_ref_access.

   Analyze the access pattern of the data-reference DR. For now, a data access
   has to be consecutive to be considered vectorizable.  */
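
/* For example (an illustrative sketch, for 4-byte elements): the access
   a[i] in a loop over i has DR_STEP == 4 == TYPE_SIZE_UNIT of the element
   type and is accepted, while a[2*i], whose step (8) differs from the
   element size, is rejected as not consecutive.  */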

static bool
vect_analyze_data_ref_access (struct data_reference *dr)
{
  tree step = DR_STEP (dr);
  tree scalar_type = TREE_TYPE (DR_REF (dr));

  if (!step || tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not consecutive access");
      return false;
    }
  return true;
}


/* Function vect_analyze_data_ref_accesses.

   Analyze the access pattern of all the data references in the loop.

   FORNOW: the only access pattern that is considered vectorizable is a
           simple step 1 (consecutive) access.

   FORNOW: handle only arrays and pointer accesses.  */

static bool
vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
{
  unsigned int i;
  varray_type datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");

  for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
    {
      struct data_reference *dr = VARRAY_GENERIC_PTR (datarefs, i);
      if (!vect_analyze_data_ref_access (dr))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            fprintf (vect_dump, "not vectorized: complicated access pattern.");
          return false;
        }
    }

  return true;
}


/* Function vect_analyze_data_refs.

   Find all the data references in the loop.

   The general structure of the analysis of data refs in the vectorizer is as
   follows:
   1- vect_analyze_data_refs(loop): call compute_data_dependences_for_loop to
      find and analyze all data-refs in the loop and their dependences.
   2- vect_analyze_dependences(): apply dependence testing using ddrs.
   3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
   4- vect_analyze_drs_access(): check that ref_stmt.step is ok.  */
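
/* For example (an illustrative sketch): for the access 'a[i+1]' to an array
   of 4-byte elements inside a loop over i, the analysis is expected to
   produce a data-ref whose base address is '&a', with init 4 and step 4;
   if any of these fields cannot be determined, the data-ref analysis is
   considered to have failed and the loop is not vectorized.  */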

static bool
vect_analyze_data_refs (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  unsigned int i;
  varray_type datarefs;
  tree scalar_type;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_data_refs ===");

  compute_data_dependences_for_loop (loop, false,
                                     &(LOOP_VINFO_DATAREFS (loop_vinfo)),
                                     &(LOOP_VINFO_DDRS (loop_vinfo)));

  /* Go through the data-refs and check that the analysis succeeded. Update
     the pointer from the stmt_vec_info struct to the DR, and set the
     vectype.  */
  datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
    {
      struct data_reference *dr = VARRAY_GENERIC_PTR (datarefs, i);
      tree stmt;
      stmt_vec_info stmt_info;

      if (!dr || !DR_REF (dr))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            fprintf (vect_dump, "not vectorized: unhandled data-ref ");
          return false;
        }

      /* Update DR field in stmt_vec_info struct.  */
      stmt = DR_STMT (dr);
      stmt_info = vinfo_for_stmt (stmt);

      if (STMT_VINFO_DATA_REF (stmt_info))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            {
              fprintf (vect_dump,
                       "not vectorized: more than one data ref in stmt: ");
              print_generic_expr (vect_dump, stmt, TDF_SLIM);
            }
          return false;
        }
      STMT_VINFO_DATA_REF (stmt_info) = dr;

      /* Check that analysis of the data-ref succeeded.  */
      if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
          || !DR_STEP (dr))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            {
              fprintf (vect_dump, "not vectorized: data ref analysis failed ");
              print_generic_expr (vect_dump, stmt, TDF_SLIM);
            }
          return false;
        }
      if (!DR_MEMTAG (dr))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            {
              fprintf (vect_dump, "not vectorized: no memory tag for ");
              print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
            }
          return false;
        }

      /* Set vectype for STMT.  */
      scalar_type = TREE_TYPE (DR_REF (dr));
      STMT_VINFO_VECTYPE (stmt_info) =
                get_vectype_for_scalar_type (scalar_type);
      if (!STMT_VINFO_VECTYPE (stmt_info))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            {
              fprintf (vect_dump,
                       "not vectorized: no vectype for stmt: ");
              print_generic_expr (vect_dump, stmt, TDF_SLIM);
              fprintf (vect_dump, " scalar_type: ");
              print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
            }
          return false;
        }
    }

  return true;
}


/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (VEC(tree,heap) **worklist, tree stmt,
                    bool relevant_p, bool live_p)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  bool save_relevant_p = STMT_VINFO_RELEVANT_P (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "mark relevant %d, live %d.", relevant_p, live_p);

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  STMT_VINFO_RELEVANT_P (stmt_info) |= relevant_p;

  if (TREE_CODE (stmt) == PHI_NODE)
    /* Don't put phi-nodes in the worklist. Phis that are marked relevant
       or live will fail vectorization later on.  */
    return;

  if (STMT_VINFO_RELEVANT_P (stmt_info) == save_relevant_p
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "already marked relevant/live.");
      return;
    }

  VEC_safe_push (tree, heap, *worklist, stmt);
}


/* Function vect_stmt_relevant_p.

   Return true if STMT, in the loop that is represented by LOOP_VINFO,
   is "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop, or
   - it has vdefs (it alters memory), or
   - it is a control stmt in the loop (other than the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */
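
/* For example (an illustrative sketch): in

        for (i=0; i<N; i++){
          a[i] = b[i];		# relevant: alters memory (has a vdef)
          sum = sum + b[i];	# live: 'sum' is used after the loop
        }
        use (sum);

   the store to a[i] is relevant and the computation of 'sum' is live,
   while the increment of 'i' by itself is neither (it is only used for
   array indexing and loop control).  */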

static bool
vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo,
		      bool *relevant_p, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant_p = false;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
    *relevant_p = true;

  /* changing memory.  */
  if (TREE_CODE (stmt) != PHI_NODE)
    if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_VIRTUAL_DEFS))
      {
	if (vect_print_dump_info (REPORT_DETAILS))
	  fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
	*relevant_p = true;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
	{
	  basic_block bb = bb_for_stmt (USE_STMT (use_p));
	  if (!flow_bb_inside_loop_p (loop, bb))
	    {
	      if (vect_print_dump_info (REPORT_DETAILS))
		fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");

	      /* We expect all such uses to be in the loop exit phis
		 (because of loop-closed SSA form).  */
	      gcc_assert (TREE_CODE (USE_STMT (use_p)) == PHI_NODE);
	      gcc_assert (bb == loop->single_exit->dest);

	      *live_p = true;
	    }
	}
    }

  return (*live_p || *relevant_p);
}


/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized. For example:

     for i...
       for j...
   1.    T0 = i + j
   2.    T1 = a[T0]

   3.    j = j + 1

   Stmts 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */
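
/* The detection is a backward propagation over a worklist (an
   illustrative summary of the code below, not an exact specification):

     1. Seed the worklist with every phi/stmt for which
        vect_stmt_relevant_p holds, recording for each whether it is
        "relevant" and/or "live".
     2. Pop a stmt from the worklist and propagate its relevant/live
        flags to the defining stmts of its uses, pushing newly marked
        stmts back onto the worklist.  Uses that only serve address
        computations, and reduction phis, are treated specially (see the
        comments in the function body).  */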

static bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  VEC(tree,heap) *worklist;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  tree stmt, use;
  stmt_ann_t ann;
  ssa_op_iter iter;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  tree phi;
  bool relevant_p, live_p;
  tree def, def_stmt;
  enum vect_def_type dt;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");

  worklist = VEC_alloc (tree, heap, 64);

  /* 1. Init worklist.  */

  bb = loop->header;
  for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "init: phi relevant? ");
          print_generic_expr (vect_dump, phi, TDF_SLIM);
        }

      if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant_p, &live_p))
	vect_mark_relevant (&worklist, phi, relevant_p, live_p);
    }

  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
	{
	  stmt = bsi_stmt (si);

	  if (vect_print_dump_info (REPORT_DETAILS))
	    {
	      fprintf (vect_dump, "init: stmt relevant? ");
	      print_generic_expr (vect_dump, stmt, TDF_SLIM);
	    }

	  if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant_p, &live_p))
            vect_mark_relevant (&worklist, stmt, relevant_p, live_p);
	}
    }


  /* 2. Process worklist.  */

  while (VEC_length (tree, worklist) > 0)
    {
      stmt = VEC_pop (tree, worklist);

      if (vect_print_dump_info (REPORT_DETAILS))
	{
          fprintf (vect_dump, "worklist: examine stmt: ");
          print_generic_expr (vect_dump, stmt, TDF_SLIM);
	}

      /* Examine the USEs of STMT. For each ssa-name USE that is defined
         in the loop, mark the stmt that defines it (DEF_STMT) as
         relevant/irrelevant and live/dead according to the liveness and
         relevance properties of STMT.  */

      gcc_assert (TREE_CODE (stmt) != PHI_NODE);

      ann = stmt_ann (stmt);
      stmt_vinfo = vinfo_for_stmt (stmt);

      relevant_p = STMT_VINFO_RELEVANT_P (stmt_vinfo);
      live_p = STMT_VINFO_LIVE_P (stmt_vinfo);

      /* Generally, the liveness and relevance properties of STMT are
         propagated to the DEF_STMTs of its USEs:
             STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
             STMT_VINFO_RELEVANT_P (DEF_STMT_info) <-- relevant_p

         Exceptions:

	 (case 1)
           If USE is used only for address computations (e.g. array indexing),
           which does not need to be directly vectorized, then the
           liveness/relevance of the respective DEF_STMT is left unchanged.

	 (case 2)
           If STMT has been identified as defining a reduction variable, then
	   we have two cases:
	   (case 2.1)
	     The last use of STMT is the reduction-variable, which is defined
	     by a loop-header-phi. We don't want to mark the phi as live or
	     relevant (because it does not need to be vectorized, it is handled
             as part of the vectorization of the reduction), so in this case we
	     skip the call to vect_mark_relevant.
	   (case 2.2)
	     The rest of the uses of STMT are defined in the loop body. For
             the def_stmt of these uses we want to set liveness/relevance
             as follows:
               STMT_VINFO_LIVE_P (DEF_STMT_info) <-- false
               STMT_VINFO_RELEVANT_P (DEF_STMT_info) <-- true
             because even though STMT is classified as live (since it defines a
             value that is used across loop iterations) and irrelevant (since it
             is not used inside the loop), it will be vectorized, and therefore
             the corresponding DEF_STMTs need to be marked as relevant.  */
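
      /* For example, for a sum reduction (an illustrative sketch):

           s_loop = phi (s_init, s_next)   <-- case 2.1: the defining phi
           ...                                 is not marked
           t = b[i] * c[i]                 <-- case 2.2: def_stmt of a use
           s_next = s_loop + t                 of the reduction stmt; marked
                                               relevant (but not live).  */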

      /* case 2.2:  */
      if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
        {
          gcc_assert (!relevant_p && live_p);
          relevant_p = true;
          live_p = false;
        }

      FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
	{
	  /* case 1: we are only interested in uses that need to be vectorized.
	     Uses that are used for address computation are not considered
	     relevant.  */
	  if (!exist_non_indexing_operands_for_use_p (use, stmt))
	    continue;

	  if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &def, &dt))
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
	      VEC_free (tree, heap, worklist);
              return false;
            }

	  if (!def_stmt || IS_EMPTY_STMT (def_stmt))
	    continue;

          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "worklist: examine use %d: ", i);
              print_generic_expr (vect_dump, use, TDF_SLIM);
            }

	  bb = bb_for_stmt (def_stmt);
          if (!flow_bb_inside_loop_p (loop, bb))
            continue;

	  /* case 2.1: the reduction-use does not mark the defining-phi
	     as relevant.  */
	  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
	      && TREE_CODE (def_stmt) == PHI_NODE)
	    continue;

	  vect_mark_relevant (&worklist, def_stmt, relevant_p, live_p);
	}
    }				/* while worklist */

  VEC_free (tree, heap, worklist);
  return true;
}


/* Function vect_can_advance_ivs_p

   In case the number of iterations of LOOP is unknown at compile time,
   an epilog loop will be generated, and the loop induction variables
   (IVs) will be "advanced" to the value they are supposed to take just
   before the epilog loop.  Here we check that the access function of the
   loop IVs and the expression that represents the loop bound are simple
   enough.  These restrictions will be relaxed in the future.  */
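
/* For example (illustrative; using GCC's chrec notation): an IV with
   access function {init, +, step}_n has evolution part 'step' in loop n,
   and can be advanced past NITERS iterations by computing
   init + NITERS * step.  An IV whose evolution part is itself a chrec,
   e.g. {init, +, {a, +, b}_n}_n, is rejected below (FORNOW).  */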

static bool
vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block bb = loop->header;
  tree phi;

  /* Analyze phi functions of the loop header.  */

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_can_advance_ivs_p ===");

  for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
    {
      tree access_fn = NULL;
      tree evolution_part;

      if (vect_print_dump_info (REPORT_DETAILS))
	{
          fprintf (vect_dump, "Analyze phi: ");
          print_generic_expr (vect_dump, phi, TDF_SLIM);
	}

      /* Skip virtual phis. The data dependences that are associated with
         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */

      if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
	{
	  if (vect_print_dump_info (REPORT_DETAILS))
	    fprintf (vect_dump, "virtual phi. skip.");
	  continue;
	}

      /* Skip reduction phis.  */

      if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "reduc phi. skip.");
          continue;
        }

      /* Analyze the evolution function.  */

      access_fn = instantiate_parameters
	(loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));

      if (!access_fn)
	{
	  if (vect_print_dump_info (REPORT_DETAILS))
	    fprintf (vect_dump, "No Access function.");
	  return false;
	}

      if (vect_print_dump_info (REPORT_DETAILS))
        {
	  fprintf (vect_dump, "Access function of PHI: ");
	  print_generic_expr (vect_dump, access_fn, TDF_SLIM);
        }

      evolution_part = evolution_part_in_loop_num (access_fn, loop->num);

      if (evolution_part == NULL_TREE)
        {
	  if (vect_print_dump_info (REPORT_DETAILS))
	    fprintf (vect_dump, "No evolution.");
	  return false;
        }

      /* FORNOW: We do not transform initial conditions of IVs
	 whose evolution functions are polynomials of degree >= 2.  */

      if (tree_is_chrec (evolution_part))
	return false;
    }

  return true;
}


/* Function vect_get_loop_niters.

   Determine the number of iterations LOOP executes.
   If an expression that represents the number of iterations
   can be constructed, place it in NUMBER_OF_ITERATIONS.
   Return the loop exit condition.  */
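
/* For example (illustrative): for a loop 'for (i = 0; i < n; i++)' the
   expression placed in NUMBER_OF_ITERATIONS would simply be 'n'; when
   the number-of-iterations analysis cannot determine the trip count,
   NUMBER_OF_ITERATIONS is left untouched.  */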

static tree
vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
{
  tree niters;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== get_loop_niters ===");

  niters = number_of_iterations_in_loop (loop);

  if (niters != NULL_TREE
      && niters != chrec_dont_know)
    {
      *number_of_iterations = niters;

      if (vect_print_dump_info (REPORT_DETAILS))
	{
	  fprintf (vect_dump, "==> get_loop_niters:");
	  print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM);
	}
    }

  return get_loop_exit_condition (loop);
}


/* Function vect_analyze_loop_form.

   Verify the following restrictions (some may be relaxed in the future):
   - it's an innermost loop
   - number of BBs = 2 (which are the loop header and the latch)
   - the loop has a pre-header
   - the loop has a single entry and exit
   - the loop exit condition is simple enough, and the number of iterations
     can be analyzed (a countable loop).  */
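
/* The expected loop form is therefore roughly (an illustrative CFG
   sketch; see also the comments inside the function):

     preheader:
       ...
     header:                <-- all executable stmts, ending in the
       ...                      exit condition
       if (exit_cond) goto exit; else goto latch;
     latch:                 <-- empty
       goto header;
     exit:                  <-- single-predecessor exit bb  */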

static loop_vec_info
vect_analyze_loop_form (struct loop *loop)
{
  loop_vec_info loop_vinfo;
  tree loop_cond;
  tree number_of_iterations = NULL;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_loop_form ===");

  if (loop->inner)
    {
      if (vect_print_dump_info (REPORT_OUTER_LOOPS))
        fprintf (vect_dump, "not vectorized: nested loop.");
      return NULL;
    }

  if (!loop->single_exit
      || loop->num_nodes != 2
      || EDGE_COUNT (loop->header->preds) != 2)
    {
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
        {
          if (!loop->single_exit)
            fprintf (vect_dump, "not vectorized: multiple exits.");
          else if (loop->num_nodes != 2)
            fprintf (vect_dump, "not vectorized: too many BBs in loop.");
          else if (EDGE_COUNT (loop->header->preds) != 2)
            fprintf (vect_dump, "not vectorized: too many incoming edges.");
        }

      return NULL;
    }

  /* We assume that the loop exit condition is at the end of the loop, i.e.,
     that the loop is represented as a do-while (with a proper if-guard
     before the loop if needed), where the loop header contains all the
     executable statements, and the latch is empty.  */
  if (!empty_block_p (loop->latch))
    {
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
        fprintf (vect_dump, "not vectorized: unexpected loop form.");
      return NULL;
    }

  /* Make sure there exists a single-predecessor exit bb:  */
  if (!single_pred_p (loop->single_exit->dest))
    {
      edge e = loop->single_exit;
      if (!(e->flags & EDGE_ABNORMAL))
	{
	  split_loop_exit_edge (e);
	  if (vect_print_dump_info (REPORT_DETAILS))
	    fprintf (vect_dump, "split exit edge.");
	}
      else
	{
	  if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
	    fprintf (vect_dump, "not vectorized: abnormal loop exit edge.");
	  return NULL;
	}
    }

  if (empty_block_p (loop->header))
    {
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
        fprintf (vect_dump, "not vectorized: empty loop.");
      return NULL;
    }

  loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
  if (!loop_cond)
    {
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
	fprintf (vect_dump, "not vectorized: complicated exit condition.");
      return NULL;
    }

  if (!number_of_iterations)
    {
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
	fprintf (vect_dump,
		 "not vectorized: number of iterations cannot be computed.");
      return NULL;
    }

  if (chrec_contains_undetermined (number_of_iterations))
    {
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
        fprintf (vect_dump, "Infinite number of iterations.");
      return NULL;
    }

  loop_vinfo = new_loop_vec_info (loop);
  LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;

  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Symbolic number of iterations is ");
          print_generic_expr (vect_dump, number_of_iterations, TDF_DETAILS);
        }
    }
  else if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump, "not vectorized: number of iterations = 0.");
      return NULL;
    }

  LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;

  return loop_vinfo;
}


/* Function vect_analyze_loop.

   Apply a set of analyses on LOOP, and create a loop_vec_info struct
   for it. The different analyses will record information in the
   loop_vec_info struct.  */
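
/* The analyses below are applied in order, and a failure at any step
   destroys the loop_vec_info and gives up on LOOP (a summary of the
   code that follows): loop form, data references, scalar cycles, stmt
   relevance, alignment analysis, vectorization factor, data
   dependences, access patterns, alignment enhancement (versioning /
   peeling), and finally the vectorizability of each operation.  */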
loop_vec_info
vect_analyze_loop (struct loop *loop)
{
  bool ok;
  loop_vec_info loop_vinfo;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "===== analyze_loop_nest =====");

  /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.).  */

  loop_vinfo = vect_analyze_loop_form (loop);
  if (!loop_vinfo)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "bad loop form.");
      return NULL;
    }

  /* Find all data references in the loop (which correspond to vdefs/vuses)
     and analyze their evolution in the loop.

     FORNOW: Handle only simple array references whose alignment can be
     forced, and aligned pointer-references.  */

  ok = vect_analyze_data_refs (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "bad data references.");
      destroy_loop_vec_info (loop_vinfo);
      return NULL;
    }

  /* Classify all cross-iteration scalar data-flow cycles.
     Cross-iteration cycles caused by virtual phis are analyzed separately.  */

  vect_analyze_scalar_cycles (loop_vinfo);

  /* Data-flow analysis to detect stmts that do not need to be vectorized.  */

  ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "unexpected pattern.");
      destroy_loop_vec_info (loop_vinfo);
      return NULL;
    }

  /* Analyze the alignment of the data-refs in the loop.
     Fail if a data reference is found that cannot be vectorized.  */

  ok = vect_analyze_data_refs_alignment (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "bad data alignment.");
      destroy_loop_vec_info (loop_vinfo);
      return NULL;
    }

  ok = vect_determine_vectorization_factor (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "can't determine vectorization factor.");
      destroy_loop_vec_info (loop_vinfo);
      return NULL;
    }

  /* Analyze data dependences between the data-refs in the loop.
     FORNOW: fail at the first data dependence that we encounter.  */

  ok = vect_analyze_data_ref_dependences (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "bad data dependence.");
      destroy_loop_vec_info (loop_vinfo);
      return NULL;
    }

  /* Analyze the access patterns of the data-refs in the loop (consecutive,
     complex, etc.). FORNOW: Only handle consecutive access pattern.  */

  ok = vect_analyze_data_ref_accesses (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "bad data access.");
      destroy_loop_vec_info (loop_vinfo);
      return NULL;
    }

  /* This pass will decide on using loop versioning and/or loop peeling in
     order to enhance the alignment of data references in the loop.  */

  ok = vect_enhance_data_refs_alignment (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "bad data alignment.");
      destroy_loop_vec_info (loop_vinfo);
      return NULL;
    }

  /* Scan all the operations in the loop and make sure they are
     vectorizable.  */

  ok = vect_analyze_operations (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "bad operation or unsupported loop bound.");
      destroy_loop_vec_info (loop_vinfo);
      return NULL;
    }

  LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;

  return loop_vinfo;
}