1 /* Loop Vectorization
2    Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
3    Free Software Foundation, Inc.
4    Contributed by Dorit Naishlos <dorit@il.ibm.com> and
5    Ira Rosen <irar@il.ibm.com>
6 
7 This file is part of GCC.
8 
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13 
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
17 for more details.
18 
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3.  If not see
21 <http://www.gnu.org/licenses/>.  */
22 
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "basic-block.h"
30 #include "tree-pretty-print.h"
31 #include "gimple-pretty-print.h"
32 #include "tree-flow.h"
33 #include "tree-dump.h"
34 #include "cfgloop.h"
35 #include "cfglayout.h"
36 #include "expr.h"
37 #include "recog.h"
38 #include "optabs.h"
39 #include "params.h"
40 #include "diagnostic-core.h"
41 #include "tree-chrec.h"
42 #include "tree-scalar-evolution.h"
43 #include "tree-vectorizer.h"
44 #include "target.h"
45 
46 /* Loop Vectorization Pass.
47 
48    This pass tries to vectorize loops.
49 
50    For example, the vectorizer transforms the following simple loop:
51 
52         short a[N]; short b[N]; short c[N]; int i;
53 
54         for (i=0; i<N; i++){
55           a[i] = b[i] + c[i];
56         }
57 
58    as if it was manually vectorized by rewriting the source code into:
59 
60         typedef int __attribute__((mode(V8HI))) v8hi;
61         short a[N];  short b[N]; short c[N];   int i;
62         v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
63         v8hi va, vb, vc;
64 
65         for (i=0; i<N/8; i++){
66           vb = pb[i];
67           vc = pc[i];
68           va = vb + vc;
69           pa[i] = va;
70         }
71 
72         The main entry to this pass is vectorize_loops(), in which
73    the vectorizer applies a set of analyses on a given set of loops,
74    followed by the actual vectorization transformation for the loops that
75    had successfully passed the analysis phase.
76         Throughout this pass we make a distinction between two types of
77    data: scalars (which are represented by SSA_NAMES), and memory references
78    ("data-refs").  These two types of data require different handling both
79    during analysis and transformation. The types of data-refs that the
80    vectorizer currently supports are ARRAY_REFS which base is an array DECL
81    (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
82    accesses are required to have a simple (consecutive) access pattern.
83 
84    Analysis phase:
85    ===============
86         The driver for the analysis phase is vect_analyze_loop().
87    It applies a set of analyses, some of which rely on the scalar evolution
88    analyzer (scev) developed by Sebastian Pop.
89 
90         During the analysis phase the vectorizer records some information
91    per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
92    loop, as well as general information about the loop as a whole, which is
93    recorded in a "loop_vec_info" struct attached to each loop.
94 
95    Transformation phase:
96    =====================
97         The loop transformation phase scans all the stmts in the loop, and
98    creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
99    the loop that needs to be vectorized.  It inserts the vector code sequence
100    just before the scalar stmt S, and records a pointer to the vector code
101    in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
102    attached to S).  This pointer will be used for the vectorization of following
103    stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
104    otherwise, we rely on dead code elimination for removing it.
105 
106         For example, say stmt S1 was vectorized into stmt VS1:
107 
108    VS1: vb = px[i];
109    S1:  b = x[i];    STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
110    S2:  a = b;
111 
112    To vectorize stmt S2, the vectorizer first finds the stmt that defines
113    the operand 'b' (S1), and gets the relevant vector def 'vb' from the
114    vector stmt VS1 pointed to by STMT_VINFO_VEC_STMT (stmt_info (S1)).  The
115    resulting sequence would be:
116 
117    VS1: vb = px[i];
118    S1:  b = x[i];       STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
119    VS2: va = vb;
120    S2:  a = b;          STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
121 
122         Operands that are not SSA_NAMEs, are data-refs that appear in
123    load/store operations (like 'x[i]' in S1), and are handled differently.
124 
125    Target modeling:
126    =================
127         Currently the only target specific information that is used is the
128    size of the vector (in bytes) - "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD".
129    Targets that can support different sizes of vectors, for now will need
130    to specify one value for "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD".  More
131    flexibility will be added in the future.
132 
133         Since we only vectorize operations which vector form can be
134    expressed using existing tree codes, to verify that an operation is
135    supported, the vectorizer checks the relevant optab at the relevant
136    machine_mode (e.g, optab_handler (add_optab, V8HImode)).  If
137    the value found is CODE_FOR_nothing, then there's no target support, and
138    we can't vectorize the stmt.
139 
140    For additional information on this project see:
141    http://gcc.gnu.org/projects/tree-ssa/vectorization.html
142 */
143 
144 /* Function vect_determine_vectorization_factor
145 
146    Determine the vectorization factor (VF).  VF is the number of data elements
147    that are operated upon in parallel in a single iteration of the vectorized
148    loop.  For example, when vectorizing a loop that operates on 4byte elements,
149    on a target with vector size (VS) 16byte, the VF is set to 4, since 4
150    elements can fit in a single vector register.
151 
152    We currently support vectorization of loops in which all types operated upon
153    are of the same size.  Therefore this function currently sets VF according to
154    the size of the types operated upon, and fails if there are multiple sizes
155    in the loop.
156 
157    VF is also the factor by which the loop iterations are strip-mined, e.g.:
158    original loop:
159         for (i=0; i<N; i++){
160           a[i] = b[i] + c[i];
161         }
162 
163    vectorized loop:
164         for (i=0; i<N; i+=VF){
165           a[i:VF] = b[i:VF] + c[i:VF];
166         }
167 */
168 
169 static bool
170 vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
171 {
172   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
173   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
174   int nbbs = loop->num_nodes;
175   gimple_stmt_iterator si;
176   unsigned int vectorization_factor = 0;
177   tree scalar_type;
178   gimple phi;
179   tree vectype;
180   unsigned int nunits;
181   stmt_vec_info stmt_info;
182   int i;
183   HOST_WIDE_INT dummy;
184   gimple stmt, pattern_stmt = NULL;
185   gimple_seq pattern_def_seq = NULL;
186   gimple_stmt_iterator pattern_def_si = gsi_start (NULL);
187   bool analyze_pattern_stmt = false;
188 
189   if (vect_print_dump_info (REPORT_DETAILS))
190     fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");
191 
192   for (i = 0; i < nbbs; i++)
193     {
194       basic_block bb = bbs[i];
195 
196       for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
197 	{
198 	  phi = gsi_stmt (si);
199 	  stmt_info = vinfo_for_stmt (phi);
200 	  if (vect_print_dump_info (REPORT_DETAILS))
201 	    {
202 	      fprintf (vect_dump, "==> examining phi: ");
203 	      print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
204 	    }
205 
206 	  gcc_assert (stmt_info);
207 
208 	  if (STMT_VINFO_RELEVANT_P (stmt_info))
209             {
210 	      gcc_assert (!STMT_VINFO_VECTYPE (stmt_info));
211               scalar_type = TREE_TYPE (PHI_RESULT (phi));
212 
213 	      if (vect_print_dump_info (REPORT_DETAILS))
214 		{
215 		  fprintf (vect_dump, "get vectype for scalar type:  ");
216 		  print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
217 		}
218 
219 	      vectype = get_vectype_for_scalar_type (scalar_type);
220 	      if (!vectype)
221 		{
222 		  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
223 		    {
224 		      fprintf (vect_dump,
225 		               "not vectorized: unsupported data-type ");
226 		      print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
227 		    }
228 		  return false;
229 		}
230 	      STMT_VINFO_VECTYPE (stmt_info) = vectype;
231 
232 	      if (vect_print_dump_info (REPORT_DETAILS))
233 		{
234 		  fprintf (vect_dump, "vectype: ");
235 		  print_generic_expr (vect_dump, vectype, TDF_SLIM);
236 		}
237 
238 	      nunits = TYPE_VECTOR_SUBPARTS (vectype);
239 	      if (vect_print_dump_info (REPORT_DETAILS))
240 		fprintf (vect_dump, "nunits = %d", nunits);
241 
242 	      if (!vectorization_factor
243 		  || (nunits > vectorization_factor))
244 		vectorization_factor = nunits;
245 	    }
246 	}
247 
248       for (si = gsi_start_bb (bb); !gsi_end_p (si) || analyze_pattern_stmt;)
249         {
250           tree vf_vectype;
251 
252           if (analyze_pattern_stmt)
253 	    stmt = pattern_stmt;
254           else
255             stmt = gsi_stmt (si);
256 
257           stmt_info = vinfo_for_stmt (stmt);
258 
259 	  if (vect_print_dump_info (REPORT_DETAILS))
260 	    {
261 	      fprintf (vect_dump, "==> examining statement: ");
262 	      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
263 	    }
264 
265 	  gcc_assert (stmt_info);
266 
267 	  /* Skip stmts which do not need to be vectorized.  */
268 	  if (!STMT_VINFO_RELEVANT_P (stmt_info)
269 	      && !STMT_VINFO_LIVE_P (stmt_info))
270             {
271               if (STMT_VINFO_IN_PATTERN_P (stmt_info)
272                   && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
273                   && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
274                       || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
275                 {
276                   stmt = pattern_stmt;
277                   stmt_info = vinfo_for_stmt (pattern_stmt);
278                   if (vect_print_dump_info (REPORT_DETAILS))
279                     {
280                       fprintf (vect_dump, "==> examining pattern statement: ");
281                       print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
282                     }
283                 }
284               else
285 	        {
286 	          if (vect_print_dump_info (REPORT_DETAILS))
287 	            fprintf (vect_dump, "skip.");
288                   gsi_next (&si);
289 	          continue;
290                 }
291 	    }
292           else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
293                    && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
294                    && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
295                        || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
296             analyze_pattern_stmt = true;
297 
298 	  /* If a pattern statement has def stmts, analyze them too.  */
299 	  if (is_pattern_stmt_p (stmt_info))
300 	    {
301 	      if (pattern_def_seq == NULL)
302 		{
303 		  pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info);
304 		  pattern_def_si = gsi_start (pattern_def_seq);
305 		}
306 	      else if (!gsi_end_p (pattern_def_si))
307 		gsi_next (&pattern_def_si);
308 	      if (pattern_def_seq != NULL)
309 		{
310 		  gimple pattern_def_stmt = NULL;
311 		  stmt_vec_info pattern_def_stmt_info = NULL;
312 
313 		  while (!gsi_end_p (pattern_def_si))
314 		    {
315 		      pattern_def_stmt = gsi_stmt (pattern_def_si);
316 		      pattern_def_stmt_info
317 			= vinfo_for_stmt (pattern_def_stmt);
318 		      if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
319 			  || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
320 			break;
321 		      gsi_next (&pattern_def_si);
322 		    }
323 
324 		  if (!gsi_end_p (pattern_def_si))
325 		    {
326 		      if (vect_print_dump_info (REPORT_DETAILS))
327 			{
328 			  fprintf (vect_dump,
329 				   "==> examining pattern def stmt: ");
330 			  print_gimple_stmt (vect_dump, pattern_def_stmt, 0,
331 					     TDF_SLIM);
332 			}
333 
334 		      stmt = pattern_def_stmt;
335 		      stmt_info = pattern_def_stmt_info;
336 		    }
337 		  else
338 		    {
339 		      pattern_def_si = gsi_start (NULL);
340 		      analyze_pattern_stmt = false;
341 		    }
342 		}
343 	      else
344 		analyze_pattern_stmt = false;
345 	    }
346 
347 	  if (gimple_get_lhs (stmt) == NULL_TREE)
348 	    {
349 	      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
350 		{
351 	          fprintf (vect_dump, "not vectorized: irregular stmt.");
352 		  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
353 		}
354 	      return false;
355 	    }
356 
357 	  if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
358 	    {
359 	      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
360 	        {
361 	          fprintf (vect_dump, "not vectorized: vector stmt in loop:");
362 	          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
363 	        }
364 	      return false;
365 	    }
366 
367 	  if (STMT_VINFO_VECTYPE (stmt_info))
368 	    {
369 	      /* The only case when a vectype had been already set is for stmts
370 	         that contain a dataref, or for "pattern-stmts" (stmts
371 		 generated by the vectorizer to represent/replace a certain
372 		 idiom).  */
373 	      gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
374 			  || is_pattern_stmt_p (stmt_info)
375 			  || !gsi_end_p (pattern_def_si));
376 	      vectype = STMT_VINFO_VECTYPE (stmt_info);
377 	    }
378 	  else
379 	    {
380 	      gcc_assert (!STMT_VINFO_DATA_REF (stmt_info));
381 	      scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
382 	      if (vect_print_dump_info (REPORT_DETAILS))
383 		{
384 		  fprintf (vect_dump, "get vectype for scalar type:  ");
385 		  print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
386 		}
387 	      vectype = get_vectype_for_scalar_type (scalar_type);
388 	      if (!vectype)
389 		{
390 		  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
391 		    {
392 		      fprintf (vect_dump,
393 			       "not vectorized: unsupported data-type ");
394 		      print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
395 		    }
396 		  return false;
397 		}
398 
399 	      STMT_VINFO_VECTYPE (stmt_info) = vectype;
400             }
401 
402 	  /* The vectorization factor is according to the smallest
403 	     scalar type (or the largest vector size, but we only
404 	     support one vector size per loop).  */
405 	  scalar_type = vect_get_smallest_scalar_type (stmt, &dummy,
406 						       &dummy);
407 	  if (vect_print_dump_info (REPORT_DETAILS))
408 	    {
409 	      fprintf (vect_dump, "get vectype for scalar type:  ");
410 	      print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
411 	    }
412 	  vf_vectype = get_vectype_for_scalar_type (scalar_type);
413 	  if (!vf_vectype)
414 	    {
415 	      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
416 		{
417 		  fprintf (vect_dump,
418 			   "not vectorized: unsupported data-type ");
419 		  print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
420 		}
421 	      return false;
422 	    }
423 
424 	  if ((GET_MODE_SIZE (TYPE_MODE (vectype))
425 	       != GET_MODE_SIZE (TYPE_MODE (vf_vectype))))
426 	    {
427 	      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
428 		{
429 		  fprintf (vect_dump,
430 			   "not vectorized: different sized vector "
431 			   "types in statement, ");
432 		  print_generic_expr (vect_dump, vectype, TDF_SLIM);
433 		  fprintf (vect_dump, " and ");
434 		  print_generic_expr (vect_dump, vf_vectype, TDF_SLIM);
435 		}
436 	      return false;
437 	    }
438 
439 	  if (vect_print_dump_info (REPORT_DETAILS))
440 	    {
441 	      fprintf (vect_dump, "vectype: ");
442 	      print_generic_expr (vect_dump, vf_vectype, TDF_SLIM);
443 	    }
444 
445 	  nunits = TYPE_VECTOR_SUBPARTS (vf_vectype);
446 	  if (vect_print_dump_info (REPORT_DETAILS))
447 	    fprintf (vect_dump, "nunits = %d", nunits);
448 
449 	  if (!vectorization_factor
450 	      || (nunits > vectorization_factor))
451 	    vectorization_factor = nunits;
452 
453 	  if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si))
454 	    {
455 	      pattern_def_seq = NULL;
456 	      gsi_next (&si);
457 	    }
458         }
459     }
460 
461   /* TODO: Analyze cost. Decide if worth while to vectorize.  */
462   if (vect_print_dump_info (REPORT_DETAILS))
463     fprintf (vect_dump, "vectorization factor = %d", vectorization_factor);
464   if (vectorization_factor <= 1)
465     {
466       if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
467         fprintf (vect_dump, "not vectorized: unsupported data-type");
468       return false;
469     }
470   LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
471 
472   return true;
473 }
474 
475 
476 /* Function vect_is_simple_iv_evolution.
477 
478    FORNOW: A simple evolution of an induction variables in the loop is
479    considered a polynomial evolution with constant step.  */
480 
481 static bool
482 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
483                              tree * step)
484 {
485   tree init_expr;
486   tree step_expr;
487   tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
488 
489   /* When there is no evolution in this loop, the evolution function
490      is not "simple".  */
491   if (evolution_part == NULL_TREE)
492     return false;
493 
494   /* When the evolution is a polynomial of degree >= 2
495      the evolution function is not "simple".  */
496   if (tree_is_chrec (evolution_part))
497     return false;
498 
499   step_expr = evolution_part;
500   init_expr = unshare_expr (initial_condition_in_loop_num (access_fn, loop_nb));
501 
502   if (vect_print_dump_info (REPORT_DETAILS))
503     {
504       fprintf (vect_dump, "step: ");
505       print_generic_expr (vect_dump, step_expr, TDF_SLIM);
506       fprintf (vect_dump, ",  init: ");
507       print_generic_expr (vect_dump, init_expr, TDF_SLIM);
508     }
509 
510   *init = init_expr;
511   *step = step_expr;
512 
513   if (TREE_CODE (step_expr) != INTEGER_CST)
514     {
515       if (vect_print_dump_info (REPORT_DETAILS))
516         fprintf (vect_dump, "step unknown.");
517       return false;
518     }
519 
520   return true;
521 }
522 
523 /* Function vect_analyze_scalar_cycles_1.
524 
525    Examine the cross iteration def-use cycles of scalar variables
526    in LOOP.  LOOP_VINFO represents the loop that is now being
527    considered for vectorization (can be LOOP, or an outer-loop
528    enclosing LOOP).  */
529 
530 static void
531 vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
532 {
533   basic_block bb = loop->header;
534   tree dumy;
535   VEC(gimple,heap) *worklist = VEC_alloc (gimple, heap, 64);
536   gimple_stmt_iterator gsi;
537   bool double_reduc;
538 
539   if (vect_print_dump_info (REPORT_DETAILS))
540     fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");
541 
542   /* First - identify all inductions.  Reduction detection assumes that all the
543      inductions have been identified, therefore, this order must not be
544      changed.  */
545   for (gsi = gsi_start_phis  (bb); !gsi_end_p (gsi); gsi_next (&gsi))
546     {
547       gimple phi = gsi_stmt (gsi);
548       tree access_fn = NULL;
549       tree def = PHI_RESULT (phi);
550       stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
551 
552       if (vect_print_dump_info (REPORT_DETAILS))
553 	{
554 	  fprintf (vect_dump, "Analyze phi: ");
555 	  print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
556 	}
557 
558       /* Skip virtual phi's.  The data dependences that are associated with
559          virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
560       if (!is_gimple_reg (SSA_NAME_VAR (def)))
561 	continue;
562 
563       STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;
564 
565       /* Analyze the evolution function.  */
566       access_fn = analyze_scalar_evolution (loop, def);
567       if (access_fn)
568 	{
569 	  STRIP_NOPS (access_fn);
570 	  if (vect_print_dump_info (REPORT_DETAILS))
571 	    {
572 	      fprintf (vect_dump, "Access function of PHI: ");
573 	      print_generic_expr (vect_dump, access_fn, TDF_SLIM);
574 	    }
575 	  STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo)
576 	    = evolution_part_in_loop_num (access_fn, loop->num);
577 	}
578 
579       if (!access_fn
580 	  || !vect_is_simple_iv_evolution (loop->num, access_fn, &dumy, &dumy))
581 	{
582 	  VEC_safe_push (gimple, heap, worklist, phi);
583 	  continue;
584 	}
585 
586       gcc_assert (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo) != NULL_TREE);
587 
588       if (vect_print_dump_info (REPORT_DETAILS))
589 	fprintf (vect_dump, "Detected induction.");
590       STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
591     }
592 
593 
594   /* Second - identify all reductions and nested cycles.  */
595   while (VEC_length (gimple, worklist) > 0)
596     {
597       gimple phi = VEC_pop (gimple, worklist);
598       tree def = PHI_RESULT (phi);
599       stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
600       gimple reduc_stmt;
601       bool nested_cycle;
602 
603       if (vect_print_dump_info (REPORT_DETAILS))
604         {
605           fprintf (vect_dump, "Analyze phi: ");
606           print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
607         }
608 
609       gcc_assert (is_gimple_reg (SSA_NAME_VAR (def)));
610       gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type);
611 
612       nested_cycle = (loop != LOOP_VINFO_LOOP (loop_vinfo));
613       reduc_stmt = vect_force_simple_reduction (loop_vinfo, phi, !nested_cycle,
614 						&double_reduc);
615       if (reduc_stmt)
616         {
617           if (double_reduc)
618             {
619               if (vect_print_dump_info (REPORT_DETAILS))
620                 fprintf (vect_dump, "Detected double reduction.");
621 
622               STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_double_reduction_def;
623               STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
624                                                     vect_double_reduction_def;
625             }
626           else
627             {
628               if (nested_cycle)
629                 {
630                   if (vect_print_dump_info (REPORT_DETAILS))
631                     fprintf (vect_dump, "Detected vectorizable nested cycle.");
632 
633                   STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_nested_cycle;
634                   STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
635                                                              vect_nested_cycle;
636                 }
637               else
638                 {
639                   if (vect_print_dump_info (REPORT_DETAILS))
640                     fprintf (vect_dump, "Detected reduction.");
641 
642                   STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
643                   STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
644                                                            vect_reduction_def;
645                   /* Store the reduction cycles for possible vectorization in
646                      loop-aware SLP.  */
647                   VEC_safe_push (gimple, heap,
648                                  LOOP_VINFO_REDUCTIONS (loop_vinfo),
649                                  reduc_stmt);
650                 }
651             }
652         }
653       else
654         if (vect_print_dump_info (REPORT_DETAILS))
655           fprintf (vect_dump, "Unknown def-use cycle pattern.");
656     }
657 
658   VEC_free (gimple, heap, worklist);
659 }
660 
661 
662 /* Function vect_analyze_scalar_cycles.
663 
664    Examine the cross iteration def-use cycles of scalar variables, by
665    analyzing the loop-header PHIs of scalar variables.  Classify each
666    cycle as one of the following: invariant, induction, reduction, unknown.
667    We do that for the loop represented by LOOP_VINFO, and also to its
668    inner-loop, if exists.
669    Examples for scalar cycles:
670 
671    Example1: reduction:
672 
673               loop1:
674               for (i=0; i<N; i++)
675                  sum += a[i];
676 
677    Example2: induction:
678 
679               loop2:
680               for (i=0; i<N; i++)
681                  a[i] = i;  */
682 
683 static void
684 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
685 {
686   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
687 
688   vect_analyze_scalar_cycles_1 (loop_vinfo, loop);
689 
690   /* When vectorizing an outer-loop, the inner-loop is executed sequentially.
691      Reductions in such inner-loop therefore have different properties than
692      the reductions in the nest that gets vectorized:
693      1. When vectorized, they are executed in the same order as in the original
694         scalar loop, so we can't change the order of computation when
695         vectorizing them.
696      2. FIXME: Inner-loop reductions can be used in the inner-loop, so the
697         current checks are too strict.  */
698 
699   if (loop->inner)
700     vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner);
701 }
702 
703 /* Function vect_get_loop_niters.
704 
705    Determine how many iterations the loop is executed.
706    If an expression that represents the number of iterations
707    can be constructed, place it in NUMBER_OF_ITERATIONS.
708    Return the loop exit condition.  */
709 
710 static gimple
711 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
712 {
713   tree niters;
714 
715   if (vect_print_dump_info (REPORT_DETAILS))
716     fprintf (vect_dump, "=== get_loop_niters ===");
717 
718   niters = number_of_exit_cond_executions (loop);
719 
720   if (niters != NULL_TREE
721       && niters != chrec_dont_know)
722     {
723       *number_of_iterations = niters;
724 
725       if (vect_print_dump_info (REPORT_DETAILS))
726         {
727           fprintf (vect_dump, "==> get_loop_niters:" );
728           print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM);
729         }
730     }
731 
732   return get_loop_exit_condition (loop);
733 }
734 
735 
736 /* Function bb_in_loop_p
737 
738    Used as predicate for dfs order traversal of the loop bbs.  */
739 
740 static bool
741 bb_in_loop_p (const_basic_block bb, const void *data)
742 {
743   const struct loop *const loop = (const struct loop *)data;
744   if (flow_bb_inside_loop_p (loop, bb))
745     return true;
746   return false;
747 }
748 
749 
750 /* Function new_loop_vec_info.
751 
752    Create and initialize a new loop_vec_info struct for LOOP, as well as
753    stmt_vec_info structs for all the stmts in LOOP.  */
754 
755 static loop_vec_info
756 new_loop_vec_info (struct loop *loop)
757 {
758   loop_vec_info res;
759   basic_block *bbs;
760   gimple_stmt_iterator si;
761   unsigned int i, nbbs;
762 
763   res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
764   LOOP_VINFO_LOOP (res) = loop;
765 
766   bbs = get_loop_body (loop);
767 
768   /* Create/Update stmt_info for all stmts in the loop.  */
769   for (i = 0; i < loop->num_nodes; i++)
770     {
771       basic_block bb = bbs[i];
772 
773       /* BBs in a nested inner-loop will have been already processed (because
774          we will have called vect_analyze_loop_form for any nested inner-loop).
775          Therefore, for stmts in an inner-loop we just want to update the
776          STMT_VINFO_LOOP_VINFO field of their stmt_info to point to the new
777          loop_info of the outer-loop we are currently considering to vectorize
778          (instead of the loop_info of the inner-loop).
779          For stmts in other BBs we need to create a stmt_info from scratch.  */
780       if (bb->loop_father != loop)
781         {
782           /* Inner-loop bb.  */
783           gcc_assert (loop->inner && bb->loop_father == loop->inner);
784           for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
785             {
786               gimple phi = gsi_stmt (si);
787               stmt_vec_info stmt_info = vinfo_for_stmt (phi);
788               loop_vec_info inner_loop_vinfo =
789                 STMT_VINFO_LOOP_VINFO (stmt_info);
790               gcc_assert (loop->inner == LOOP_VINFO_LOOP (inner_loop_vinfo));
791               STMT_VINFO_LOOP_VINFO (stmt_info) = res;
792             }
793           for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
794            {
795               gimple stmt = gsi_stmt (si);
796               stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
797               loop_vec_info inner_loop_vinfo =
798                  STMT_VINFO_LOOP_VINFO (stmt_info);
799               gcc_assert (loop->inner == LOOP_VINFO_LOOP (inner_loop_vinfo));
800               STMT_VINFO_LOOP_VINFO (stmt_info) = res;
801            }
802         }
803       else
804         {
805           /* bb in current nest.  */
806           for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
807             {
808               gimple phi = gsi_stmt (si);
809               gimple_set_uid (phi, 0);
810               set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, res, NULL));
811             }
812 
813           for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
814             {
815               gimple stmt = gsi_stmt (si);
816               gimple_set_uid (stmt, 0);
817               set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, res, NULL));
818             }
819         }
820     }
821 
822   /* CHECKME: We want to visit all BBs before their successors (except for
823      latch blocks, for which this assertion wouldn't hold).  In the simple
824      case of the loop forms we allow, a dfs order of the BBs would the same
825      as reversed postorder traversal, so we are safe.  */
826 
827    free (bbs);
828    bbs = XCNEWVEC (basic_block, loop->num_nodes);
829    nbbs = dfs_enumerate_from (loop->header, 0, bb_in_loop_p,
830                               bbs, loop->num_nodes, loop);
831    gcc_assert (nbbs == loop->num_nodes);
832 
833   LOOP_VINFO_BBS (res) = bbs;
834   LOOP_VINFO_NITERS (res) = NULL;
835   LOOP_VINFO_NITERS_UNCHANGED (res) = NULL;
836   LOOP_VINFO_COST_MODEL_MIN_ITERS (res) = 0;
837   LOOP_VINFO_VECTORIZABLE_P (res) = 0;
838   LOOP_PEELING_FOR_ALIGNMENT (res) = 0;
839   LOOP_VINFO_VECT_FACTOR (res) = 0;
840   LOOP_VINFO_LOOP_NEST (res) = VEC_alloc (loop_p, heap, 3);
841   LOOP_VINFO_DATAREFS (res) = VEC_alloc (data_reference_p, heap, 10);
842   LOOP_VINFO_DDRS (res) = VEC_alloc (ddr_p, heap, 10 * 10);
843   LOOP_VINFO_UNALIGNED_DR (res) = NULL;
844   LOOP_VINFO_MAY_MISALIGN_STMTS (res) =
845     VEC_alloc (gimple, heap,
846                PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS));
847   LOOP_VINFO_MAY_ALIAS_DDRS (res) =
848     VEC_alloc (ddr_p, heap,
849                PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS));
850   LOOP_VINFO_STRIDED_STORES (res) = VEC_alloc (gimple, heap, 10);
851   LOOP_VINFO_REDUCTIONS (res) = VEC_alloc (gimple, heap, 10);
852   LOOP_VINFO_REDUCTION_CHAINS (res) = VEC_alloc (gimple, heap, 10);
853   LOOP_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 10);
854   LOOP_VINFO_SLP_UNROLLING_FACTOR (res) = 1;
855   LOOP_VINFO_PEELING_HTAB (res) = NULL;
856   LOOP_VINFO_PEELING_FOR_GAPS (res) = false;
857 
858   return res;
859 }
860 
861 
862 /* Function destroy_loop_vec_info.
863 
864    Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
865    stmts in the loop.  */
866 
867 void
868 destroy_loop_vec_info (loop_vec_info loop_vinfo, bool clean_stmts)
869 {
870   struct loop *loop;
871   basic_block *bbs;
872   int nbbs;
873   gimple_stmt_iterator si;
874   int j;
875   VEC (slp_instance, heap) *slp_instances;
876   slp_instance instance;
877 
878   if (!loop_vinfo)
879     return;
880 
881   loop = LOOP_VINFO_LOOP (loop_vinfo);
882 
883   bbs = LOOP_VINFO_BBS (loop_vinfo);
884   nbbs = loop->num_nodes;
885 
886   if (!clean_stmts)
887     {
888       free (LOOP_VINFO_BBS (loop_vinfo));
889       free_data_refs (LOOP_VINFO_DATAREFS (loop_vinfo));
890       free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo));
891       VEC_free (loop_p, heap, LOOP_VINFO_LOOP_NEST (loop_vinfo));
892       VEC_free (gimple, heap, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo));
893       VEC_free (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo));
894 
895       free (loop_vinfo);
896       loop->aux = NULL;
897       return;
898     }
899 
900   for (j = 0; j < nbbs; j++)
901     {
902       basic_block bb = bbs[j];
903       for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
904         free_stmt_vec_info (gsi_stmt (si));
905 
906       for (si = gsi_start_bb (bb); !gsi_end_p (si); )
907         {
908           gimple stmt = gsi_stmt (si);
909 	  /* Free stmt_vec_info.  */
910 	  free_stmt_vec_info (stmt);
911           gsi_next (&si);
912         }
913     }
914 
915   free (LOOP_VINFO_BBS (loop_vinfo));
916   free_data_refs (LOOP_VINFO_DATAREFS (loop_vinfo));
917   free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo));
918   VEC_free (loop_p, heap, LOOP_VINFO_LOOP_NEST (loop_vinfo));
919   VEC_free (gimple, heap, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo));
920   VEC_free (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo));
921   slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
922   FOR_EACH_VEC_ELT (slp_instance, slp_instances, j, instance)
923     vect_free_slp_instance (instance);
924 
925   VEC_free (slp_instance, heap, LOOP_VINFO_SLP_INSTANCES (loop_vinfo));
926   VEC_free (gimple, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo));
927   VEC_free (gimple, heap, LOOP_VINFO_REDUCTIONS (loop_vinfo));
928   VEC_free (gimple, heap, LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo));
929 
930   if (LOOP_VINFO_PEELING_HTAB (loop_vinfo))
931     htab_delete (LOOP_VINFO_PEELING_HTAB (loop_vinfo));
932 
933   free (loop_vinfo);
934   loop->aux = NULL;
935 }
936 
937 
938 /* Function vect_analyze_loop_1.
939 
940    Apply a set of analyses on LOOP, and create a loop_vec_info struct
941    for it. The different analyses will record information in the
942    loop_vec_info struct.  This is a subset of the analyses applied in
943    vect_analyze_loop, to be applied on an inner-loop nested in the loop
944    that is now considered for (outer-loop) vectorization.  */
945 
946 static loop_vec_info
947 vect_analyze_loop_1 (struct loop *loop)
948 {
949   loop_vec_info loop_vinfo;
950 
951   if (vect_print_dump_info (REPORT_DETAILS))
952     fprintf (vect_dump, "===== analyze_loop_nest_1 =====");
953 
954   /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.  */
955 
956   loop_vinfo = vect_analyze_loop_form (loop);
957   if (!loop_vinfo)
958     {
959       if (vect_print_dump_info (REPORT_DETAILS))
960         fprintf (vect_dump, "bad inner-loop form.");
961       return NULL;
962     }
963 
964   return loop_vinfo;
965 }
966 
967 
968 /* Function vect_analyze_loop_form.
969 
970    Verify that certain CFG restrictions hold, including:
971    - the loop has a pre-header
972    - the loop has a single entry and exit
973    - the loop exit condition is simple enough, and the number of iterations
974      can be analyzed (a countable loop).  */
975 
976 loop_vec_info
977 vect_analyze_loop_form (struct loop *loop)
978 {
979   loop_vec_info loop_vinfo;
980   gimple loop_cond;
981   tree number_of_iterations = NULL;
982   loop_vec_info inner_loop_vinfo = NULL;
983 
984   if (vect_print_dump_info (REPORT_DETAILS))
985     fprintf (vect_dump, "=== vect_analyze_loop_form ===");
986 
987   /* Different restrictions apply when we are considering an inner-most loop,
988      vs. an outer (nested) loop.
989      (FORNOW. May want to relax some of these restrictions in the future).  */
990 
991   if (!loop->inner)
992     {
993       /* Inner-most loop.  We currently require that the number of BBs is
994 	 exactly 2 (the header and latch).  Vectorizable inner-most loops
995 	 look like this:
996 
997                         (pre-header)
998                            |
999                           header <--------+
1000                            | |            |
1001                            | +--> latch --+
1002                            |
1003                         (exit-bb)  */
1004 
1005       if (loop->num_nodes != 2)
1006         {
1007           if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1008             fprintf (vect_dump, "not vectorized: control flow in loop.");
1009           return NULL;
1010         }
1011 
1012       if (empty_block_p (loop->header))
1013     {
1014           if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1015             fprintf (vect_dump, "not vectorized: empty loop.");
1016       return NULL;
1017     }
1018     }
1019   else
1020     {
1021       struct loop *innerloop = loop->inner;
1022       edge entryedge;
1023 
1024       /* Nested loop. We currently require that the loop is doubly-nested,
1025 	 contains a single inner loop, and the number of BBs is exactly 5.
1026 	 Vectorizable outer-loops look like this:
1027 
1028 			(pre-header)
1029 			   |
1030 			  header <---+
1031 			   |         |
1032 		          inner-loop |
1033 			   |         |
1034 			  tail ------+
1035 			   |
1036 		        (exit-bb)
1037 
1038 	 The inner-loop has the properties expected of inner-most loops
1039 	 as described above.  */
1040 
1041       if ((loop->inner)->inner || (loop->inner)->next)
1042 	{
1043 	  if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1044 	    fprintf (vect_dump, "not vectorized: multiple nested loops.");
1045 	  return NULL;
1046 	}
1047 
1048       /* Analyze the inner-loop.  */
1049       inner_loop_vinfo = vect_analyze_loop_1 (loop->inner);
1050       if (!inner_loop_vinfo)
1051 	{
1052 	  if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1053             fprintf (vect_dump, "not vectorized: Bad inner loop.");
1054 	  return NULL;
1055 	}
1056 
1057       if (!expr_invariant_in_loop_p (loop,
1058 					LOOP_VINFO_NITERS (inner_loop_vinfo)))
1059 	{
1060 	  if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1061 	    fprintf (vect_dump,
1062 		     "not vectorized: inner-loop count not invariant.");
1063 	  destroy_loop_vec_info (inner_loop_vinfo, true);
1064 	  return NULL;
1065 	}
1066 
1067       if (loop->num_nodes != 5)
1068         {
1069 	  if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1070 	    fprintf (vect_dump, "not vectorized: control flow in loop.");
1071 	  destroy_loop_vec_info (inner_loop_vinfo, true);
1072 	  return NULL;
1073         }
1074 
1075       gcc_assert (EDGE_COUNT (innerloop->header->preds) == 2);
1076       entryedge = EDGE_PRED (innerloop->header, 0);
1077       if (EDGE_PRED (innerloop->header, 0)->src == innerloop->latch)
1078 	entryedge = EDGE_PRED (innerloop->header, 1);
1079 
1080       if (entryedge->src != loop->header
1081 	  || !single_exit (innerloop)
1082 	  || single_exit (innerloop)->dest !=  EDGE_PRED (loop->latch, 0)->src)
1083 	{
1084 	  if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1085 	    fprintf (vect_dump, "not vectorized: unsupported outerloop form.");
1086 	  destroy_loop_vec_info (inner_loop_vinfo, true);
1087 	  return NULL;
1088 	}
1089 
1090       if (vect_print_dump_info (REPORT_DETAILS))
1091         fprintf (vect_dump, "Considering outer-loop vectorization.");
1092     }
1093 
1094   if (!single_exit (loop)
1095       || EDGE_COUNT (loop->header->preds) != 2)
1096     {
1097       if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1098         {
1099           if (!single_exit (loop))
1100             fprintf (vect_dump, "not vectorized: multiple exits.");
1101           else if (EDGE_COUNT (loop->header->preds) != 2)
1102             fprintf (vect_dump, "not vectorized: too many incoming edges.");
1103         }
1104       if (inner_loop_vinfo)
1105 	destroy_loop_vec_info (inner_loop_vinfo, true);
1106       return NULL;
1107     }
1108 
1109   /* We assume that the loop exit condition is at the end of the loop. i.e,
1110      that the loop is represented as a do-while (with a proper if-guard
1111      before the loop if needed), where the loop header contains all the
1112      executable statements, and the latch is empty.  */
1113   if (!empty_block_p (loop->latch)
1114         || !gimple_seq_empty_p (phi_nodes (loop->latch)))
1115     {
1116       if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1117         fprintf (vect_dump, "not vectorized: unexpected loop form.");
1118       if (inner_loop_vinfo)
1119 	destroy_loop_vec_info (inner_loop_vinfo, true);
1120       return NULL;
1121     }
1122 
1123   /* Make sure there exists a single-predecessor exit bb:  */
1124   if (!single_pred_p (single_exit (loop)->dest))
1125     {
1126       edge e = single_exit (loop);
1127       if (!(e->flags & EDGE_ABNORMAL))
1128 	{
1129 	  split_loop_exit_edge (e);
1130 	  if (vect_print_dump_info (REPORT_DETAILS))
1131 	    fprintf (vect_dump, "split exit edge.");
1132 	}
1133       else
1134 	{
1135 	  if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1136 	    fprintf (vect_dump, "not vectorized: abnormal loop exit edge.");
1137 	  if (inner_loop_vinfo)
1138 	    destroy_loop_vec_info (inner_loop_vinfo, true);
1139 	  return NULL;
1140 	}
1141     }
1142 
1143   loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
1144   if (!loop_cond)
1145     {
1146       if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1147 	fprintf (vect_dump, "not vectorized: complicated exit condition.");
1148       if (inner_loop_vinfo)
1149 	destroy_loop_vec_info (inner_loop_vinfo, true);
1150       return NULL;
1151     }
1152 
1153   if (!number_of_iterations)
1154     {
1155       if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1156 	fprintf (vect_dump,
1157 		 "not vectorized: number of iterations cannot be computed.");
1158       if (inner_loop_vinfo)
1159 	destroy_loop_vec_info (inner_loop_vinfo, true);
1160       return NULL;
1161     }
1162 
1163   if (chrec_contains_undetermined (number_of_iterations))
1164     {
1165       if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1166         fprintf (vect_dump, "Infinite number of iterations.");
1167       if (inner_loop_vinfo)
1168 	destroy_loop_vec_info (inner_loop_vinfo, true);
1169       return NULL;
1170     }
1171 
1172   if (!NITERS_KNOWN_P (number_of_iterations))
1173     {
1174       if (vect_print_dump_info (REPORT_DETAILS))
1175         {
1176           fprintf (vect_dump, "Symbolic number of iterations is ");
1177           print_generic_expr (vect_dump, number_of_iterations, TDF_DETAILS);
1178         }
1179     }
1180   else if (TREE_INT_CST_LOW (number_of_iterations) == 0)
1181     {
1182       if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1183         fprintf (vect_dump, "not vectorized: number of iterations = 0.");
1184       if (inner_loop_vinfo)
1185         destroy_loop_vec_info (inner_loop_vinfo, false);
1186       return NULL;
1187     }
1188 
1189   loop_vinfo = new_loop_vec_info (loop);
1190   LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
1191   LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = number_of_iterations;
1192 
1193   STMT_VINFO_TYPE (vinfo_for_stmt (loop_cond)) = loop_exit_ctrl_vec_info_type;
1194 
1195   /* CHECKME: May want to keep it around it in the future.  */
1196   if (inner_loop_vinfo)
1197     destroy_loop_vec_info (inner_loop_vinfo, false);
1198 
1199   gcc_assert (!loop->aux);
1200   loop->aux = loop_vinfo;
1201   return loop_vinfo;
1202 }
1203 
1204 
1205 /* Get cost by calling cost target builtin.  */
1206 
1207 static inline int
1208 vect_get_cost (enum vect_cost_for_stmt type_of_cost)
1209 {
1210   tree dummy_type = NULL;
1211   int dummy = 0;
1212 
1213   return targetm.vectorize.builtin_vectorization_cost (type_of_cost,
1214                                                        dummy_type, dummy);
1215 }
1216 
1217 
1218 /* Function vect_analyze_loop_operations.
1219 
1220    Scan the loop stmts and make sure they are all vectorizable.  */
1221 
1222 static bool
1223 vect_analyze_loop_operations (loop_vec_info loop_vinfo, bool slp)
1224 {
1225   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1226   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1227   int nbbs = loop->num_nodes;
1228   gimple_stmt_iterator si;
1229   unsigned int vectorization_factor = 0;
1230   int i;
1231   gimple phi;
1232   stmt_vec_info stmt_info;
1233   bool need_to_vectorize = false;
1234   int min_profitable_iters;
1235   int min_scalar_loop_bound;
1236   unsigned int th;
1237   bool only_slp_in_loop = true, ok;
1238 
1239   if (vect_print_dump_info (REPORT_DETAILS))
1240     fprintf (vect_dump, "=== vect_analyze_loop_operations ===");
1241 
1242   gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
1243   vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1244   if (slp)
1245     {
1246       /* If all the stmts in the loop can be SLPed, we perform only SLP, and
1247 	 vectorization factor of the loop is the unrolling factor required by
1248 	 the SLP instances.  If that unrolling factor is 1, we say, that we
1249 	 perform pure SLP on loop - cross iteration parallelism is not
1250 	 exploited.  */
1251       for (i = 0; i < nbbs; i++)
1252 	{
1253 	  basic_block bb = bbs[i];
1254 	  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1255 	    {
1256 	      gimple stmt = gsi_stmt (si);
1257 	      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1258 	      gcc_assert (stmt_info);
1259 	      if ((STMT_VINFO_RELEVANT_P (stmt_info)
1260 		   || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
1261 		  && !PURE_SLP_STMT (stmt_info))
1262 		/* STMT needs both SLP and loop-based vectorization.  */
1263 		only_slp_in_loop = false;
1264 	    }
1265 	}
1266 
1267       if (only_slp_in_loop)
1268 	vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
1269       else
1270 	vectorization_factor = least_common_multiple (vectorization_factor,
1271 				LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
1272 
1273       LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
1274       if (vect_print_dump_info (REPORT_DETAILS))
1275 	fprintf (vect_dump, "Updating vectorization factor to %d ",
1276 	 		    vectorization_factor);
1277     }
1278 
1279   for (i = 0; i < nbbs; i++)
1280     {
1281       basic_block bb = bbs[i];
1282 
1283       for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
1284         {
1285           phi = gsi_stmt (si);
1286           ok = true;
1287 
1288           stmt_info = vinfo_for_stmt (phi);
1289           if (vect_print_dump_info (REPORT_DETAILS))
1290             {
1291               fprintf (vect_dump, "examining phi: ");
1292               print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
1293             }
1294 
1295           /* Inner-loop loop-closed exit phi in outer-loop vectorization
1296              (i.e., a phi in the tail of the outer-loop).  */
1297           if (! is_loop_header_bb_p (bb))
1298             {
1299               /* FORNOW: we currently don't support the case that these phis
1300                  are not used in the outerloop (unless it is double reduction,
1301                  i.e., this phi is vect_reduction_def), cause this case
1302                  requires to actually do something here.  */
1303               if ((!STMT_VINFO_RELEVANT_P (stmt_info)
1304                    || STMT_VINFO_LIVE_P (stmt_info))
1305                   && STMT_VINFO_DEF_TYPE (stmt_info)
1306                      != vect_double_reduction_def)
1307                 {
1308                   if (vect_print_dump_info (REPORT_DETAILS))
1309                     fprintf (vect_dump,
1310                              "Unsupported loop-closed phi in outer-loop.");
1311                   return false;
1312                 }
1313 
1314               /* If PHI is used in the outer loop, we check that its operand
1315                  is defined in the inner loop.  */
1316               if (STMT_VINFO_RELEVANT_P (stmt_info))
1317                 {
1318                   tree phi_op;
1319                   gimple op_def_stmt;
1320 
1321                   if (gimple_phi_num_args (phi) != 1)
1322                     return false;
1323 
1324                   phi_op = PHI_ARG_DEF (phi, 0);
1325                   if (TREE_CODE (phi_op) != SSA_NAME)
1326                     return false;
1327 
1328                   op_def_stmt = SSA_NAME_DEF_STMT (phi_op);
1329 		  if (!op_def_stmt
1330 		      || !flow_bb_inside_loop_p (loop, gimple_bb (op_def_stmt))
1331 		      || !vinfo_for_stmt (op_def_stmt))
1332                     return false;
1333 
1334                   if (STMT_VINFO_RELEVANT (vinfo_for_stmt (op_def_stmt))
1335                         != vect_used_in_outer
1336                       && STMT_VINFO_RELEVANT (vinfo_for_stmt (op_def_stmt))
1337                            != vect_used_in_outer_by_reduction)
1338                     return false;
1339                 }
1340 
1341               continue;
1342             }
1343 
1344           gcc_assert (stmt_info);
1345 
1346           if (STMT_VINFO_LIVE_P (stmt_info))
1347             {
1348               /* FORNOW: not yet supported.  */
1349               if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1350                 fprintf (vect_dump, "not vectorized: value used after loop.");
1351               return false;
1352             }
1353 
1354           if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope
1355               && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def)
1356             {
1357               /* A scalar-dependence cycle that we don't support.  */
1358               if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1359                 fprintf (vect_dump, "not vectorized: scalar dependence cycle.");
1360               return false;
1361             }
1362 
1363           if (STMT_VINFO_RELEVANT_P (stmt_info))
1364             {
1365               need_to_vectorize = true;
1366               if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
1367                 ok = vectorizable_induction (phi, NULL, NULL);
1368             }
1369 
1370           if (!ok)
1371             {
1372               if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1373                 {
1374                   fprintf (vect_dump,
1375                            "not vectorized: relevant phi not supported: ");
1376                   print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
1377                 }
1378               return false;
1379             }
1380         }
1381 
1382       for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1383         {
1384           gimple stmt = gsi_stmt (si);
1385 	  if (!vect_analyze_stmt (stmt, &need_to_vectorize, NULL))
1386 	    return false;
1387         }
1388     } /* bbs */
1389 
1390   /* All operations in the loop are either irrelevant (deal with loop
1391      control, or dead), or only used outside the loop and can be moved
1392      out of the loop (e.g. invariants, inductions).  The loop can be
1393      optimized away by scalar optimizations.  We're better off not
1394      touching this loop.  */
1395   if (!need_to_vectorize)
1396     {
1397       if (vect_print_dump_info (REPORT_DETAILS))
1398         fprintf (vect_dump,
1399                  "All the computation can be taken out of the loop.");
1400       if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1401         fprintf (vect_dump,
1402                  "not vectorized: redundant loop. no profit to vectorize.");
1403       return false;
1404     }
1405 
1406   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1407       && vect_print_dump_info (REPORT_DETAILS))
1408     fprintf (vect_dump,
1409         "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
1410         vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
1411 
1412   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1413       && (LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor))
1414     {
1415       if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1416         fprintf (vect_dump, "not vectorized: iteration count too small.");
1417       if (vect_print_dump_info (REPORT_DETAILS))
1418         fprintf (vect_dump,"not vectorized: iteration count smaller than "
1419                  "vectorization factor.");
1420       return false;
1421     }
1422 
1423   /* Analyze cost.  Decide if worth while to vectorize.  */
1424 
1425   /* Once VF is set, SLP costs should be updated since the number of created
1426      vector stmts depends on VF.  */
1427   vect_update_slp_costs_according_to_vf (loop_vinfo);
1428 
1429   min_profitable_iters = vect_estimate_min_profitable_iters (loop_vinfo);
1430   LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo) = min_profitable_iters;
1431 
1432   if (min_profitable_iters < 0)
1433     {
1434       if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1435         fprintf (vect_dump, "not vectorized: vectorization not profitable.");
1436       if (vect_print_dump_info (REPORT_DETAILS))
1437         fprintf (vect_dump, "not vectorized: vector version will never be "
1438                  "profitable.");
1439       return false;
1440     }
1441 
1442   min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
1443                             * vectorization_factor) - 1);
1444 
1445   /* Use the cost model only if it is more conservative than user specified
1446      threshold.  */
1447 
1448   th = (unsigned) min_scalar_loop_bound;
1449   if (min_profitable_iters
1450       && (!min_scalar_loop_bound
1451           || min_profitable_iters > min_scalar_loop_bound))
1452     th = (unsigned) min_profitable_iters;
1453 
1454   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1455       && LOOP_VINFO_INT_NITERS (loop_vinfo) <= th)
1456     {
1457       if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1458         fprintf (vect_dump, "not vectorized: vectorization not "
1459                  "profitable.");
1460       if (vect_print_dump_info (REPORT_DETAILS))
1461         fprintf (vect_dump, "not vectorized: iteration count smaller than "
1462                  "user specified loop bound parameter or minimum "
1463                  "profitable iterations (whichever is more conservative).");
1464       return false;
1465     }
1466 
1467   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1468       || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0
1469       || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
1470     {
1471       if (vect_print_dump_info (REPORT_DETAILS))
1472         fprintf (vect_dump, "epilog loop required.");
1473       if (!vect_can_advance_ivs_p (loop_vinfo))
1474         {
1475           if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1476             fprintf (vect_dump,
1477                      "not vectorized: can't create epilog loop 1.");
1478           return false;
1479         }
1480       if (!slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1481         {
1482           if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1483             fprintf (vect_dump,
1484                      "not vectorized: can't create epilog loop 2.");
1485           return false;
1486         }
1487     }
1488 
1489   return true;
1490 }
1491 
1492 
1493 /* Function vect_analyze_loop_2.
1494 
1495    Apply a set of analyses on LOOP, and create a loop_vec_info struct
1496    for it.  The different analyses will record information in the
1497    loop_vec_info struct.  */
1498 static bool
1499 vect_analyze_loop_2 (loop_vec_info loop_vinfo)
1500 {
1501   bool ok, slp = false;
1502   int max_vf = MAX_VECTORIZATION_FACTOR;
1503   int min_vf = 2;
1504 
1505   /* Find all data references in the loop (which correspond to vdefs/vuses)
1506      and analyze their evolution in the loop.  Also adjust the minimal
1507      vectorization factor according to the loads and stores.
1508 
1509      FORNOW: Handle only simple, array references, which
1510      alignment can be forced, and aligned pointer-references.  */
1511 
1512   ok = vect_analyze_data_refs (loop_vinfo, NULL, &min_vf);
1513   if (!ok)
1514     {
1515       if (vect_print_dump_info (REPORT_DETAILS))
1516 	fprintf (vect_dump, "bad data references.");
1517       return false;
1518     }
1519 
1520   /* Classify all cross-iteration scalar data-flow cycles.
1521      Cross-iteration cycles caused by virtual phis are analyzed separately.  */
1522 
1523   vect_analyze_scalar_cycles (loop_vinfo);
1524 
1525   vect_pattern_recog (loop_vinfo);
1526 
1527   /* Data-flow analysis to detect stmts that do not need to be vectorized.  */
1528 
1529   ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
1530   if (!ok)
1531     {
1532       if (vect_print_dump_info (REPORT_DETAILS))
1533 	fprintf (vect_dump, "unexpected pattern.");
1534       return false;
1535     }
1536 
1537   /* Analyze data dependences between the data-refs in the loop
1538      and adjust the maximum vectorization factor according to
1539      the dependences.
1540      FORNOW: fail at the first data dependence that we encounter.  */
1541 
1542   ok = vect_analyze_data_ref_dependences (loop_vinfo, NULL, &max_vf);
1543   if (!ok
1544       || max_vf < min_vf)
1545     {
1546       if (vect_print_dump_info (REPORT_DETAILS))
1547 	fprintf (vect_dump, "bad data dependence.");
1548       return false;
1549     }
1550 
1551   ok = vect_determine_vectorization_factor (loop_vinfo);
1552   if (!ok)
1553     {
1554       if (vect_print_dump_info (REPORT_DETAILS))
1555         fprintf (vect_dump, "can't determine vectorization factor.");
1556       return false;
1557     }
1558   if (max_vf < LOOP_VINFO_VECT_FACTOR (loop_vinfo))
1559     {
1560       if (vect_print_dump_info (REPORT_DETAILS))
1561 	fprintf (vect_dump, "bad data dependence.");
1562       return false;
1563     }
1564 
1565   /* Analyze the alignment of the data-refs in the loop.
1566      Fail if a data reference is found that cannot be vectorized.  */
1567 
1568   ok = vect_analyze_data_refs_alignment (loop_vinfo, NULL);
1569   if (!ok)
1570     {
1571       if (vect_print_dump_info (REPORT_DETAILS))
1572 	fprintf (vect_dump, "bad data alignment.");
1573       return false;
1574     }
1575 
1576   /* Analyze the access patterns of the data-refs in the loop (consecutive,
1577      complex, etc.). FORNOW: Only handle consecutive access pattern.  */
1578 
1579   ok = vect_analyze_data_ref_accesses (loop_vinfo, NULL);
1580   if (!ok)
1581     {
1582       if (vect_print_dump_info (REPORT_DETAILS))
1583 	fprintf (vect_dump, "bad data access.");
1584       return false;
1585     }
1586 
1587   /* Prune the list of ddrs to be tested at run-time by versioning for alias.
1588      It is important to call pruning after vect_analyze_data_ref_accesses,
1589      since we use grouping information gathered by interleaving analysis.  */
1590   ok = vect_prune_runtime_alias_test_list (loop_vinfo);
1591   if (!ok)
1592     {
1593       if (vect_print_dump_info (REPORT_DETAILS))
1594 	fprintf (vect_dump, "too long list of versioning for alias "
1595 			    "run-time tests.");
1596       return false;
1597     }
1598 
1599   /* This pass will decide on using loop versioning and/or loop peeling in
1600      order to enhance the alignment of data references in the loop.  */
1601 
1602   ok = vect_enhance_data_refs_alignment (loop_vinfo);
1603   if (!ok)
1604     {
1605       if (vect_print_dump_info (REPORT_DETAILS))
1606         fprintf (vect_dump, "bad data alignment.");
1607       return false;
1608     }
1609 
1610   /* Check the SLP opportunities in the loop, analyze and build SLP trees.  */
1611   ok = vect_analyze_slp (loop_vinfo, NULL);
1612   if (ok)
1613     {
1614       /* Decide which possible SLP instances to SLP.  */
1615       slp = vect_make_slp_decision (loop_vinfo);
1616 
1617       /* Find stmts that need to be both vectorized and SLPed.  */
1618       vect_detect_hybrid_slp (loop_vinfo);
1619     }
1620   else
1621     return false;
1622 
1623   /* Scan all the operations in the loop and make sure they are
1624      vectorizable.  */
1625 
1626   ok = vect_analyze_loop_operations (loop_vinfo, slp);
1627   if (!ok)
1628     {
1629       if (vect_print_dump_info (REPORT_DETAILS))
1630 	fprintf (vect_dump, "bad operation or unsupported loop bound.");
1631       return false;
1632     }
1633 
1634   return true;
1635 }
1636 
1637 /* Function vect_analyze_loop.
1638 
1639    Apply a set of analyses on LOOP, and create a loop_vec_info struct
1640    for it.  The different analyses will record information in the
1641    loop_vec_info struct.  */
1642 loop_vec_info
1643 vect_analyze_loop (struct loop *loop)
1644 {
1645   loop_vec_info loop_vinfo;
1646   unsigned int vector_sizes;
1647 
1648   /* Autodetect first vector size we try.  */
1649   current_vector_size = 0;
1650   vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
1651 
1652   if (vect_print_dump_info (REPORT_DETAILS))
1653     fprintf (vect_dump, "===== analyze_loop_nest =====");
1654 
1655   if (loop_outer (loop)
1656       && loop_vec_info_for_loop (loop_outer (loop))
1657       && LOOP_VINFO_VECTORIZABLE_P (loop_vec_info_for_loop (loop_outer (loop))))
1658     {
1659       if (vect_print_dump_info (REPORT_DETAILS))
1660 	fprintf (vect_dump, "outer-loop already vectorized.");
1661       return NULL;
1662     }
1663 
1664   while (1)
1665     {
1666       /* Check the CFG characteristics of the loop (nesting, entry/exit).  */
1667       loop_vinfo = vect_analyze_loop_form (loop);
1668       if (!loop_vinfo)
1669 	{
1670 	  if (vect_print_dump_info (REPORT_DETAILS))
1671 	    fprintf (vect_dump, "bad loop form.");
1672 	  return NULL;
1673 	}
1674 
1675       if (vect_analyze_loop_2 (loop_vinfo))
1676 	{
1677 	  LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
1678 
1679 	  return loop_vinfo;
1680 	}
1681 
1682       destroy_loop_vec_info (loop_vinfo, true);
1683 
1684       vector_sizes &= ~current_vector_size;
1685       if (vector_sizes == 0
1686 	  || current_vector_size == 0)
1687 	return NULL;
1688 
1689       /* Try the next biggest vector size.  */
1690       current_vector_size = 1 << floor_log2 (vector_sizes);
1691       if (vect_print_dump_info (REPORT_DETAILS))
1692 	fprintf (vect_dump, "***** Re-trying analysis with "
1693 		 "vector size %d\n", current_vector_size);
1694     }
1695 }
1696 
1697 
1698 /* Function reduction_code_for_scalar_code
1699 
1700    Input:
1701    CODE - tree_code of a reduction operations.
1702 
1703    Output:
1704    REDUC_CODE - the corresponding tree-code to be used to reduce the
1705       vector of partial results into a single scalar result (which
1706       will also reside in a vector) or ERROR_MARK if the operation is
1707       a supported reduction operation, but does not have such tree-code.
1708 
1709    Return FALSE if CODE currently cannot be vectorized as reduction.  */
1710 
1711 static bool
1712 reduction_code_for_scalar_code (enum tree_code code,
1713                                 enum tree_code *reduc_code)
1714 {
1715   switch (code)
1716     {
1717       case MAX_EXPR:
1718         *reduc_code = REDUC_MAX_EXPR;
1719         return true;
1720 
1721       case MIN_EXPR:
1722         *reduc_code = REDUC_MIN_EXPR;
1723         return true;
1724 
1725       case PLUS_EXPR:
1726         *reduc_code = REDUC_PLUS_EXPR;
1727         return true;
1728 
1729       case MULT_EXPR:
1730       case MINUS_EXPR:
1731       case BIT_IOR_EXPR:
1732       case BIT_XOR_EXPR:
1733       case BIT_AND_EXPR:
1734         *reduc_code = ERROR_MARK;
1735         return true;
1736 
1737       default:
1738        return false;
1739     }
1740 }
1741 
1742 
1743 /* Error reporting helper for vect_is_simple_reduction below.  GIMPLE statement
1744    STMT is printed with a message MSG. */
1745 
1746 static void
1747 report_vect_op (gimple stmt, const char *msg)
1748 {
1749   fprintf (vect_dump, "%s", msg);
1750   print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1751 }
1752 
1753 
1754 /* Detect SLP reduction of the form:
1755 
1756    #a1 = phi <a5, a0>
1757    a2 = operation (a1)
1758    a3 = operation (a2)
1759    a4 = operation (a3)
1760    a5 = operation (a4)
1761 
1762    #a = phi <a5>
1763 
1764    PHI is the reduction phi node (#a1 = phi <a5, a0> above)
1765    FIRST_STMT is the first reduction stmt in the chain
1766    (a2 = operation (a1)).
1767 
1768    Return TRUE if a reduction chain was detected.  */
1769 
1770 static bool
1771 vect_is_slp_reduction (loop_vec_info loop_info, gimple phi, gimple first_stmt)
1772 {
1773   struct loop *loop = (gimple_bb (phi))->loop_father;
1774   struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
1775   enum tree_code code;
1776   gimple current_stmt = NULL, loop_use_stmt = NULL, first, next_stmt;
1777   stmt_vec_info use_stmt_info, current_stmt_info;
1778   tree lhs;
1779   imm_use_iterator imm_iter;
1780   use_operand_p use_p;
1781   int nloop_uses, size = 0, n_out_of_loop_uses;
1782   bool found = false;
1783 
1784   if (loop != vect_loop)
1785     return false;
1786 
1787   lhs = PHI_RESULT (phi);
1788   code = gimple_assign_rhs_code (first_stmt);
1789   while (1)
1790     {
1791       nloop_uses = 0;
1792       n_out_of_loop_uses = 0;
1793       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
1794         {
1795 	  gimple use_stmt = USE_STMT (use_p);
1796           if (is_gimple_debug (use_stmt))
1797             continue;
1798 
1799 	  use_stmt = USE_STMT (use_p);
1800 
1801           /* Check if we got back to the reduction phi.  */
1802 	  if (use_stmt == phi)
1803             {
1804 	      loop_use_stmt = use_stmt;
1805               found = true;
1806               break;
1807             }
1808 
1809           if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
1810             {
1811               if (vinfo_for_stmt (use_stmt)
1812                   && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
1813                 {
1814                   loop_use_stmt = use_stmt;
1815                   nloop_uses++;
1816                 }
1817             }
1818            else
1819              n_out_of_loop_uses++;
1820 
1821            /* There are can be either a single use in the loop or two uses in
1822               phi nodes.  */
1823            if (nloop_uses > 1 || (n_out_of_loop_uses && nloop_uses))
1824              return false;
1825         }
1826 
1827       if (found)
1828         break;
1829 
1830       /* We reached a statement with no loop uses.  */
1831       if (nloop_uses == 0)
1832 	return false;
1833 
1834       /* This is a loop exit phi, and we haven't reached the reduction phi.  */
1835       if (gimple_code (loop_use_stmt) == GIMPLE_PHI)
1836         return false;
1837 
1838       if (!is_gimple_assign (loop_use_stmt)
1839 	  || code != gimple_assign_rhs_code (loop_use_stmt)
1840 	  || !flow_bb_inside_loop_p (loop, gimple_bb (loop_use_stmt)))
1841         return false;
1842 
1843       /* Insert USE_STMT into reduction chain.  */
1844       use_stmt_info = vinfo_for_stmt (loop_use_stmt);
1845       if (current_stmt)
1846         {
1847           current_stmt_info = vinfo_for_stmt (current_stmt);
1848 	  GROUP_NEXT_ELEMENT (current_stmt_info) = loop_use_stmt;
1849           GROUP_FIRST_ELEMENT (use_stmt_info)
1850             = GROUP_FIRST_ELEMENT (current_stmt_info);
1851         }
1852       else
1853 	GROUP_FIRST_ELEMENT (use_stmt_info) = loop_use_stmt;
1854 
1855       lhs = gimple_assign_lhs (loop_use_stmt);
1856       current_stmt = loop_use_stmt;
1857       size++;
1858    }
1859 
1860   if (!found || loop_use_stmt != phi || size < 2)
1861     return false;
1862 
1863   /* Swap the operands, if needed, to make the reduction operand be the second
1864      operand.  */
1865   lhs = PHI_RESULT (phi);
1866   next_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (current_stmt));
1867   while (next_stmt)
1868     {
1869       if (gimple_assign_rhs2 (next_stmt) == lhs)
1870 	{
1871 	  tree op = gimple_assign_rhs1 (next_stmt);
1872           gimple def_stmt = NULL;
1873 
1874           if (TREE_CODE (op) == SSA_NAME)
1875             def_stmt = SSA_NAME_DEF_STMT (op);
1876 
1877 	  /* Check that the other def is either defined in the loop
1878 	     ("vect_internal_def"), or it's an induction (defined by a
1879 	     loop-header phi-node).  */
1880           if (def_stmt
1881               && gimple_bb (def_stmt)
1882 	      && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
1883               && (is_gimple_assign (def_stmt)
1884                   || is_gimple_call (def_stmt)
1885                   || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
1886                            == vect_induction_def
1887                   || (gimple_code (def_stmt) == GIMPLE_PHI
1888                       && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
1889                                   == vect_internal_def
1890                       && !is_loop_header_bb_p (gimple_bb (def_stmt)))))
1891 	    {
1892 	      lhs = gimple_assign_lhs (next_stmt);
1893 	      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
1894  	      continue;
1895 	    }
1896 
1897 	  return false;
1898 	}
1899       else
1900 	{
1901           tree op = gimple_assign_rhs2 (next_stmt);
1902           gimple def_stmt = NULL;
1903 
1904           if (TREE_CODE (op) == SSA_NAME)
1905             def_stmt = SSA_NAME_DEF_STMT (op);
1906 
1907           /* Check that the other def is either defined in the loop
1908             ("vect_internal_def"), or it's an induction (defined by a
1909             loop-header phi-node).  */
1910           if (def_stmt
1911               && gimple_bb (def_stmt)
1912 	      && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
1913               && (is_gimple_assign (def_stmt)
1914                   || is_gimple_call (def_stmt)
1915                   || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
1916                               == vect_induction_def
1917                   || (gimple_code (def_stmt) == GIMPLE_PHI
1918                       && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
1919                                   == vect_internal_def
1920                       && !is_loop_header_bb_p (gimple_bb (def_stmt)))))
1921   	    {
1922 	      if (vect_print_dump_info (REPORT_DETAILS))
1923 		{
1924 		  fprintf (vect_dump, "swapping oprnds: ");
1925 		  print_gimple_stmt (vect_dump, next_stmt, 0, TDF_SLIM);
1926 		}
1927 
1928 	      swap_tree_operands (next_stmt,
1929 	 		          gimple_assign_rhs1_ptr (next_stmt),
1930                                   gimple_assign_rhs2_ptr (next_stmt));
1931 	      mark_symbols_for_renaming (next_stmt);
1932 	    }
1933 	  else
1934 	    return false;
1935         }
1936 
1937       lhs = gimple_assign_lhs (next_stmt);
1938       next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
1939     }
1940 
1941   /* Save the chain for further analysis in SLP detection.  */
1942   first = GROUP_FIRST_ELEMENT (vinfo_for_stmt (current_stmt));
1943   VEC_safe_push (gimple, heap, LOOP_VINFO_REDUCTION_CHAINS (loop_info), first);
1944   GROUP_SIZE (vinfo_for_stmt (first)) = size;
1945 
1946   return true;
1947 }
1948 
1949 
1950 /* Function vect_is_simple_reduction_1
1951 
1952    (1) Detect a cross-iteration def-use cycle that represents a simple
1953    reduction computation.  We look for the following pattern:
1954 
1955    loop_header:
1956      a1 = phi < a0, a2 >
1957      a3 = ...
1958      a2 = operation (a3, a1)
1959 
1960    such that:
1961    1. operation is commutative and associative and it is safe to
1962       change the order of the computation (if CHECK_REDUCTION is true)
1963    2. no uses for a2 in the loop (a2 is used out of the loop)
1964    3. no uses of a1 in the loop besides the reduction operation
1965    4. no uses of a1 outside the loop.
1966 
1967    Conditions 1,4 are tested here.
1968    Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized.
1969 
1970    (2) Detect a cross-iteration def-use cycle in nested loops, i.e.,
1971    nested cycles, if CHECK_REDUCTION is false.
1972 
1973    (3) Detect cycles of phi nodes in outer-loop vectorization, i.e., double
1974    reductions:
1975 
1976      a1 = phi < a0, a2 >
1977      inner loop (def of a3)
1978      a2 = phi < a3 >
1979 
1980    If MODIFY is true it tries also to rework the code in-place to enable
1981    detection of more reduction patterns.  For the time being we rewrite
1982    "res -= RHS" into "rhs += -RHS" when it seems worthwhile.
1983 */
1984 
1985 static gimple
1986 vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple phi,
1987 			    bool check_reduction, bool *double_reduc,
1988 			    bool modify)
1989 {
1990   struct loop *loop = (gimple_bb (phi))->loop_father;
1991   struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
1992   edge latch_e = loop_latch_edge (loop);
1993   tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
1994   gimple def_stmt, def1 = NULL, def2 = NULL;
1995   enum tree_code orig_code, code;
1996   tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE;
1997   tree type;
1998   int nloop_uses;
1999   tree name;
2000   imm_use_iterator imm_iter;
2001   use_operand_p use_p;
2002   bool phi_def;
2003 
2004   *double_reduc = false;
2005 
2006   /* If CHECK_REDUCTION is true, we assume inner-most loop vectorization,
2007      otherwise, we assume outer loop vectorization.  */
2008   gcc_assert ((check_reduction && loop == vect_loop)
2009               || (!check_reduction && flow_loop_nested_p (vect_loop, loop)));
2010 
2011   name = PHI_RESULT (phi);
2012   nloop_uses = 0;
2013   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
2014     {
2015       gimple use_stmt = USE_STMT (use_p);
2016       if (is_gimple_debug (use_stmt))
2017 	continue;
2018 
2019       if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
2020         {
2021           if (vect_print_dump_info (REPORT_DETAILS))
2022             fprintf (vect_dump, "intermediate value used outside loop.");
2023 
2024           return NULL;
2025         }
2026 
2027       if (vinfo_for_stmt (use_stmt)
2028 	  && !is_pattern_stmt_p (vinfo_for_stmt (use_stmt)))
2029         nloop_uses++;
2030       if (nloop_uses > 1)
2031         {
2032           if (vect_print_dump_info (REPORT_DETAILS))
2033             fprintf (vect_dump, "reduction used in loop.");
2034           return NULL;
2035         }
2036     }
2037 
2038   if (TREE_CODE (loop_arg) != SSA_NAME)
2039     {
2040       if (vect_print_dump_info (REPORT_DETAILS))
2041 	{
2042 	  fprintf (vect_dump, "reduction: not ssa_name: ");
2043 	  print_generic_expr (vect_dump, loop_arg, TDF_SLIM);
2044 	}
2045       return NULL;
2046     }
2047 
2048   def_stmt = SSA_NAME_DEF_STMT (loop_arg);
2049   if (!def_stmt)
2050     {
2051       if (vect_print_dump_info (REPORT_DETAILS))
2052 	fprintf (vect_dump, "reduction: no def_stmt.");
2053       return NULL;
2054     }
2055 
2056   if (!is_gimple_assign (def_stmt) && gimple_code (def_stmt) != GIMPLE_PHI)
2057     {
2058       if (vect_print_dump_info (REPORT_DETAILS))
2059         print_gimple_stmt (vect_dump, def_stmt, 0, TDF_SLIM);
2060       return NULL;
2061     }
2062 
2063   if (is_gimple_assign (def_stmt))
2064     {
2065       name = gimple_assign_lhs (def_stmt);
2066       phi_def = false;
2067     }
2068   else
2069     {
2070       name = PHI_RESULT (def_stmt);
2071       phi_def = true;
2072     }
2073 
2074   nloop_uses = 0;
2075   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
2076     {
2077       gimple use_stmt = USE_STMT (use_p);
2078       if (is_gimple_debug (use_stmt))
2079 	continue;
2080       if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))
2081 	  && vinfo_for_stmt (use_stmt)
2082 	  && !is_pattern_stmt_p (vinfo_for_stmt (use_stmt)))
2083 	nloop_uses++;
2084       if (nloop_uses > 1)
2085 	{
2086 	  if (vect_print_dump_info (REPORT_DETAILS))
2087 	    fprintf (vect_dump, "reduction used in loop.");
2088 	  return NULL;
2089 	}
2090     }
2091 
2092   /* If DEF_STMT is a phi node itself, we expect it to have a single argument
2093      defined in the inner loop.  */
2094   if (phi_def)
2095     {
2096       op1 = PHI_ARG_DEF (def_stmt, 0);
2097 
2098       if (gimple_phi_num_args (def_stmt) != 1
2099           || TREE_CODE (op1) != SSA_NAME)
2100         {
2101           if (vect_print_dump_info (REPORT_DETAILS))
2102             fprintf (vect_dump, "unsupported phi node definition.");
2103 
2104           return NULL;
2105         }
2106 
2107       def1 = SSA_NAME_DEF_STMT (op1);
2108       if (flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
2109           && loop->inner
2110           && flow_bb_inside_loop_p (loop->inner, gimple_bb (def1))
2111           && is_gimple_assign (def1))
2112         {
2113           if (vect_print_dump_info (REPORT_DETAILS))
2114             report_vect_op (def_stmt, "detected double reduction: ");
2115 
2116           *double_reduc = true;
2117           return def_stmt;
2118         }
2119 
2120       return NULL;
2121     }
2122 
2123   code = orig_code = gimple_assign_rhs_code (def_stmt);
2124 
2125   /* We can handle "res -= x[i]", which is non-associative by
2126      simply rewriting this into "res += -x[i]".  Avoid changing
2127      gimple instruction for the first simple tests and only do this
2128      if we're allowed to change code at all.  */
2129   if (code == MINUS_EXPR
2130       && modify
2131       && (op1 = gimple_assign_rhs1 (def_stmt))
2132       && TREE_CODE (op1) == SSA_NAME
2133       && SSA_NAME_DEF_STMT (op1) == phi)
2134     code = PLUS_EXPR;
2135 
2136   if (check_reduction
2137       && (!commutative_tree_code (code) || !associative_tree_code (code)))
2138     {
2139       if (vect_print_dump_info (REPORT_DETAILS))
2140         report_vect_op (def_stmt, "reduction: not commutative/associative: ");
2141       return NULL;
2142     }
2143 
2144   if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
2145     {
2146       if (code != COND_EXPR)
2147         {
2148           if (vect_print_dump_info (REPORT_DETAILS))
2149 	    report_vect_op (def_stmt, "reduction: not binary operation: ");
2150 
2151           return NULL;
2152         }
2153 
2154       op3 = gimple_assign_rhs1 (def_stmt);
2155       if (COMPARISON_CLASS_P (op3))
2156         {
2157           op4 = TREE_OPERAND (op3, 1);
2158           op3 = TREE_OPERAND (op3, 0);
2159         }
2160 
2161       op1 = gimple_assign_rhs2 (def_stmt);
2162       op2 = gimple_assign_rhs3 (def_stmt);
2163 
2164       if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
2165         {
2166           if (vect_print_dump_info (REPORT_DETAILS))
2167             report_vect_op (def_stmt, "reduction: uses not ssa_names: ");
2168 
2169           return NULL;
2170         }
2171     }
2172   else
2173     {
2174       op1 = gimple_assign_rhs1 (def_stmt);
2175       op2 = gimple_assign_rhs2 (def_stmt);
2176 
2177       if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
2178         {
2179           if (vect_print_dump_info (REPORT_DETAILS))
2180 	    report_vect_op (def_stmt, "reduction: uses not ssa_names: ");
2181 
2182           return NULL;
2183         }
2184    }
2185 
2186   type = TREE_TYPE (gimple_assign_lhs (def_stmt));
2187   if ((TREE_CODE (op1) == SSA_NAME
2188        && !types_compatible_p (type,TREE_TYPE (op1)))
2189       || (TREE_CODE (op2) == SSA_NAME
2190           && !types_compatible_p (type, TREE_TYPE (op2)))
2191       || (op3 && TREE_CODE (op3) == SSA_NAME
2192           && !types_compatible_p (type, TREE_TYPE (op3)))
2193       || (op4 && TREE_CODE (op4) == SSA_NAME
2194           && !types_compatible_p (type, TREE_TYPE (op4))))
2195     {
2196       if (vect_print_dump_info (REPORT_DETAILS))
2197         {
2198           fprintf (vect_dump, "reduction: multiple types: operation type: ");
2199           print_generic_expr (vect_dump, type, TDF_SLIM);
2200           fprintf (vect_dump, ", operands types: ");
2201           print_generic_expr (vect_dump, TREE_TYPE (op1), TDF_SLIM);
2202           fprintf (vect_dump, ",");
2203           print_generic_expr (vect_dump, TREE_TYPE (op2), TDF_SLIM);
2204           if (op3)
2205             {
2206               fprintf (vect_dump, ",");
2207               print_generic_expr (vect_dump, TREE_TYPE (op3), TDF_SLIM);
2208             }
2209 
2210           if (op4)
2211             {
2212               fprintf (vect_dump, ",");
2213               print_generic_expr (vect_dump, TREE_TYPE (op4), TDF_SLIM);
2214             }
2215         }
2216 
2217       return NULL;
2218     }
2219 
2220   /* Check that it's ok to change the order of the computation.
2221      Generally, when vectorizing a reduction we change the order of the
2222      computation.  This may change the behavior of the program in some
2223      cases, so we need to check that this is ok.  One exception is when
2224      vectorizing an outer-loop: the inner-loop is executed sequentially,
2225      and therefore vectorizing reductions in the inner-loop during
2226      outer-loop vectorization is safe.  */
2227 
2228   /* CHECKME: check for !flag_finite_math_only too?  */
2229   if (SCALAR_FLOAT_TYPE_P (type) && !flag_associative_math
2230       && check_reduction)
2231     {
2232       /* Changing the order of operations changes the semantics.  */
2233       if (vect_print_dump_info (REPORT_DETAILS))
2234 	report_vect_op (def_stmt, "reduction: unsafe fp math optimization: ");
2235       return NULL;
2236     }
2237   else if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type)
2238 	   && check_reduction)
2239     {
2240       /* Changing the order of operations changes the semantics.  */
2241       if (vect_print_dump_info (REPORT_DETAILS))
2242 	report_vect_op (def_stmt, "reduction: unsafe int math optimization: ");
2243       return NULL;
2244     }
2245   else if (SAT_FIXED_POINT_TYPE_P (type) && check_reduction)
2246     {
2247       /* Changing the order of operations changes the semantics.  */
2248       if (vect_print_dump_info (REPORT_DETAILS))
2249 	report_vect_op (def_stmt,
2250 			"reduction: unsafe fixed-point math optimization: ");
2251       return NULL;
2252     }
2253 
2254   /* If we detected "res -= x[i]" earlier, rewrite it into
2255      "res += -x[i]" now.  If this turns out to be useless reassoc
2256      will clean it up again.  */
2257   if (orig_code == MINUS_EXPR)
2258     {
2259       tree rhs = gimple_assign_rhs2 (def_stmt);
2260       tree var = TREE_CODE (rhs) == SSA_NAME
2261 		 ? SSA_NAME_VAR (rhs)
2262 		 : create_tmp_reg (TREE_TYPE (rhs), NULL);
2263       tree negrhs = make_ssa_name (var, NULL);
2264       gimple negate_stmt = gimple_build_assign_with_ops (NEGATE_EXPR, negrhs,
2265 							 rhs, NULL);
2266       gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
2267       set_vinfo_for_stmt (negate_stmt, new_stmt_vec_info (negate_stmt,
2268 							  loop_info, NULL));
2269       gsi_insert_before (&gsi, negate_stmt, GSI_NEW_STMT);
2270       gimple_assign_set_rhs2 (def_stmt, negrhs);
2271       gimple_assign_set_rhs_code (def_stmt, PLUS_EXPR);
2272       update_stmt (def_stmt);
2273     }
2274 
2275   /* Reduction is safe. We're dealing with one of the following:
2276      1) integer arithmetic and no trapv
2277      2) floating point arithmetic, and special flags permit this optimization
2278      3) nested cycle (i.e., outer loop vectorization).  */
2279   if (TREE_CODE (op1) == SSA_NAME)
2280     def1 = SSA_NAME_DEF_STMT (op1);
2281 
2282   if (TREE_CODE (op2) == SSA_NAME)
2283     def2 = SSA_NAME_DEF_STMT (op2);
2284 
2285   if (code != COND_EXPR
2286       && ((!def1 || gimple_nop_p (def1)) && (!def2 || gimple_nop_p (def2))))
2287     {
2288       if (vect_print_dump_info (REPORT_DETAILS))
2289 	report_vect_op (def_stmt, "reduction: no defs for operands: ");
2290       return NULL;
2291     }
2292 
2293   /* Check that one def is the reduction def, defined by PHI,
2294      the other def is either defined in the loop ("vect_internal_def"),
2295      or it's an induction (defined by a loop-header phi-node).  */
2296 
2297   if (def2 && def2 == phi
2298       && (code == COND_EXPR
2299 	  || !def1 || gimple_nop_p (def1)
2300           || (def1 && flow_bb_inside_loop_p (loop, gimple_bb (def1))
2301               && (is_gimple_assign (def1)
2302 		  || is_gimple_call (def1)
2303   	          || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1))
2304                       == vect_induction_def
2305    	          || (gimple_code (def1) == GIMPLE_PHI
2306 	              && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1))
2307                           == vect_internal_def
2308  	              && !is_loop_header_bb_p (gimple_bb (def1)))))))
2309     {
2310       if (vect_print_dump_info (REPORT_DETAILS))
2311 	report_vect_op (def_stmt, "detected reduction: ");
2312       return def_stmt;
2313     }
2314 
2315   if (def1 && def1 == phi
2316       && (code == COND_EXPR
2317 	  || !def2 || gimple_nop_p (def2)
2318           || (def2 && flow_bb_inside_loop_p (loop, gimple_bb (def2))
2319  	      && (is_gimple_assign (def2)
2320 		  || is_gimple_call (def2)
2321 	          || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
2322                       == vect_induction_def
2323  	          || (gimple_code (def2) == GIMPLE_PHI
2324 		      && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
2325                           == vect_internal_def
2326 		      && !is_loop_header_bb_p (gimple_bb (def2)))))))
2327     {
2328       if (check_reduction)
2329         {
2330           /* Swap operands (just for simplicity - so that the rest of the code
2331 	     can assume that the reduction variable is always the last (second)
2332 	     argument).  */
2333           if (vect_print_dump_info (REPORT_DETAILS))
2334 	    report_vect_op (def_stmt,
2335 	  	            "detected reduction: need to swap operands: ");
2336 
2337           swap_tree_operands (def_stmt, gimple_assign_rhs1_ptr (def_stmt),
2338  			      gimple_assign_rhs2_ptr (def_stmt));
2339         }
2340       else
2341         {
2342           if (vect_print_dump_info (REPORT_DETAILS))
2343             report_vect_op (def_stmt, "detected reduction: ");
2344         }
2345 
2346       return def_stmt;
2347     }
2348 
2349   /* Try to find SLP reduction chain.  */
2350   if (check_reduction && vect_is_slp_reduction (loop_info, phi, def_stmt))
2351     {
2352       if (vect_print_dump_info (REPORT_DETAILS))
2353         report_vect_op (def_stmt, "reduction: detected reduction chain: ");
2354 
2355       return def_stmt;
2356     }
2357 
2358   if (vect_print_dump_info (REPORT_DETAILS))
2359     report_vect_op (def_stmt, "reduction: unknown pattern: ");
2360 
2361   return NULL;
2362 }
2363 
2364 /* Wrapper around vect_is_simple_reduction_1, that won't modify code
2365    in-place.  Arguments as there.  */
2366 
2367 static gimple
2368 vect_is_simple_reduction (loop_vec_info loop_info, gimple phi,
2369                           bool check_reduction, bool *double_reduc)
2370 {
2371   return vect_is_simple_reduction_1 (loop_info, phi, check_reduction,
2372 				     double_reduc, false);
2373 }
2374 
2375 /* Wrapper around vect_is_simple_reduction_1, which will modify code
2376    in-place if it enables detection of more reductions.  Arguments
2377    as there.  */
2378 
2379 gimple
2380 vect_force_simple_reduction (loop_vec_info loop_info, gimple phi,
2381                           bool check_reduction, bool *double_reduc)
2382 {
2383   return vect_is_simple_reduction_1 (loop_info, phi, check_reduction,
2384 				     double_reduc, true);
2385 }
2386 
2387 /* Calculate the cost of one scalar iteration of the loop.  */
2388 int
2389 vect_get_single_scalar_iteration_cost (loop_vec_info loop_vinfo)
2390 {
2391   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2392   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2393   int nbbs = loop->num_nodes, factor, scalar_single_iter_cost = 0;
2394   int innerloop_iters, i, stmt_cost;
2395 
2396   /* Count statements in scalar loop.  Using this as scalar cost for a single
2397      iteration for now.
2398 
2399      TODO: Add outer loop support.
2400 
2401      TODO: Consider assigning different costs to different scalar
2402      statements.  */
2403 
2404   /* FORNOW.  */
2405   innerloop_iters = 1;
2406   if (loop->inner)
2407     innerloop_iters = 50; /* FIXME */
2408 
2409   for (i = 0; i < nbbs; i++)
2410     {
2411       gimple_stmt_iterator si;
2412       basic_block bb = bbs[i];
2413 
2414       if (bb->loop_father == loop->inner)
2415         factor = innerloop_iters;
2416       else
2417         factor = 1;
2418 
2419       for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2420         {
2421           gimple stmt = gsi_stmt (si);
2422           stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2423 
2424           if (!is_gimple_assign (stmt) && !is_gimple_call (stmt))
2425             continue;
2426 
2427           /* Skip stmts that are not vectorized inside the loop.  */
2428           if (stmt_info
2429               && !STMT_VINFO_RELEVANT_P (stmt_info)
2430               && (!STMT_VINFO_LIVE_P (stmt_info)
2431                   || !VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
2432 	      && !STMT_VINFO_IN_PATTERN_P (stmt_info))
2433             continue;
2434 
2435           if (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
2436             {
2437               if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
2438                stmt_cost = vect_get_cost (scalar_load);
2439              else
2440                stmt_cost = vect_get_cost (scalar_store);
2441             }
2442           else
2443             stmt_cost = vect_get_cost (scalar_stmt);
2444 
2445           scalar_single_iter_cost += stmt_cost * factor;
2446         }
2447     }
2448   return scalar_single_iter_cost;
2449 }
2450 
2451 /* Calculate cost of peeling the loop PEEL_ITERS_PROLOGUE times.  */
2452 int
2453 vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue,
2454                              int *peel_iters_epilogue,
2455                              int scalar_single_iter_cost)
2456 {
2457   int peel_guard_costs = 0;
2458   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2459 
2460   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
2461     {
2462       *peel_iters_epilogue = vf/2;
2463       if (vect_print_dump_info (REPORT_COST))
2464         fprintf (vect_dump, "cost model: "
2465                             "epilogue peel iters set to vf/2 because "
2466                             "loop iterations are unknown .");
2467 
2468       /* If peeled iterations are known but number of scalar loop
2469          iterations are unknown, count a taken branch per peeled loop.  */
2470       peel_guard_costs =  2 * vect_get_cost (cond_branch_taken);
2471     }
2472   else
2473     {
2474       int niters = LOOP_VINFO_INT_NITERS (loop_vinfo);
2475       peel_iters_prologue = niters < peel_iters_prologue ?
2476                             niters : peel_iters_prologue;
2477       *peel_iters_epilogue = (niters - peel_iters_prologue) % vf;
2478       /* If we need to peel for gaps, but no peeling is required, we have to
2479 	 peel VF iterations.  */
2480       if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) && !*peel_iters_epilogue)
2481         *peel_iters_epilogue = vf;
2482     }
2483 
2484    return (peel_iters_prologue * scalar_single_iter_cost)
2485             + (*peel_iters_epilogue * scalar_single_iter_cost)
2486            + peel_guard_costs;
2487 }
2488 
2489 /* Function vect_estimate_min_profitable_iters
2490 
2491    Return the number of iterations required for the vector version of the
2492    loop to be profitable relative to the cost of the scalar version of the
2493    loop.
2494 
2495    TODO: Take profile info into account before making vectorization
2496    decisions, if available.  */
2497 
2498 int
2499 vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
2500 {
2501   int i;
2502   int min_profitable_iters;
2503   int peel_iters_prologue;
2504   int peel_iters_epilogue;
2505   int vec_inside_cost = 0;
2506   int vec_outside_cost = 0;
2507   int scalar_single_iter_cost = 0;
2508   int scalar_outside_cost = 0;
2509   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2510   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2511   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2512   int nbbs = loop->num_nodes;
2513   int npeel = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
2514   int peel_guard_costs = 0;
2515   int innerloop_iters = 0, factor;
2516   VEC (slp_instance, heap) *slp_instances;
2517   slp_instance instance;
2518 
2519   /* Cost model disabled.  */
2520   if (!flag_vect_cost_model)
2521     {
2522       if (vect_print_dump_info (REPORT_COST))
2523         fprintf (vect_dump, "cost model disabled.");
2524       return 0;
2525     }
2526 
2527   /* Requires loop versioning tests to handle misalignment.  */
2528   if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
2529     {
2530       /*  FIXME: Make cost depend on complexity of individual check.  */
2531       vec_outside_cost +=
2532 	VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo));
2533       if (vect_print_dump_info (REPORT_COST))
2534         fprintf (vect_dump, "cost model: Adding cost of checks for loop "
2535                  "versioning to treat misalignment.\n");
2536     }
2537 
2538   /* Requires loop versioning with alias checks.  */
2539   if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2540     {
2541       /*  FIXME: Make cost depend on complexity of individual check.  */
2542       vec_outside_cost +=
2543         VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo));
2544       if (vect_print_dump_info (REPORT_COST))
2545         fprintf (vect_dump, "cost model: Adding cost of checks for loop "
2546                  "versioning aliasing.\n");
2547     }
2548 
2549   if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
2550       || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2551     vec_outside_cost += vect_get_cost (cond_branch_taken);
2552 
2553   /* Count statements in scalar loop.  Using this as scalar cost for a single
2554      iteration for now.
2555 
2556      TODO: Add outer loop support.
2557 
2558      TODO: Consider assigning different costs to different scalar
2559      statements.  */
2560 
2561   /* FORNOW.  */
2562   if (loop->inner)
2563     innerloop_iters = 50; /* FIXME */
2564 
2565   for (i = 0; i < nbbs; i++)
2566     {
2567       gimple_stmt_iterator si;
2568       basic_block bb = bbs[i];
2569 
2570       if (bb->loop_father == loop->inner)
2571  	factor = innerloop_iters;
2572       else
2573  	factor = 1;
2574 
2575       for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2576 	{
2577 	  gimple stmt = gsi_stmt (si);
2578 	  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2579 
2580 	  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2581 	    {
2582 	      stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2583 	      stmt_info = vinfo_for_stmt (stmt);
2584 	    }
2585 
2586 	  /* Skip stmts that are not vectorized inside the loop.  */
2587 	  if (!STMT_VINFO_RELEVANT_P (stmt_info)
2588 	      && (!STMT_VINFO_LIVE_P (stmt_info)
2589                  || !VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info))))
2590 	    continue;
2591 
2592 	  vec_inside_cost += STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) * factor;
2593 	  /* FIXME: for stmts in the inner-loop in outer-loop vectorization,
2594 	     some of the "outside" costs are generated inside the outer-loop.  */
2595 	  vec_outside_cost += STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info);
2596           if (is_pattern_stmt_p (stmt_info)
2597 	      && STMT_VINFO_PATTERN_DEF_SEQ (stmt_info))
2598             {
2599 	      gimple_stmt_iterator gsi;
2600 
2601 	      for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
2602 		   !gsi_end_p (gsi); gsi_next (&gsi))
2603                 {
2604                   gimple pattern_def_stmt = gsi_stmt (gsi);
2605                   stmt_vec_info pattern_def_stmt_info
2606 		    = vinfo_for_stmt (pattern_def_stmt);
2607                   if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
2608                       || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
2609 		    {
2610                       vec_inside_cost
2611 			+= STMT_VINFO_INSIDE_OF_LOOP_COST
2612 			   (pattern_def_stmt_info) * factor;
2613                       vec_outside_cost
2614 			+= STMT_VINFO_OUTSIDE_OF_LOOP_COST
2615 			   (pattern_def_stmt_info);
2616                     }
2617 		}
2618 	    }
2619 	}
2620     }
2621 
2622   scalar_single_iter_cost = vect_get_single_scalar_iteration_cost (loop_vinfo);
2623 
2624   /* Add additional cost for the peeled instructions in prologue and epilogue
2625      loop.
2626 
2627      FORNOW: If we don't know the value of peel_iters for prologue or epilogue
2628      at compile-time - we assume it's vf/2 (the worst would be vf-1).
2629 
2630      TODO: Build an expression that represents peel_iters for prologue and
2631      epilogue to be used in a run-time test.  */
2632 
2633   if (npeel  < 0)
2634     {
2635       peel_iters_prologue = vf/2;
2636       if (vect_print_dump_info (REPORT_COST))
2637         fprintf (vect_dump, "cost model: "
2638                  "prologue peel iters set to vf/2.");
2639 
2640       /* If peeling for alignment is unknown, loop bound of main loop becomes
2641          unknown.  */
2642       peel_iters_epilogue = vf/2;
2643       if (vect_print_dump_info (REPORT_COST))
2644         fprintf (vect_dump, "cost model: "
2645                  "epilogue peel iters set to vf/2 because "
2646                  "peeling for alignment is unknown .");
2647 
2648       /* If peeled iterations are unknown, count a taken branch and a not taken
2649          branch per peeled loop. Even if scalar loop iterations are known,
2650          vector iterations are not known since peeled prologue iterations are
2651          not known. Hence guards remain the same.  */
2652       peel_guard_costs +=  2 * (vect_get_cost (cond_branch_taken)
2653                                 + vect_get_cost (cond_branch_not_taken));
2654       vec_outside_cost += (peel_iters_prologue * scalar_single_iter_cost)
2655                            + (peel_iters_epilogue * scalar_single_iter_cost)
2656                            + peel_guard_costs;
2657     }
2658   else
2659     {
2660       peel_iters_prologue = npeel;
2661       vec_outside_cost += vect_get_known_peeling_cost (loop_vinfo,
2662                                     peel_iters_prologue, &peel_iters_epilogue,
2663                                     scalar_single_iter_cost);
2664     }
2665 
2666   /* FORNOW: The scalar outside cost is incremented in one of the
2667      following ways:
2668 
2669      1. The vectorizer checks for alignment and aliasing and generates
2670      a condition that allows dynamic vectorization.  A cost model
2671      check is ANDED with the versioning condition.  Hence scalar code
2672      path now has the added cost of the versioning check.
2673 
2674        if (cost > th & versioning_check)
2675          jmp to vector code
2676 
2677      Hence run-time scalar is incremented by not-taken branch cost.
2678 
2679      2. The vectorizer then checks if a prologue is required.  If the
2680      cost model check was not done before during versioning, it has to
2681      be done before the prologue check.
2682 
2683        if (cost <= th)
2684          prologue = scalar_iters
2685        if (prologue == 0)
2686          jmp to vector code
2687        else
2688          execute prologue
2689        if (prologue == num_iters)
2690 	 go to exit
2691 
2692      Hence the run-time scalar cost is incremented by a taken branch,
2693      plus a not-taken branch, plus a taken branch cost.
2694 
2695      3. The vectorizer then checks if an epilogue is required.  If the
2696      cost model check was not done before during prologue check, it
2697      has to be done with the epilogue check.
2698 
2699        if (prologue == 0)
2700          jmp to vector code
2701        else
2702          execute prologue
2703        if (prologue == num_iters)
2704 	 go to exit
2705        vector code:
2706          if ((cost <= th) | (scalar_iters-prologue-epilogue == 0))
2707            jmp to epilogue
2708 
2709      Hence the run-time scalar cost should be incremented by 2 taken
2710      branches.
2711 
2712      TODO: The back end may reorder the BBS's differently and reverse
2713      conditions/branch directions.  Change the estimates below to
2714      something more reasonable.  */
2715 
2716   /* If the number of iterations is known and we do not do versioning, we can
2717      decide whether to vectorize at compile time.  Hence the scalar version
2718      do not carry cost model guard costs.  */
2719   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2720       || LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
2721       || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2722     {
2723       /* Cost model check occurs at versioning.  */
2724       if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
2725           || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2726 	scalar_outside_cost += vect_get_cost (cond_branch_not_taken);
2727       else
2728 	{
2729 	  /* Cost model check occurs at prologue generation.  */
2730 	  if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0)
2731 	    scalar_outside_cost += 2 * vect_get_cost (cond_branch_taken)
2732                                    + vect_get_cost (cond_branch_not_taken);
2733 	  /* Cost model check occurs at epilogue generation.  */
2734 	  else
2735 	    scalar_outside_cost += 2 * vect_get_cost (cond_branch_taken);
2736 	}
2737     }
2738 
2739   /* Add SLP costs.  */
2740   slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2741   FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
2742     {
2743       vec_outside_cost += SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (instance);
2744       vec_inside_cost += SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance);
2745     }
2746 
2747   /* Calculate number of iterations required to make the vector version
2748      profitable, relative to the loop bodies only.  The following condition
2749      must hold true:
2750      SIC * niters + SOC > VIC * ((niters-PL_ITERS-EP_ITERS)/VF) + VOC
2751      where
2752      SIC = scalar iteration cost, VIC = vector iteration cost,
2753      VOC = vector outside cost, VF = vectorization factor,
2754      PL_ITERS = prologue iterations, EP_ITERS= epilogue iterations
2755      SOC = scalar outside cost for run time cost model check.  */
2756 
2757   if ((scalar_single_iter_cost * vf) > vec_inside_cost)
2758     {
2759       if (vec_outside_cost <= 0)
2760         min_profitable_iters = 1;
2761       else
2762         {
2763           min_profitable_iters = ((vec_outside_cost - scalar_outside_cost) * vf
2764 				  - vec_inside_cost * peel_iters_prologue
2765                                   - vec_inside_cost * peel_iters_epilogue)
2766                                  / ((scalar_single_iter_cost * vf)
2767                                     - vec_inside_cost);
2768 
2769           if ((scalar_single_iter_cost * vf * min_profitable_iters)
2770               <= ((vec_inside_cost * min_profitable_iters)
2771                   + ((vec_outside_cost - scalar_outside_cost) * vf)))
2772             min_profitable_iters++;
2773         }
2774     }
2775   /* vector version will never be profitable.  */
2776   else
2777     {
2778       if (vect_print_dump_info (REPORT_COST))
2779         fprintf (vect_dump, "cost model: the vector iteration cost = %d "
2780 		 "divided by the scalar iteration cost = %d "
2781 		 "is greater or equal to the vectorization factor = %d.",
2782                  vec_inside_cost, scalar_single_iter_cost, vf);
2783       return -1;
2784     }
2785 
2786   if (vect_print_dump_info (REPORT_COST))
2787     {
2788       fprintf (vect_dump, "Cost model analysis: \n");
2789       fprintf (vect_dump, "  Vector inside of loop cost: %d\n",
2790 	       vec_inside_cost);
2791       fprintf (vect_dump, "  Vector outside of loop cost: %d\n",
2792 	       vec_outside_cost);
2793       fprintf (vect_dump, "  Scalar iteration cost: %d\n",
2794 	       scalar_single_iter_cost);
2795       fprintf (vect_dump, "  Scalar outside cost: %d\n", scalar_outside_cost);
2796       fprintf (vect_dump, "  prologue iterations: %d\n",
2797                peel_iters_prologue);
2798       fprintf (vect_dump, "  epilogue iterations: %d\n",
2799                peel_iters_epilogue);
2800       fprintf (vect_dump, "  Calculated minimum iters for profitability: %d\n",
2801 	       min_profitable_iters);
2802     }
2803 
2804   min_profitable_iters =
2805 	min_profitable_iters < vf ? vf : min_profitable_iters;
2806 
2807   /* Because the condition we create is:
2808      if (niters <= min_profitable_iters)
2809        then skip the vectorized loop.  */
2810   min_profitable_iters--;
2811 
2812   if (vect_print_dump_info (REPORT_COST))
2813     fprintf (vect_dump, "  Profitability threshold = %d\n",
2814 	     min_profitable_iters);
2815 
2816   return min_profitable_iters;
2817 }
2818 
2819 
2820 /* TODO: Close dependency between vect_model_*_cost and vectorizable_*
2821    functions. Design better to avoid maintenance issues.  */
2822 
2823 /* Function vect_model_reduction_cost.
2824 
2825    Models cost for a reduction operation, including the vector ops
2826    generated within the strip-mine loop, the initial definition before
2827    the loop, and the epilogue code that must be generated.  */
2828 
2829 static bool
2830 vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
2831 			   int ncopies)
2832 {
2833   int outer_cost = 0;
2834   enum tree_code code;
2835   optab optab;
2836   tree vectype;
2837   gimple stmt, orig_stmt;
2838   tree reduction_op;
2839   enum machine_mode mode;
2840   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2841   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2842 
2843 
2844   /* Cost of reduction op inside loop.  */
2845   STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info)
2846     += ncopies * vect_get_cost (vector_stmt);
2847 
2848   stmt = STMT_VINFO_STMT (stmt_info);
2849 
2850   switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
2851     {
2852     case GIMPLE_SINGLE_RHS:
2853       gcc_assert (TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt)) == ternary_op);
2854       reduction_op = TREE_OPERAND (gimple_assign_rhs1 (stmt), 2);
2855       break;
2856     case GIMPLE_UNARY_RHS:
2857       reduction_op = gimple_assign_rhs1 (stmt);
2858       break;
2859     case GIMPLE_BINARY_RHS:
2860       reduction_op = gimple_assign_rhs2 (stmt);
2861       break;
2862     case GIMPLE_TERNARY_RHS:
2863       reduction_op = gimple_assign_rhs3 (stmt);
2864       break;
2865     default:
2866       gcc_unreachable ();
2867     }
2868 
2869   vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
2870   if (!vectype)
2871     {
2872       if (vect_print_dump_info (REPORT_COST))
2873         {
2874           fprintf (vect_dump, "unsupported data-type ");
2875           print_generic_expr (vect_dump, TREE_TYPE (reduction_op), TDF_SLIM);
2876         }
2877       return false;
2878    }
2879 
2880   mode = TYPE_MODE (vectype);
2881   orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2882 
2883   if (!orig_stmt)
2884     orig_stmt = STMT_VINFO_STMT (stmt_info);
2885 
2886   code = gimple_assign_rhs_code (orig_stmt);
2887 
2888   /* Add in cost for initial definition.  */
2889   outer_cost += vect_get_cost (scalar_to_vec);
2890 
2891   /* Determine cost of epilogue code.
2892 
2893      We have a reduction operator that will reduce the vector in one statement.
2894      Also requires scalar extract.  */
2895 
2896   if (!nested_in_vect_loop_p (loop, orig_stmt))
2897     {
2898       if (reduc_code != ERROR_MARK)
2899 	outer_cost += vect_get_cost (vector_stmt)
2900                       + vect_get_cost (vec_to_scalar);
2901       else
2902 	{
2903 	  int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
2904 	  tree bitsize =
2905 	    TYPE_SIZE (TREE_TYPE (gimple_assign_lhs (orig_stmt)));
2906 	  int element_bitsize = tree_low_cst (bitsize, 1);
2907 	  int nelements = vec_size_in_bits / element_bitsize;
2908 
2909 	  optab = optab_for_tree_code (code, vectype, optab_default);
2910 
2911 	  /* We have a whole vector shift available.  */
2912 	  if (VECTOR_MODE_P (mode)
2913 	      && optab_handler (optab, mode) != CODE_FOR_nothing
2914 	      && optab_handler (vec_shr_optab, mode) != CODE_FOR_nothing)
2915 	    /* Final reduction via vector shifts and the reduction operator. Also
2916 	       requires scalar extract.  */
2917 	    outer_cost += ((exact_log2(nelements) * 2)
2918               * vect_get_cost (vector_stmt)
2919   	      + vect_get_cost (vec_to_scalar));
2920 	  else
2921 	    /* Use extracts and reduction op for final reduction.  For N elements,
2922                we have N extracts and N-1 reduction ops.  */
2923 	    outer_cost += ((nelements + nelements - 1)
2924               * vect_get_cost (vector_stmt));
2925 	}
2926     }
2927 
2928   STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = outer_cost;
2929 
2930   if (vect_print_dump_info (REPORT_COST))
2931     fprintf (vect_dump, "vect_model_reduction_cost: inside_cost = %d, "
2932              "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
2933              STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
2934 
2935   return true;
2936 }
2937 
2938 
2939 /* Function vect_model_induction_cost.
2940 
2941    Models cost for induction operations.  */
2942 
2943 static void
2944 vect_model_induction_cost (stmt_vec_info stmt_info, int ncopies)
2945 {
2946   /* loop cost for vec_loop.  */
2947   STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info)
2948     = ncopies * vect_get_cost (vector_stmt);
2949   /* prologue cost for vec_init and vec_step.  */
2950   STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info)
2951     = 2 * vect_get_cost (scalar_to_vec);
2952 
2953   if (vect_print_dump_info (REPORT_COST))
2954     fprintf (vect_dump, "vect_model_induction_cost: inside_cost = %d, "
2955              "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
2956              STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
2957 }
2958 
2959 
2960 /* Function get_initial_def_for_induction
2961 
2962    Input:
2963    STMT - a stmt that performs an induction operation in the loop.
2964    IV_PHI - the initial value of the induction variable
2965 
2966    Output:
2967    Return a vector variable, initialized with the first VF values of
2968    the induction variable.  E.g., for an iv with IV_PHI='X' and
2969    evolution S, for a vector of 4 units, we want to return:
2970    [X, X + S, X + 2*S, X + 3*S].  */
2971 
2972 static tree
2973 get_initial_def_for_induction (gimple iv_phi)
2974 {
2975   stmt_vec_info stmt_vinfo = vinfo_for_stmt (iv_phi);
2976   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2977   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2978   tree scalar_type;
2979   tree vectype;
2980   int nunits;
2981   edge pe = loop_preheader_edge (loop);
2982   struct loop *iv_loop;
2983   basic_block new_bb;
2984   tree vec, vec_init, vec_step, t;
2985   tree access_fn;
2986   tree new_var;
2987   tree new_name;
2988   gimple init_stmt, induction_phi, new_stmt;
2989   tree induc_def, vec_def, vec_dest;
2990   tree init_expr, step_expr;
2991   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2992   int i;
2993   bool ok;
2994   int ncopies;
2995   tree expr;
2996   stmt_vec_info phi_info = vinfo_for_stmt (iv_phi);
2997   bool nested_in_vect_loop = false;
2998   gimple_seq stmts = NULL;
2999   imm_use_iterator imm_iter;
3000   use_operand_p use_p;
3001   gimple exit_phi;
3002   edge latch_e;
3003   tree loop_arg;
3004   gimple_stmt_iterator si;
3005   basic_block bb = gimple_bb (iv_phi);
3006   tree stepvectype;
3007   tree resvectype;
3008 
3009   /* Is phi in an inner-loop, while vectorizing an enclosing outer-loop?  */
3010   if (nested_in_vect_loop_p (loop, iv_phi))
3011     {
3012       nested_in_vect_loop = true;
3013       iv_loop = loop->inner;
3014     }
3015   else
3016     iv_loop = loop;
3017   gcc_assert (iv_loop == (gimple_bb (iv_phi))->loop_father);
3018 
3019   latch_e = loop_latch_edge (iv_loop);
3020   loop_arg = PHI_ARG_DEF_FROM_EDGE (iv_phi, latch_e);
3021 
3022   access_fn = analyze_scalar_evolution (iv_loop, PHI_RESULT (iv_phi));
3023   gcc_assert (access_fn);
3024   STRIP_NOPS (access_fn);
3025   ok = vect_is_simple_iv_evolution (iv_loop->num, access_fn,
3026                                     &init_expr, &step_expr);
3027   gcc_assert (ok);
3028   pe = loop_preheader_edge (iv_loop);
3029 
3030   scalar_type = TREE_TYPE (init_expr);
3031   vectype = get_vectype_for_scalar_type (scalar_type);
3032   resvectype = get_vectype_for_scalar_type (TREE_TYPE (PHI_RESULT (iv_phi)));
3033   gcc_assert (vectype);
3034   nunits = TYPE_VECTOR_SUBPARTS (vectype);
3035   ncopies = vf / nunits;
3036 
3037   gcc_assert (phi_info);
3038   gcc_assert (ncopies >= 1);
3039 
3040   /* Find the first insertion point in the BB.  */
3041   si = gsi_after_labels (bb);
3042 
3043   /* Create the vector that holds the initial_value of the induction.  */
3044   if (nested_in_vect_loop)
3045     {
3046       /* iv_loop is nested in the loop to be vectorized.  init_expr had already
3047 	 been created during vectorization of previous stmts.  We obtain it
3048 	 from the STMT_VINFO_VEC_STMT of the defining stmt.  */
3049       tree iv_def = PHI_ARG_DEF_FROM_EDGE (iv_phi,
3050                                            loop_preheader_edge (iv_loop));
3051       vec_init = vect_get_vec_def_for_operand (iv_def, iv_phi, NULL);
3052     }
3053   else
3054     {
3055       /* iv_loop is the loop to be vectorized. Create:
3056 	 vec_init = [X, X+S, X+2*S, X+3*S] (S = step_expr, X = init_expr)  */
3057       new_var = vect_get_new_vect_var (scalar_type, vect_scalar_var, "var_");
3058       add_referenced_var (new_var);
3059 
3060       new_name = force_gimple_operand (init_expr, &stmts, false, new_var);
3061       if (stmts)
3062 	{
3063 	  new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3064 	  gcc_assert (!new_bb);
3065 	}
3066 
3067       t = NULL_TREE;
3068       t = tree_cons (NULL_TREE, new_name, t);
3069       for (i = 1; i < nunits; i++)
3070 	{
3071 	  /* Create: new_name_i = new_name + step_expr  */
3072 	  enum tree_code code = POINTER_TYPE_P (scalar_type)
3073 				? POINTER_PLUS_EXPR : PLUS_EXPR;
3074 	  init_stmt = gimple_build_assign_with_ops (code, new_var,
3075 						    new_name, step_expr);
3076 	  new_name = make_ssa_name (new_var, init_stmt);
3077 	  gimple_assign_set_lhs (init_stmt, new_name);
3078 
3079 	  new_bb = gsi_insert_on_edge_immediate (pe, init_stmt);
3080 	  gcc_assert (!new_bb);
3081 
3082 	  if (vect_print_dump_info (REPORT_DETAILS))
3083 	    {
3084 	      fprintf (vect_dump, "created new init_stmt: ");
3085 	      print_gimple_stmt (vect_dump, init_stmt, 0, TDF_SLIM);
3086 	    }
3087 	  t = tree_cons (NULL_TREE, new_name, t);
3088 	}
3089       /* Create a vector from [new_name_0, new_name_1, ..., new_name_nunits-1]  */
3090       vec = build_constructor_from_list (vectype, nreverse (t));
3091       vec_init = vect_init_vector (iv_phi, vec, vectype, NULL);
3092     }
3093 
3094 
3095   /* Create the vector that holds the step of the induction.  */
3096   if (nested_in_vect_loop)
3097     /* iv_loop is nested in the loop to be vectorized. Generate:
3098        vec_step = [S, S, S, S]  */
3099     new_name = step_expr;
3100   else
3101     {
3102       /* iv_loop is the loop to be vectorized. Generate:
3103 	  vec_step = [VF*S, VF*S, VF*S, VF*S]  */
3104       expr = build_int_cst (TREE_TYPE (step_expr), vf);
3105       new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
3106 			      expr, step_expr);
3107     }
3108 
3109   t = unshare_expr (new_name);
3110   gcc_assert (CONSTANT_CLASS_P (new_name));
3111   stepvectype = get_vectype_for_scalar_type (TREE_TYPE (new_name));
3112   gcc_assert (stepvectype);
3113   vec = build_vector_from_val (stepvectype, t);
3114   vec_step = vect_init_vector (iv_phi, vec, stepvectype, NULL);
3115 
3116 
3117   /* Create the following def-use cycle:
3118      loop prolog:
3119          vec_init = ...
3120 	 vec_step = ...
3121      loop:
3122          vec_iv = PHI <vec_init, vec_loop>
3123          ...
3124          STMT
3125          ...
3126          vec_loop = vec_iv + vec_step;  */
3127 
3128   /* Create the induction-phi that defines the induction-operand.  */
3129   vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_");
3130   add_referenced_var (vec_dest);
3131   induction_phi = create_phi_node (vec_dest, iv_loop->header);
3132   set_vinfo_for_stmt (induction_phi,
3133 		      new_stmt_vec_info (induction_phi, loop_vinfo, NULL));
3134   induc_def = PHI_RESULT (induction_phi);
3135 
3136   /* Create the iv update inside the loop  */
3137   new_stmt = gimple_build_assign_with_ops (PLUS_EXPR, vec_dest,
3138 					   induc_def, vec_step);
3139   vec_def = make_ssa_name (vec_dest, new_stmt);
3140   gimple_assign_set_lhs (new_stmt, vec_def);
3141   gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
3142   set_vinfo_for_stmt (new_stmt, new_stmt_vec_info (new_stmt, loop_vinfo,
3143                                                    NULL));
3144 
3145   /* Set the arguments of the phi node:  */
3146   add_phi_arg (induction_phi, vec_init, pe, UNKNOWN_LOCATION);
3147   add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop),
3148 	       UNKNOWN_LOCATION);
3149 
3150 
3151   /* In case that vectorization factor (VF) is bigger than the number
3152      of elements that we can fit in a vectype (nunits), we have to generate
3153      more than one vector stmt - i.e - we need to "unroll" the
3154      vector stmt by a factor VF/nunits.  For more details see documentation
3155      in vectorizable_operation.  */
3156 
3157   if (ncopies > 1)
3158     {
3159       stmt_vec_info prev_stmt_vinfo;
3160       /* FORNOW. This restriction should be relaxed.  */
3161       gcc_assert (!nested_in_vect_loop);
3162 
3163       /* Create the vector that holds the step of the induction.  */
3164       expr = build_int_cst (TREE_TYPE (step_expr), nunits);
3165       new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
3166 			      expr, step_expr);
3167       t = unshare_expr (new_name);
3168       gcc_assert (CONSTANT_CLASS_P (new_name));
3169       vec = build_vector_from_val (stepvectype, t);
3170       vec_step = vect_init_vector (iv_phi, vec, stepvectype, NULL);
3171 
3172       vec_def = induc_def;
3173       prev_stmt_vinfo = vinfo_for_stmt (induction_phi);
3174       for (i = 1; i < ncopies; i++)
3175 	{
3176 	  /* vec_i = vec_prev + vec_step  */
3177 	  new_stmt = gimple_build_assign_with_ops (PLUS_EXPR, vec_dest,
3178 						   vec_def, vec_step);
3179 	  vec_def = make_ssa_name (vec_dest, new_stmt);
3180 	  gimple_assign_set_lhs (new_stmt, vec_def);
3181 
3182 	  gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
3183 	  if (!useless_type_conversion_p (resvectype, vectype))
3184 	    {
3185 	      new_stmt = gimple_build_assign_with_ops
3186 		  (VIEW_CONVERT_EXPR,
3187 		   vect_get_new_vect_var (resvectype, vect_simple_var,
3188 					  "vec_iv_"),
3189 		   build1 (VIEW_CONVERT_EXPR, resvectype,
3190 			   gimple_assign_lhs (new_stmt)), NULL_TREE);
3191 	      gimple_assign_set_lhs (new_stmt,
3192 				     make_ssa_name
3193 				       (gimple_assign_lhs (new_stmt), new_stmt));
3194 	      gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
3195 	    }
3196 	  set_vinfo_for_stmt (new_stmt,
3197 			      new_stmt_vec_info (new_stmt, loop_vinfo, NULL));
3198 	  STMT_VINFO_RELATED_STMT (prev_stmt_vinfo) = new_stmt;
3199 	  prev_stmt_vinfo = vinfo_for_stmt (new_stmt);
3200 	}
3201     }
3202 
3203   if (nested_in_vect_loop)
3204     {
3205       /* Find the loop-closed exit-phi of the induction, and record
3206          the final vector of induction results:  */
3207       exit_phi = NULL;
3208       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, loop_arg)
3209         {
3210 	  if (!flow_bb_inside_loop_p (iv_loop, gimple_bb (USE_STMT (use_p))))
3211 	    {
3212 	      exit_phi = USE_STMT (use_p);
3213 	      break;
3214 	    }
3215         }
3216       if (exit_phi)
3217 	{
3218 	  stmt_vec_info stmt_vinfo = vinfo_for_stmt (exit_phi);
3219 	  /* FORNOW. Currently not supporting the case that an inner-loop induction
3220 	     is not used in the outer-loop (i.e. only outside the outer-loop).  */
3221 	  gcc_assert (STMT_VINFO_RELEVANT_P (stmt_vinfo)
3222 		      && !STMT_VINFO_LIVE_P (stmt_vinfo));
3223 
3224 	  STMT_VINFO_VEC_STMT (stmt_vinfo) = new_stmt;
3225 	  if (vect_print_dump_info (REPORT_DETAILS))
3226 	    {
3227 	      fprintf (vect_dump, "vector of inductions after inner-loop:");
3228 	      print_gimple_stmt (vect_dump, new_stmt, 0, TDF_SLIM);
3229 	    }
3230 	}
3231     }
3232 
3233 
3234   if (vect_print_dump_info (REPORT_DETAILS))
3235     {
3236       fprintf (vect_dump, "transform induction: created def-use cycle: ");
3237       print_gimple_stmt (vect_dump, induction_phi, 0, TDF_SLIM);
3238       fprintf (vect_dump, "\n");
3239       print_gimple_stmt (vect_dump, SSA_NAME_DEF_STMT (vec_def), 0, TDF_SLIM);
3240     }
3241 
3242   STMT_VINFO_VEC_STMT (phi_info) = induction_phi;
3243   if (!useless_type_conversion_p (resvectype, vectype))
3244     {
3245       new_stmt = gimple_build_assign_with_ops
3246 	 (VIEW_CONVERT_EXPR,
3247 	  vect_get_new_vect_var (resvectype, vect_simple_var, "vec_iv_"),
3248 	  build1 (VIEW_CONVERT_EXPR, resvectype, induc_def), NULL_TREE);
3249       induc_def = make_ssa_name (gimple_assign_lhs (new_stmt), new_stmt);
3250       gimple_assign_set_lhs (new_stmt, induc_def);
3251       si = gsi_start_bb (bb);
3252       gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
3253       set_vinfo_for_stmt (new_stmt,
3254 			  new_stmt_vec_info (new_stmt, loop_vinfo, NULL));
3255       STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_stmt))
3256 	= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (induction_phi));
3257     }
3258 
3259   return induc_def;
3260 }
3261 
3262 
3263 /* Function get_initial_def_for_reduction
3264 
3265    Input:
3266    STMT - a stmt that performs a reduction operation in the loop.
3267    INIT_VAL - the initial value of the reduction variable
3268 
3269    Output:
3270    ADJUSTMENT_DEF - a tree that holds a value to be added to the final result
3271         of the reduction (used for adjusting the epilog - see below).
3272    Return a vector variable, initialized according to the operation that STMT
3273         performs. This vector will be used as the initial value of the
3274         vector of partial results.
3275 
3276    Option1 (adjust in epilog): Initialize the vector as follows:
3277      add/bit or/xor:    [0,0,...,0,0]
3278      mult/bit and:      [1,1,...,1,1]
3279      min/max/cond_expr: [init_val,init_val,..,init_val,init_val]
3280    and when necessary (e.g. add/mult case) let the caller know
3281    that it needs to adjust the result by init_val.
3282 
3283    Option2: Initialize the vector as follows:
3284      add/bit or/xor:    [init_val,0,0,...,0]
3285      mult/bit and:      [init_val,1,1,...,1]
3286      min/max/cond_expr: [init_val,init_val,...,init_val]
3287    and no adjustments are needed.
3288 
3289    For example, for the following code:
3290 
3291    s = init_val;
3292    for (i=0;i<n;i++)
3293      s = s + a[i];
3294 
3295    STMT is 's = s + a[i]', and the reduction variable is 's'.
3296    For a vector of 4 units, we want to return either [0,0,0,init_val],
3297    or [0,0,0,0] and let the caller know that it needs to adjust
3298    the result at the end by 'init_val'.
3299 
3300    FORNOW, we are using the 'adjust in epilog' scheme, because this way the
3301    initialization vector is simpler (same element in all entries), if
3302    ADJUSTMENT_DEF is not NULL, and Option2 otherwise.
3303 
3304    A cost model should help decide between these two schemes.  */
3305 
3306 tree
3307 get_initial_def_for_reduction (gimple stmt, tree init_val,
3308                                tree *adjustment_def)
3309 {
3310   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
3311   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
3312   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3313   tree scalar_type = TREE_TYPE (init_val);
3314   tree vectype = get_vectype_for_scalar_type (scalar_type);
3315   int nunits;
3316   enum tree_code code = gimple_assign_rhs_code (stmt);
3317   tree def_for_init;
3318   tree init_def;
3319   tree t = NULL_TREE;
3320   int i;
3321   bool nested_in_vect_loop = false;
3322   tree init_value;
3323   REAL_VALUE_TYPE real_init_val = dconst0;
3324   int int_init_val = 0;
3325   gimple def_stmt = NULL;
3326 
3327   gcc_assert (vectype);
3328   nunits = TYPE_VECTOR_SUBPARTS (vectype);
3329 
3330   gcc_assert (POINTER_TYPE_P (scalar_type) || INTEGRAL_TYPE_P (scalar_type)
3331 	      || SCALAR_FLOAT_TYPE_P (scalar_type));
3332 
3333   if (nested_in_vect_loop_p (loop, stmt))
3334     nested_in_vect_loop = true;
3335   else
3336     gcc_assert (loop == (gimple_bb (stmt))->loop_father);
3337 
3338   /* In case of double reduction we only create a vector variable to be put
3339      in the reduction phi node.  The actual statement creation is done in
3340      vect_create_epilog_for_reduction.  */
3341   if (adjustment_def && nested_in_vect_loop
3342       && TREE_CODE (init_val) == SSA_NAME
3343       && (def_stmt = SSA_NAME_DEF_STMT (init_val))
3344       && gimple_code (def_stmt) == GIMPLE_PHI
3345       && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
3346       && vinfo_for_stmt (def_stmt)
3347       && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
3348           == vect_double_reduction_def)
3349     {
3350       *adjustment_def = NULL;
3351       return vect_create_destination_var (init_val, vectype);
3352     }
3353 
3354   if (TREE_CONSTANT (init_val))
3355     {
3356       if (SCALAR_FLOAT_TYPE_P (scalar_type))
3357         init_value = build_real (scalar_type, TREE_REAL_CST (init_val));
3358       else
3359         init_value = build_int_cst (scalar_type, TREE_INT_CST_LOW (init_val));
3360     }
3361   else
3362     init_value = init_val;
3363 
3364   switch (code)
3365     {
3366       case WIDEN_SUM_EXPR:
3367       case DOT_PROD_EXPR:
3368       case PLUS_EXPR:
3369       case MINUS_EXPR:
3370       case BIT_IOR_EXPR:
3371       case BIT_XOR_EXPR:
3372       case MULT_EXPR:
3373       case BIT_AND_EXPR:
3374         /* ADJUSMENT_DEF is NULL when called from
3375            vect_create_epilog_for_reduction to vectorize double reduction.  */
3376         if (adjustment_def)
3377           {
3378             if (nested_in_vect_loop)
3379               *adjustment_def = vect_get_vec_def_for_operand (init_val, stmt,
3380                                                               NULL);
3381             else
3382               *adjustment_def = init_val;
3383           }
3384 
3385         if (code == MULT_EXPR)
3386           {
3387             real_init_val = dconst1;
3388             int_init_val = 1;
3389           }
3390 
3391         if (code == BIT_AND_EXPR)
3392           int_init_val = -1;
3393 
3394         if (SCALAR_FLOAT_TYPE_P (scalar_type))
3395           def_for_init = build_real (scalar_type, real_init_val);
3396         else
3397           def_for_init = build_int_cst (scalar_type, int_init_val);
3398 
3399         /* Create a vector of '0' or '1' except the first element.  */
3400         for (i = nunits - 2; i >= 0; --i)
3401           t = tree_cons (NULL_TREE, def_for_init, t);
3402 
3403         /* Option1: the first element is '0' or '1' as well.  */
3404         if (adjustment_def)
3405           {
3406             t = tree_cons (NULL_TREE, def_for_init, t);
3407             init_def = build_vector (vectype, t);
3408             break;
3409           }
3410 
3411         /* Option2: the first element is INIT_VAL.  */
3412         t = tree_cons (NULL_TREE, init_value, t);
3413         if (TREE_CONSTANT (init_val))
3414           init_def = build_vector (vectype, t);
3415         else
3416           init_def = build_constructor_from_list (vectype, t);
3417 
3418         break;
3419 
3420       case MIN_EXPR:
3421       case MAX_EXPR:
3422       case COND_EXPR:
3423         if (adjustment_def)
3424           {
3425             *adjustment_def = NULL_TREE;
3426             init_def = vect_get_vec_def_for_operand (init_val, stmt, NULL);
3427             break;
3428           }
3429 
3430 	init_def = build_vector_from_val (vectype, init_value);
3431         break;
3432 
3433       default:
3434         gcc_unreachable ();
3435     }
3436 
3437   return init_def;
3438 }
3439 
3440 
3441 /* Function vect_create_epilog_for_reduction
3442 
3443    Create code at the loop-epilog to finalize the result of a reduction
3444    computation.
3445 
3446    VECT_DEFS is list of vector of partial results, i.e., the lhs's of vector
3447      reduction statements.
3448    STMT is the scalar reduction stmt that is being vectorized.
3449    NCOPIES is > 1 in case the vectorization factor (VF) is bigger than the
3450      number of elements that we can fit in a vectype (nunits).  In this case
3451      we have to generate more than one vector stmt - i.e - we need to "unroll"
3452      the vector stmt by a factor VF/nunits.  For more details see documentation
3453      in vectorizable_operation.
3454    REDUC_CODE is the tree-code for the epilog reduction.
3455    REDUCTION_PHIS is a list of the phi-nodes that carry the reduction
3456      computation.
3457    REDUC_INDEX is the index of the operand in the right hand side of the
3458      statement that is defined by REDUCTION_PHI.
3459    DOUBLE_REDUC is TRUE if double reduction phi nodes should be handled.
3460    SLP_NODE is an SLP node containing a group of reduction statements. The
3461      first one in this group is STMT.
3462 
3463    This function:
3464    1. Creates the reduction def-use cycles: sets the arguments for
3465       REDUCTION_PHIS:
3466       The loop-entry argument is the vectorized initial-value of the reduction.
3467       The loop-latch argument is taken from VECT_DEFS - the vector of partial
3468       sums.
3469    2. "Reduces" each vector of partial results VECT_DEFS into a single result,
3470       by applying the operation specified by REDUC_CODE if available, or by
3471       other means (whole-vector shifts or a scalar loop).
3472       The function also creates a new phi node at the loop exit to preserve
3473       loop-closed form, as illustrated below.
3474 
3475      The flow at the entry to this function:
3476 
3477         loop:
3478           vec_def = phi <null, null>            # REDUCTION_PHI
3479           VECT_DEF = vector_stmt                # vectorized form of STMT
3480           s_loop = scalar_stmt                  # (scalar) STMT
3481         loop_exit:
3482           s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
3483           use <s_out0>
3484           use <s_out0>
3485 
3486      The above is transformed by this function into:
3487 
3488         loop:
3489           vec_def = phi <vec_init, VECT_DEF>    # REDUCTION_PHI
3490           VECT_DEF = vector_stmt                # vectorized form of STMT
3491           s_loop = scalar_stmt                  # (scalar) STMT
3492         loop_exit:
3493           s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
3494           v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
3495           v_out2 = reduce <v_out1>
3496           s_out3 = extract_field <v_out2, 0>
3497           s_out4 = adjust_result <s_out3>
3498           use <s_out4>
3499           use <s_out4>
3500 */
3501 
3502 static void
3503 vect_create_epilog_for_reduction (VEC (tree, heap) *vect_defs, gimple stmt,
3504 				  int ncopies, enum tree_code reduc_code,
3505 				  VEC (gimple, heap) *reduction_phis,
3506                                   int reduc_index, bool double_reduc,
3507                                   slp_tree slp_node)
3508 {
3509   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3510   stmt_vec_info prev_phi_info;
3511   tree vectype;
3512   enum machine_mode mode;
3513   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3514   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *outer_loop = NULL;
3515   basic_block exit_bb;
3516   tree scalar_dest;
3517   tree scalar_type;
3518   gimple new_phi = NULL, phi;
3519   gimple_stmt_iterator exit_gsi;
3520   tree vec_dest;
3521   tree new_temp = NULL_TREE, new_dest, new_name, new_scalar_dest;
3522   gimple epilog_stmt = NULL;
3523   enum tree_code code = gimple_assign_rhs_code (stmt);
3524   gimple exit_phi;
3525   tree bitsize, bitpos;
3526   tree adjustment_def = NULL;
3527   tree vec_initial_def = NULL;
3528   tree reduction_op, expr, def;
3529   tree orig_name, scalar_result;
3530   imm_use_iterator imm_iter, phi_imm_iter;
3531   use_operand_p use_p, phi_use_p;
3532   bool extract_scalar_result = false;
3533   gimple use_stmt, orig_stmt, reduction_phi = NULL;
3534   bool nested_in_vect_loop = false;
3535   VEC (gimple, heap) *new_phis = NULL;
3536   VEC (gimple, heap) *inner_phis = NULL;
3537   enum vect_def_type dt = vect_unknown_def_type;
3538   int j, i;
3539   VEC (tree, heap) *scalar_results = NULL;
3540   unsigned int group_size = 1, k, ratio;
3541   VEC (tree, heap) *vec_initial_defs = NULL;
3542   VEC (gimple, heap) *phis;
3543   bool slp_reduc = false;
3544   tree new_phi_result;
3545   gimple inner_phi = NULL;
3546 
3547   if (slp_node)
3548     group_size = VEC_length (gimple, SLP_TREE_SCALAR_STMTS (slp_node));
3549 
3550   if (nested_in_vect_loop_p (loop, stmt))
3551     {
3552       outer_loop = loop;
3553       loop = loop->inner;
3554       nested_in_vect_loop = true;
3555       gcc_assert (!slp_node);
3556     }
3557 
3558   switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
3559     {
3560     case GIMPLE_SINGLE_RHS:
3561       gcc_assert (TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt))
3562 		  == ternary_op);
3563       reduction_op = TREE_OPERAND (gimple_assign_rhs1 (stmt), reduc_index);
3564       break;
3565     case GIMPLE_UNARY_RHS:
3566       reduction_op = gimple_assign_rhs1 (stmt);
3567       break;
3568     case GIMPLE_BINARY_RHS:
3569       reduction_op = reduc_index ?
3570                      gimple_assign_rhs2 (stmt) : gimple_assign_rhs1 (stmt);
3571       break;
3572     case GIMPLE_TERNARY_RHS:
3573       reduction_op = gimple_op (stmt, reduc_index + 1);
3574       break;
3575     default:
3576       gcc_unreachable ();
3577     }
3578 
3579   vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
3580   gcc_assert (vectype);
3581   mode = TYPE_MODE (vectype);
3582 
3583   /* 1. Create the reduction def-use cycle:
3584      Set the arguments of REDUCTION_PHIS, i.e., transform
3585 
3586         loop:
3587           vec_def = phi <null, null>            # REDUCTION_PHI
3588           VECT_DEF = vector_stmt                # vectorized form of STMT
3589           ...
3590 
3591      into:
3592 
3593         loop:
3594           vec_def = phi <vec_init, VECT_DEF>    # REDUCTION_PHI
3595           VECT_DEF = vector_stmt                # vectorized form of STMT
3596           ...
3597 
3598      (in case of SLP, do it for all the phis). */
3599 
3600   /* Get the loop-entry arguments.  */
3601   if (slp_node)
3602     vect_get_vec_defs (reduction_op, NULL_TREE, stmt, &vec_initial_defs,
3603                        NULL, slp_node, reduc_index);
3604   else
3605     {
3606       vec_initial_defs = VEC_alloc (tree, heap, 1);
3607      /* For the case of reduction, vect_get_vec_def_for_operand returns
3608         the scalar def before the loop, that defines the initial value
3609         of the reduction variable.  */
3610       vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt,
3611                                                       &adjustment_def);
3612       VEC_quick_push (tree, vec_initial_defs, vec_initial_def);
3613     }
3614 
3615   /* Set phi nodes arguments.  */
3616   FOR_EACH_VEC_ELT (gimple, reduction_phis, i, phi)
3617     {
3618       tree vec_init_def = VEC_index (tree, vec_initial_defs, i);
3619       tree def = VEC_index (tree, vect_defs, i);
3620       for (j = 0; j < ncopies; j++)
3621         {
3622           /* Set the loop-entry arg of the reduction-phi.  */
3623           add_phi_arg (phi, vec_init_def, loop_preheader_edge (loop),
3624                        UNKNOWN_LOCATION);
3625 
3626           /* Set the loop-latch arg for the reduction-phi.  */
3627           if (j > 0)
3628             def = vect_get_vec_def_for_stmt_copy (vect_unknown_def_type, def);
3629 
3630           add_phi_arg (phi, def, loop_latch_edge (loop), UNKNOWN_LOCATION);
3631 
3632           if (vect_print_dump_info (REPORT_DETAILS))
3633             {
3634               fprintf (vect_dump, "transform reduction: created def-use"
3635                                   " cycle: ");
3636               print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
3637               fprintf (vect_dump, "\n");
3638               print_gimple_stmt (vect_dump, SSA_NAME_DEF_STMT (def), 0,
3639                                  TDF_SLIM);
3640             }
3641 
3642           phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi));
3643         }
3644     }
3645 
3646   VEC_free (tree, heap, vec_initial_defs);
3647 
3648   /* 2. Create epilog code.
3649         The reduction epilog code operates across the elements of the vector
3650         of partial results computed by the vectorized loop.
3651         The reduction epilog code consists of:
3652 
3653         step 1: compute the scalar result in a vector (v_out2)
3654         step 2: extract the scalar result (s_out3) from the vector (v_out2)
3655         step 3: adjust the scalar result (s_out3) if needed.
3656 
3657         Step 1 can be accomplished using one the following three schemes:
3658           (scheme 1) using reduc_code, if available.
3659           (scheme 2) using whole-vector shifts, if available.
3660           (scheme 3) using a scalar loop. In this case steps 1+2 above are
3661                      combined.
3662 
3663           The overall epilog code looks like this:
3664 
3665           s_out0 = phi <s_loop>         # original EXIT_PHI
3666           v_out1 = phi <VECT_DEF>       # NEW_EXIT_PHI
3667           v_out2 = reduce <v_out1>              # step 1
3668           s_out3 = extract_field <v_out2, 0>    # step 2
3669           s_out4 = adjust_result <s_out3>       # step 3
3670 
3671           (step 3 is optional, and steps 1 and 2 may be combined).
3672           Lastly, the uses of s_out0 are replaced by s_out4.  */
3673 
3674 
3675   /* 2.1 Create new loop-exit-phis to preserve loop-closed form:
3676          v_out1 = phi <VECT_DEF>
3677          Store them in NEW_PHIS.  */
3678 
3679   exit_bb = single_exit (loop)->dest;
3680   prev_phi_info = NULL;
3681   new_phis = VEC_alloc (gimple, heap, VEC_length (tree, vect_defs));
3682   FOR_EACH_VEC_ELT (tree, vect_defs, i, def)
3683     {
3684       for (j = 0; j < ncopies; j++)
3685         {
3686           phi = create_phi_node (SSA_NAME_VAR (def), exit_bb);
3687           set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, loop_vinfo, NULL));
3688           if (j == 0)
3689             VEC_quick_push (gimple, new_phis, phi);
3690           else
3691 	    {
3692 	      def = vect_get_vec_def_for_stmt_copy (dt, def);
3693 	      STMT_VINFO_RELATED_STMT (prev_phi_info) = phi;
3694 	    }
3695 
3696           SET_PHI_ARG_DEF (phi, single_exit (loop)->dest_idx, def);
3697           prev_phi_info = vinfo_for_stmt (phi);
3698         }
3699     }
3700 
3701   /* The epilogue is created for the outer-loop, i.e., for the loop being
3702      vectorized.  Create exit phis for the outer loop.  */
3703   if (double_reduc)
3704     {
3705       loop = outer_loop;
3706       exit_bb = single_exit (loop)->dest;
3707       inner_phis = VEC_alloc (gimple, heap, VEC_length (tree, vect_defs));
3708       FOR_EACH_VEC_ELT (gimple, new_phis, i, phi)
3709 	{
3710 	  gimple outer_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (phi)),
3711 					      exit_bb);
3712 	  SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx,
3713 			   PHI_RESULT (phi));
3714 	  set_vinfo_for_stmt (outer_phi, new_stmt_vec_info (outer_phi,
3715 							    loop_vinfo, NULL));
3716 	  VEC_quick_push (gimple, inner_phis, phi);
3717 	  VEC_replace (gimple, new_phis, i, outer_phi);
3718 	  prev_phi_info = vinfo_for_stmt (outer_phi);
3719           while (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi)))
3720             {
3721 	      phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi));
3722 	      outer_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (phi)),
3723 					   exit_bb);
3724 	      SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx,
3725 			       PHI_RESULT (phi));
3726 	      set_vinfo_for_stmt (outer_phi, new_stmt_vec_info (outer_phi,
3727 							loop_vinfo, NULL));
3728 	      STMT_VINFO_RELATED_STMT (prev_phi_info) = outer_phi;
3729 	      prev_phi_info = vinfo_for_stmt (outer_phi);
3730 	    }
3731 	}
3732     }
3733 
3734   exit_gsi = gsi_after_labels (exit_bb);
3735 
3736   /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
3737          (i.e. when reduc_code is not available) and in the final adjustment
3738 	 code (if needed).  Also get the original scalar reduction variable as
3739          defined in the loop.  In case STMT is a "pattern-stmt" (i.e. - it
3740          represents a reduction pattern), the tree-code and scalar-def are
3741          taken from the original stmt that the pattern-stmt (STMT) replaces.
3742          Otherwise (it is a regular reduction) - the tree-code and scalar-def
3743          are taken from STMT.  */
3744 
3745   orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
3746   if (!orig_stmt)
3747     {
3748       /* Regular reduction  */
3749       orig_stmt = stmt;
3750     }
3751   else
3752     {
3753       /* Reduction pattern  */
3754       stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt);
3755       gcc_assert (STMT_VINFO_IN_PATTERN_P (stmt_vinfo));
3756       gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
3757     }
3758 
3759   code = gimple_assign_rhs_code (orig_stmt);
3760   /* For MINUS_EXPR the initial vector is [init_val,0,...,0], therefore,
3761      partial results are added and not subtracted.  */
3762   if (code == MINUS_EXPR)
3763     code = PLUS_EXPR;
3764 
3765   scalar_dest = gimple_assign_lhs (orig_stmt);
3766   scalar_type = TREE_TYPE (scalar_dest);
3767   scalar_results = VEC_alloc (tree, heap, group_size);
3768   new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
3769   bitsize = TYPE_SIZE (scalar_type);
3770 
3771   /* In case this is a reduction in an inner-loop while vectorizing an outer
3772      loop - we don't need to extract a single scalar result at the end of the
3773      inner-loop (unless it is double reduction, i.e., the use of reduction is
3774      outside the outer-loop).  The final vector of partial results will be used
3775      in the vectorized outer-loop, or reduced to a scalar result at the end of
3776      the outer-loop.  */
3777   if (nested_in_vect_loop && !double_reduc)
3778     goto vect_finalize_reduction;
3779 
3780   /* SLP reduction without reduction chain, e.g.,
3781      # a1 = phi <a2, a0>
3782      # b1 = phi <b2, b0>
3783      a2 = operation (a1)
3784      b2 = operation (b1)  */
3785   slp_reduc = (slp_node && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)));
3786 
3787   /* In case of reduction chain, e.g.,
3788      # a1 = phi <a3, a0>
3789      a2 = operation (a1)
3790      a3 = operation (a2),
3791 
3792      we may end up with more than one vector result.  Here we reduce them to
3793      one vector.  */
3794   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
3795     {
3796       tree first_vect = PHI_RESULT (VEC_index (gimple, new_phis, 0));
3797       tree tmp;
3798       gimple new_vec_stmt = NULL;
3799 
3800       vec_dest = vect_create_destination_var (scalar_dest, vectype);
3801       for (k = 1; k < VEC_length (gimple, new_phis); k++)
3802         {
3803           gimple next_phi = VEC_index (gimple, new_phis, k);
3804           tree second_vect = PHI_RESULT (next_phi);
3805 
3806           tmp = build2 (code, vectype,  first_vect, second_vect);
3807           new_vec_stmt = gimple_build_assign (vec_dest, tmp);
3808           first_vect = make_ssa_name (vec_dest, new_vec_stmt);
3809           gimple_assign_set_lhs (new_vec_stmt, first_vect);
3810           gsi_insert_before (&exit_gsi, new_vec_stmt, GSI_SAME_STMT);
3811         }
3812 
3813       new_phi_result = first_vect;
3814       if (new_vec_stmt)
3815         {
3816           VEC_truncate (gimple, new_phis, 0);
3817           VEC_safe_push (gimple, heap, new_phis, new_vec_stmt);
3818         }
3819     }
3820   else
3821     new_phi_result = PHI_RESULT (VEC_index (gimple, new_phis, 0));
3822 
3823   /* 2.3 Create the reduction code, using one of the three schemes described
3824          above. In SLP we simply need to extract all the elements from the
3825          vector (without reducing them), so we use scalar shifts.  */
3826   if (reduc_code != ERROR_MARK && !slp_reduc)
3827     {
3828       tree tmp;
3829 
3830       /*** Case 1:  Create:
3831            v_out2 = reduc_expr <v_out1>  */
3832 
3833       if (vect_print_dump_info (REPORT_DETAILS))
3834         fprintf (vect_dump, "Reduce using direct vector reduction.");
3835 
3836       vec_dest = vect_create_destination_var (scalar_dest, vectype);
3837       tmp = build1 (reduc_code, vectype, new_phi_result);
3838       epilog_stmt = gimple_build_assign (vec_dest, tmp);
3839       new_temp = make_ssa_name (vec_dest, epilog_stmt);
3840       gimple_assign_set_lhs (epilog_stmt, new_temp);
3841       gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
3842 
3843       extract_scalar_result = true;
3844     }
3845   else
3846     {
3847       enum tree_code shift_code = ERROR_MARK;
3848       bool have_whole_vector_shift = true;
3849       int bit_offset;
3850       int element_bitsize = tree_low_cst (bitsize, 1);
3851       int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
3852       tree vec_temp;
3853 
3854       if (optab_handler (vec_shr_optab, mode) != CODE_FOR_nothing)
3855         shift_code = VEC_RSHIFT_EXPR;
3856       else
3857         have_whole_vector_shift = false;
3858 
3859       /* Regardless of whether we have a whole vector shift, if we're
3860          emulating the operation via tree-vect-generic, we don't want
3861          to use it.  Only the first round of the reduction is likely
3862          to still be profitable via emulation.  */
3863       /* ??? It might be better to emit a reduction tree code here, so that
3864          tree-vect-generic can expand the first round via bit tricks.  */
3865       if (!VECTOR_MODE_P (mode))
3866         have_whole_vector_shift = false;
3867       else
3868         {
3869           optab optab = optab_for_tree_code (code, vectype, optab_default);
3870           if (optab_handler (optab, mode) == CODE_FOR_nothing)
3871             have_whole_vector_shift = false;
3872         }
3873 
3874       if (have_whole_vector_shift && !slp_reduc)
3875         {
3876           /*** Case 2: Create:
3877              for (offset = VS/2; offset >= element_size; offset/=2)
3878                 {
3879                   Create:  va' = vec_shift <va, offset>
3880                   Create:  va = vop <va, va'>
3881                 }  */
3882 
3883           if (vect_print_dump_info (REPORT_DETAILS))
3884             fprintf (vect_dump, "Reduce using vector shifts");
3885 
3886           vec_dest = vect_create_destination_var (scalar_dest, vectype);
3887           new_temp = new_phi_result;
3888           for (bit_offset = vec_size_in_bits/2;
3889                bit_offset >= element_bitsize;
3890                bit_offset /= 2)
3891             {
3892               tree bitpos = size_int (bit_offset);
3893 
3894               epilog_stmt = gimple_build_assign_with_ops (shift_code,
3895                                                vec_dest, new_temp, bitpos);
3896               new_name = make_ssa_name (vec_dest, epilog_stmt);
3897               gimple_assign_set_lhs (epilog_stmt, new_name);
3898               gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
3899 
3900               epilog_stmt = gimple_build_assign_with_ops (code, vec_dest,
3901                                                           new_name, new_temp);
3902               new_temp = make_ssa_name (vec_dest, epilog_stmt);
3903               gimple_assign_set_lhs (epilog_stmt, new_temp);
3904               gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
3905             }
3906 
3907           extract_scalar_result = true;
3908         }
3909       else
3910         {
3911           tree rhs;
3912 
3913           /*** Case 3: Create:
3914              s = extract_field <v_out2, 0>
3915              for (offset = element_size;
3916                   offset < vector_size;
3917                   offset += element_size;)
3918                {
3919                  Create:  s' = extract_field <v_out2, offset>
3920                  Create:  s = op <s, s'>  // For non SLP cases
3921                }  */
3922 
3923           if (vect_print_dump_info (REPORT_DETAILS))
3924             fprintf (vect_dump, "Reduce using scalar code. ");
3925 
3926           vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
3927           FOR_EACH_VEC_ELT (gimple, new_phis, i, new_phi)
3928             {
3929               if (gimple_code (new_phi) == GIMPLE_PHI)
3930                 vec_temp = PHI_RESULT (new_phi);
3931               else
3932                 vec_temp = gimple_assign_lhs (new_phi);
3933               rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
3934                             bitsize_zero_node);
3935               epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
3936               new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
3937               gimple_assign_set_lhs (epilog_stmt, new_temp);
3938               gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
3939 
3940               /* In SLP we don't need to apply reduction operation, so we just
3941                  collect s' values in SCALAR_RESULTS.  */
3942               if (slp_reduc)
3943                 VEC_safe_push (tree, heap, scalar_results, new_temp);
3944 
3945               for (bit_offset = element_bitsize;
3946                    bit_offset < vec_size_in_bits;
3947                    bit_offset += element_bitsize)
3948                 {
3949                   tree bitpos = bitsize_int (bit_offset);
3950                   tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp,
3951                                      bitsize, bitpos);
3952 
3953                   epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
3954                   new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
3955                   gimple_assign_set_lhs (epilog_stmt, new_name);
3956                   gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
3957 
3958                   if (slp_reduc)
3959                     {
3960                       /* In SLP we don't need to apply reduction operation, so
3961                          we just collect s' values in SCALAR_RESULTS.  */
3962                       new_temp = new_name;
3963                       VEC_safe_push (tree, heap, scalar_results, new_name);
3964                     }
3965                   else
3966                     {
3967                       epilog_stmt = gimple_build_assign_with_ops (code,
3968                                           new_scalar_dest, new_name, new_temp);
3969                       new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
3970                       gimple_assign_set_lhs (epilog_stmt, new_temp);
3971                       gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
3972                     }
3973                 }
3974             }
3975 
3976           /* The only case where we need to reduce scalar results in SLP, is
3977              unrolling.  If the size of SCALAR_RESULTS is greater than
3978              GROUP_SIZE, we reduce them combining elements modulo
3979              GROUP_SIZE.  */
3980           if (slp_reduc)
3981             {
3982               tree res, first_res, new_res;
3983               gimple new_stmt;
3984 
3985               /* Reduce multiple scalar results in case of SLP unrolling.  */
3986               for (j = group_size; VEC_iterate (tree, scalar_results, j, res);
3987                    j++)
3988                 {
3989                   first_res = VEC_index (tree, scalar_results, j % group_size);
3990                   new_stmt = gimple_build_assign_with_ops (code,
3991                                               new_scalar_dest, first_res, res);
3992                   new_res = make_ssa_name (new_scalar_dest, new_stmt);
3993                   gimple_assign_set_lhs (new_stmt, new_res);
3994                   gsi_insert_before (&exit_gsi, new_stmt, GSI_SAME_STMT);
3995                   VEC_replace (tree, scalar_results, j % group_size, new_res);
3996                 }
3997             }
3998           else
3999             /* Not SLP - we have one scalar to keep in SCALAR_RESULTS.  */
4000             VEC_safe_push (tree, heap, scalar_results, new_temp);
4001 
4002           extract_scalar_result = false;
4003         }
4004     }
4005 
4006   /* 2.4  Extract the final scalar result.  Create:
4007           s_out3 = extract_field <v_out2, bitpos>  */
4008 
4009   if (extract_scalar_result)
4010     {
4011       tree rhs;
4012 
4013       if (vect_print_dump_info (REPORT_DETAILS))
4014         fprintf (vect_dump, "extract scalar result");
4015 
4016       if (BYTES_BIG_ENDIAN)
4017         bitpos = size_binop (MULT_EXPR,
4018                              bitsize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1),
4019                              TYPE_SIZE (scalar_type));
4020       else
4021         bitpos = bitsize_zero_node;
4022 
4023       rhs = build3 (BIT_FIELD_REF, scalar_type, new_temp, bitsize, bitpos);
4024       epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
4025       new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
4026       gimple_assign_set_lhs (epilog_stmt, new_temp);
4027       gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4028       VEC_safe_push (tree, heap, scalar_results, new_temp);
4029     }
4030 
4031 vect_finalize_reduction:
4032 
4033   if (double_reduc)
4034     loop = loop->inner;
4035 
4036   /* 2.5 Adjust the final result by the initial value of the reduction
4037 	 variable. (When such adjustment is not needed, then
4038 	 'adjustment_def' is zero).  For example, if code is PLUS we create:
4039 	 new_temp = loop_exit_def + adjustment_def  */
4040 
4041   if (adjustment_def)
4042     {
4043       gcc_assert (!slp_reduc);
4044       if (nested_in_vect_loop)
4045 	{
4046           new_phi = VEC_index (gimple, new_phis, 0);
4047 	  gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) == VECTOR_TYPE);
4048 	  expr = build2 (code, vectype, PHI_RESULT (new_phi), adjustment_def);
4049 	  new_dest = vect_create_destination_var (scalar_dest, vectype);
4050 	}
4051       else
4052 	{
4053           new_temp = VEC_index (tree, scalar_results, 0);
4054 	  gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) != VECTOR_TYPE);
4055 	  expr = build2 (code, scalar_type, new_temp, adjustment_def);
4056 	  new_dest = vect_create_destination_var (scalar_dest, scalar_type);
4057 	}
4058 
4059       epilog_stmt = gimple_build_assign (new_dest, expr);
4060       new_temp = make_ssa_name (new_dest, epilog_stmt);
4061       gimple_assign_set_lhs (epilog_stmt, new_temp);
4062       SSA_NAME_DEF_STMT (new_temp) = epilog_stmt;
4063       gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4064       if (nested_in_vect_loop)
4065         {
4066           set_vinfo_for_stmt (epilog_stmt,
4067                               new_stmt_vec_info (epilog_stmt, loop_vinfo,
4068                                                  NULL));
4069           STMT_VINFO_RELATED_STMT (vinfo_for_stmt (epilog_stmt)) =
4070                 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_phi));
4071 
4072           if (!double_reduc)
4073             VEC_quick_push (tree, scalar_results, new_temp);
4074           else
4075             VEC_replace (tree, scalar_results, 0, new_temp);
4076         }
4077       else
4078         VEC_replace (tree, scalar_results, 0, new_temp);
4079 
4080       VEC_replace (gimple, new_phis, 0, epilog_stmt);
4081     }
4082 
4083   /* 2.6  Handle the loop-exit phis.  Replace the uses of scalar loop-exit
4084           phis with new adjusted scalar results, i.e., replace use <s_out0>
4085           with use <s_out4>.
4086 
4087      Transform:
4088         loop_exit:
4089           s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
4090           v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
4091           v_out2 = reduce <v_out1>
4092           s_out3 = extract_field <v_out2, 0>
4093           s_out4 = adjust_result <s_out3>
4094           use <s_out0>
4095           use <s_out0>
4096 
4097      into:
4098 
4099         loop_exit:
4100           s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
4101           v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
4102           v_out2 = reduce <v_out1>
4103           s_out3 = extract_field <v_out2, 0>
4104           s_out4 = adjust_result <s_out3>
4105           use <s_out4>
4106           use <s_out4> */
4107 
4108 
4109   /* In SLP reduction chain we reduce vector results into one vector if
4110      necessary, hence we set here GROUP_SIZE to 1.  SCALAR_DEST is the LHS of
4111      the last stmt in the reduction chain, since we are looking for the loop
4112      exit phi node.  */
4113   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
4114     {
4115       scalar_dest = gimple_assign_lhs (VEC_index (gimple,
4116                                        SLP_TREE_SCALAR_STMTS (slp_node),
4117                                        group_size - 1));
4118       group_size = 1;
4119     }
4120 
4121   /* In SLP we may have several statements in NEW_PHIS and REDUCTION_PHIS (in
4122      case that GROUP_SIZE is greater than vectorization factor).  Therefore, we
4123      need to match SCALAR_RESULTS with corresponding statements.  The first
4124      (GROUP_SIZE / number of new vector stmts) scalar results correspond to
4125      the first vector stmt, etc.
4126      (RATIO is equal to (GROUP_SIZE / number of new vector stmts)).  */
4127   if (group_size > VEC_length (gimple, new_phis))
4128     {
4129       ratio = group_size / VEC_length (gimple, new_phis);
4130       gcc_assert (!(group_size % VEC_length (gimple, new_phis)));
4131     }
4132   else
4133     ratio = 1;
4134 
4135   for (k = 0; k < group_size; k++)
4136     {
4137       if (k % ratio == 0)
4138         {
4139           epilog_stmt = VEC_index (gimple, new_phis, k / ratio);
4140           reduction_phi = VEC_index (gimple, reduction_phis, k / ratio);
4141 	  if (double_reduc)
4142 	    inner_phi = VEC_index (gimple, inner_phis, k / ratio);
4143         }
4144 
4145       if (slp_reduc)
4146         {
4147           gimple current_stmt = VEC_index (gimple,
4148                                        SLP_TREE_SCALAR_STMTS (slp_node), k);
4149 
4150           orig_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (current_stmt));
4151           /* SLP statements can't participate in patterns.  */
4152           gcc_assert (!orig_stmt);
4153           scalar_dest = gimple_assign_lhs (current_stmt);
4154         }
4155 
4156       phis = VEC_alloc (gimple, heap, 3);
4157       /* Find the loop-closed-use at the loop exit of the original scalar
4158          result.  (The reduction result is expected to have two immediate uses -
4159          one at the latch block, and one at the loop exit).  */
4160       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
4161         if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
4162           VEC_safe_push (gimple, heap, phis, USE_STMT (use_p));
4163 
4164       /* We expect to have found an exit_phi because of loop-closed-ssa
4165          form.  */
4166       gcc_assert (!VEC_empty (gimple, phis));
4167 
4168       FOR_EACH_VEC_ELT (gimple, phis, i, exit_phi)
4169         {
4170           if (outer_loop)
4171             {
4172               stmt_vec_info exit_phi_vinfo = vinfo_for_stmt (exit_phi);
4173               gimple vect_phi;
4174 
4175               /* FORNOW. Currently not supporting the case that an inner-loop
4176                  reduction is not used in the outer-loop (but only outside the
4177                  outer-loop), unless it is double reduction.  */
4178               gcc_assert ((STMT_VINFO_RELEVANT_P (exit_phi_vinfo)
4179                            && !STMT_VINFO_LIVE_P (exit_phi_vinfo))
4180                           || double_reduc);
4181 
4182               STMT_VINFO_VEC_STMT (exit_phi_vinfo) = epilog_stmt;
4183               if (!double_reduc
4184                   || STMT_VINFO_DEF_TYPE (exit_phi_vinfo)
4185                       != vect_double_reduction_def)
4186                 continue;
4187 
4188               /* Handle double reduction:
4189 
4190                  stmt1: s1 = phi <s0, s2>  - double reduction phi (outer loop)
4191                  stmt2:   s3 = phi <s1, s4> - (regular) reduc phi (inner loop)
4192                  stmt3:   s4 = use (s3)     - (regular) reduc stmt (inner loop)
4193                  stmt4: s2 = phi <s4>      - double reduction stmt (outer loop)
4194 
4195                  At that point the regular reduction (stmt2 and stmt3) is
4196                  already vectorized, as well as the exit phi node, stmt4.
4197                  Here we vectorize the phi node of double reduction, stmt1, and
4198                  update all relevant statements.  */
4199 
4200               /* Go through all the uses of s2 to find double reduction phi
4201                  node, i.e., stmt1 above.  */
4202               orig_name = PHI_RESULT (exit_phi);
4203               FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
4204                 {
4205                   stmt_vec_info use_stmt_vinfo = vinfo_for_stmt (use_stmt);
4206                   stmt_vec_info new_phi_vinfo;
4207                   tree vect_phi_init, preheader_arg, vect_phi_res, init_def;
4208                   basic_block bb = gimple_bb (use_stmt);
4209                   gimple use;
4210 
4211                   /* Check that USE_STMT is really double reduction phi
4212                      node.  */
4213                   if (gimple_code (use_stmt) != GIMPLE_PHI
4214                       || gimple_phi_num_args (use_stmt) != 2
4215                       || !use_stmt_vinfo
4216                       || STMT_VINFO_DEF_TYPE (use_stmt_vinfo)
4217                           != vect_double_reduction_def
4218                       || bb->loop_father != outer_loop)
4219                     continue;
4220 
4221                   /* Create vector phi node for double reduction:
4222                      vs1 = phi <vs0, vs2>
4223                      vs1 was created previously in this function by a call to
4224                        vect_get_vec_def_for_operand and is stored in
4225                        vec_initial_def;
4226                      vs2 is defined by INNER_PHI, the vectorized EXIT_PHI;
4227                      vs0 is created here.  */
4228 
4229                   /* Create vector phi node.  */
4230                   vect_phi = create_phi_node (vec_initial_def, bb);
4231                   new_phi_vinfo = new_stmt_vec_info (vect_phi,
4232                                     loop_vec_info_for_loop (outer_loop), NULL);
4233                   set_vinfo_for_stmt (vect_phi, new_phi_vinfo);
4234 
4235                   /* Create vs0 - initial def of the double reduction phi.  */
4236                   preheader_arg = PHI_ARG_DEF_FROM_EDGE (use_stmt,
4237                                              loop_preheader_edge (outer_loop));
4238                   init_def = get_initial_def_for_reduction (stmt,
4239                                                           preheader_arg, NULL);
4240                   vect_phi_init = vect_init_vector (use_stmt, init_def,
4241                                                     vectype, NULL);
4242 
4243                   /* Update phi node arguments with vs0 and vs2.  */
4244                   add_phi_arg (vect_phi, vect_phi_init,
4245                                loop_preheader_edge (outer_loop),
4246                                UNKNOWN_LOCATION);
4247                   add_phi_arg (vect_phi, PHI_RESULT (inner_phi),
4248                                loop_latch_edge (outer_loop), UNKNOWN_LOCATION);
4249                   if (vect_print_dump_info (REPORT_DETAILS))
4250                     {
4251                       fprintf (vect_dump, "created double reduction phi "
4252                                           "node: ");
4253                       print_gimple_stmt (vect_dump, vect_phi, 0, TDF_SLIM);
4254                     }
4255 
4256                   vect_phi_res = PHI_RESULT (vect_phi);
4257 
4258                   /* Replace the use, i.e., set the correct vs1 in the regular
4259                      reduction phi node.  FORNOW, NCOPIES is always 1, so the
4260                      loop is redundant.  */
4261                   use = reduction_phi;
4262                   for (j = 0; j < ncopies; j++)
4263                     {
4264                       edge pr_edge = loop_preheader_edge (loop);
4265                       SET_PHI_ARG_DEF (use, pr_edge->dest_idx, vect_phi_res);
4266                       use = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use));
4267                     }
4268                 }
4269             }
4270         }
4271 
4272       VEC_free (gimple, heap, phis);
4273       if (nested_in_vect_loop)
4274         {
4275           if (double_reduc)
4276             loop = outer_loop;
4277           else
4278             continue;
4279         }
4280 
4281       phis = VEC_alloc (gimple, heap, 3);
4282       /* Find the loop-closed-use at the loop exit of the original scalar
4283          result.  (The reduction result is expected to have two immediate uses,
4284          one at the latch block, and one at the loop exit).  For double
4285          reductions we are looking for exit phis of the outer loop.  */
4286       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
4287         {
4288           if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
4289             VEC_safe_push (gimple, heap, phis, USE_STMT (use_p));
4290           else
4291             {
4292               if (double_reduc && gimple_code (USE_STMT (use_p)) == GIMPLE_PHI)
4293                 {
4294                   tree phi_res = PHI_RESULT (USE_STMT (use_p));
4295 
4296                   FOR_EACH_IMM_USE_FAST (phi_use_p, phi_imm_iter, phi_res)
4297                     {
4298                       if (!flow_bb_inside_loop_p (loop,
4299                                              gimple_bb (USE_STMT (phi_use_p))))
4300                         VEC_safe_push (gimple, heap, phis,
4301                                        USE_STMT (phi_use_p));
4302                     }
4303                 }
4304             }
4305         }
4306 
4307       FOR_EACH_VEC_ELT (gimple, phis, i, exit_phi)
4308         {
4309           /* Replace the uses:  */
4310           orig_name = PHI_RESULT (exit_phi);
4311           scalar_result = VEC_index (tree, scalar_results, k);
4312           FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
4313             FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
4314               SET_USE (use_p, scalar_result);
4315         }
4316 
4317       VEC_free (gimple, heap, phis);
4318     }
4319 
4320   VEC_free (tree, heap, scalar_results);
4321   VEC_free (gimple, heap, new_phis);
4322 }
4323 
4324 
4325 /* Function vectorizable_reduction.
4326 
4327    Check if STMT performs a reduction operation that can be vectorized.
4328    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4329    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4330    Return FALSE if not a vectorizable STMT, TRUE otherwise.
4331 
4332    This function also handles reduction idioms (patterns) that have been
4333    recognized in advance during vect_pattern_recog.  In this case, STMT may be
4334    of this form:
4335      X = pattern_expr (arg0, arg1, ..., X)
4336    and it's STMT_VINFO_RELATED_STMT points to the last stmt in the original
4337    sequence that had been detected and replaced by the pattern-stmt (STMT).
4338 
4339    In some cases of reduction patterns, the type of the reduction variable X is
4340    different than the type of the other arguments of STMT.
4341    In such cases, the vectype that is used when transforming STMT into a vector
4342    stmt is different than the vectype that is used to determine the
4343    vectorization factor, because it consists of a different number of elements
4344    than the actual number of elements that are being operated upon in parallel.
4345 
4346    For example, consider an accumulation of shorts into an int accumulator.
4347    On some targets it's possible to vectorize this pattern operating on 8
4348    shorts at a time (hence, the vectype for purposes of determining the
4349    vectorization factor should be V8HI); on the other hand, the vectype that
4350    is used to create the vector form is actually V4SI (the type of the result).
4351 
4352    Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that
4353    indicates what is the actual level of parallelism (V8HI in the example), so
4354    that the right vectorization factor would be derived.  This vectype
4355    corresponds to the type of arguments to the reduction stmt, and should *NOT*
4356    be used to create the vectorized stmt.  The right vectype for the vectorized
4357    stmt is obtained from the type of the result X:
4358         get_vectype_for_scalar_type (TREE_TYPE (X))
4359 
4360    This means that, contrary to "regular" reductions (or "regular" stmts in
4361    general), the following equation:
4362       STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X))
4363    does *NOT* necessarily hold for reduction patterns.  */
4364 
4365 bool
4366 vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
4367 			gimple *vec_stmt, slp_tree slp_node)
4368 {
4369   tree vec_dest;
4370   tree scalar_dest;
4371   tree loop_vec_def0 = NULL_TREE, loop_vec_def1 = NULL_TREE;
4372   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4373   tree vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4374   tree vectype_in = NULL_TREE;
4375   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4376   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4377   enum tree_code code, orig_code, epilog_reduc_code;
4378   enum machine_mode vec_mode;
4379   int op_type;
4380   optab optab, reduc_optab;
4381   tree new_temp = NULL_TREE;
4382   tree def;
4383   gimple def_stmt;
4384   enum vect_def_type dt;
4385   gimple new_phi = NULL;
4386   tree scalar_type;
4387   bool is_simple_use;
4388   gimple orig_stmt;
4389   stmt_vec_info orig_stmt_info;
4390   tree expr = NULL_TREE;
4391   int i;
4392   int ncopies;
4393   int epilog_copies;
4394   stmt_vec_info prev_stmt_info, prev_phi_info;
4395   bool single_defuse_cycle = false;
4396   tree reduc_def = NULL_TREE;
4397   gimple new_stmt = NULL;
4398   int j;
4399   tree ops[3];
4400   bool nested_cycle = false, found_nested_cycle_def = false;
4401   gimple reduc_def_stmt = NULL;
4402   /* The default is that the reduction variable is the last in statement.  */
4403   int reduc_index = 2;
4404   bool double_reduc = false, dummy;
4405   basic_block def_bb;
4406   struct loop * def_stmt_loop, *outer_loop = NULL;
4407   tree def_arg;
4408   gimple def_arg_stmt;
4409   VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL, *vect_defs = NULL;
4410   VEC (gimple, heap) *phis = NULL;
4411   int vec_num;
4412   tree def0, def1, tem, op0, op1 = NULL_TREE;
4413 
4414   /* In case of reduction chain we switch to the first stmt in the chain, but
4415      we don't update STMT_INFO, since only the last stmt is marked as reduction
4416      and has reduction properties.  */
4417   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
4418     stmt = GROUP_FIRST_ELEMENT (stmt_info);
4419 
4420   if (nested_in_vect_loop_p (loop, stmt))
4421     {
4422       outer_loop = loop;
4423       loop = loop->inner;
4424       nested_cycle = true;
4425     }
4426 
4427   /* 1. Is vectorizable reduction?  */
4428   /* Not supportable if the reduction variable is used in the loop, unless
4429      it's a reduction chain.  */
4430   if (STMT_VINFO_RELEVANT (stmt_info) > vect_used_in_outer
4431       && !GROUP_FIRST_ELEMENT (stmt_info))
4432     return false;
4433 
4434   /* Reductions that are not used even in an enclosing outer-loop,
4435      are expected to be "live" (used out of the loop).  */
4436   if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_scope
4437       && !STMT_VINFO_LIVE_P (stmt_info))
4438     return false;
4439 
4440   /* Make sure it was already recognized as a reduction computation.  */
4441   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def
4442       && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle)
4443     return false;
4444 
4445   /* 2. Has this been recognized as a reduction pattern?
4446 
4447      Check if STMT represents a pattern that has been recognized
4448      in earlier analysis stages.  For stmts that represent a pattern,
4449      the STMT_VINFO_RELATED_STMT field records the last stmt in
4450      the original sequence that constitutes the pattern.  */
4451 
4452   orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
4453   if (orig_stmt)
4454     {
4455       orig_stmt_info = vinfo_for_stmt (orig_stmt);
4456       gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info));
4457       gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info));
4458     }
4459 
4460   /* 3. Check the operands of the operation.  The first operands are defined
4461         inside the loop body. The last operand is the reduction variable,
4462         which is defined by the loop-header-phi.  */
4463 
4464   gcc_assert (is_gimple_assign (stmt));
4465 
4466   /* Flatten RHS.  */
4467   switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
4468     {
4469     case GIMPLE_SINGLE_RHS:
4470       op_type = TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt));
4471       if (op_type == ternary_op)
4472 	{
4473 	  tree rhs = gimple_assign_rhs1 (stmt);
4474 	  ops[0] = TREE_OPERAND (rhs, 0);
4475 	  ops[1] = TREE_OPERAND (rhs, 1);
4476 	  ops[2] = TREE_OPERAND (rhs, 2);
4477 	  code = TREE_CODE (rhs);
4478 	}
4479       else
4480 	return false;
4481       break;
4482 
4483     case GIMPLE_BINARY_RHS:
4484       code = gimple_assign_rhs_code (stmt);
4485       op_type = TREE_CODE_LENGTH (code);
4486       gcc_assert (op_type == binary_op);
4487       ops[0] = gimple_assign_rhs1 (stmt);
4488       ops[1] = gimple_assign_rhs2 (stmt);
4489       break;
4490 
4491     case GIMPLE_TERNARY_RHS:
4492       code = gimple_assign_rhs_code (stmt);
4493       op_type = TREE_CODE_LENGTH (code);
4494       gcc_assert (op_type == ternary_op);
4495       ops[0] = gimple_assign_rhs1 (stmt);
4496       ops[1] = gimple_assign_rhs2 (stmt);
4497       ops[2] = gimple_assign_rhs3 (stmt);
4498       break;
4499 
4500     case GIMPLE_UNARY_RHS:
4501       return false;
4502 
4503     default:
4504       gcc_unreachable ();
4505     }
4506 
4507   if (code == COND_EXPR && slp_node)
4508     return false;
4509 
4510   scalar_dest = gimple_assign_lhs (stmt);
4511   scalar_type = TREE_TYPE (scalar_dest);
4512   if (!POINTER_TYPE_P (scalar_type) && !INTEGRAL_TYPE_P (scalar_type)
4513       && !SCALAR_FLOAT_TYPE_P (scalar_type))
4514     return false;
4515 
4516   /* Do not try to vectorize bit-precision reductions.  */
4517   if ((TYPE_PRECISION (scalar_type)
4518        != GET_MODE_PRECISION (TYPE_MODE (scalar_type))))
4519     return false;
4520 
4521   /* All uses but the last are expected to be defined in the loop.
4522      The last use is the reduction variable.  In case of nested cycle this
4523      assumption is not true: we use reduc_index to record the index of the
4524      reduction variable.  */
4525   for (i = 0; i < op_type - 1; i++)
4526     {
4527       /* The condition of COND_EXPR is checked in vectorizable_condition().  */
4528       if (i == 0 && code == COND_EXPR)
4529         continue;
4530 
4531       is_simple_use = vect_is_simple_use_1 (ops[i], stmt, loop_vinfo, NULL,
4532 					    &def_stmt, &def, &dt, &tem);
4533       if (!vectype_in)
4534 	vectype_in = tem;
4535       gcc_assert (is_simple_use);
4536 
4537       if (dt != vect_internal_def
4538 	  && dt != vect_external_def
4539 	  && dt != vect_constant_def
4540 	  && dt != vect_induction_def
4541           && !(dt == vect_nested_cycle && nested_cycle))
4542 	return false;
4543 
4544       if (dt == vect_nested_cycle)
4545         {
4546           found_nested_cycle_def = true;
4547           reduc_def_stmt = def_stmt;
4548           reduc_index = i;
4549         }
4550     }
4551 
4552   is_simple_use = vect_is_simple_use_1 (ops[i], stmt, loop_vinfo, NULL,
4553 					&def_stmt, &def, &dt, &tem);
4554   if (!vectype_in)
4555     vectype_in = tem;
4556   gcc_assert (is_simple_use);
4557   if (!(dt == vect_reduction_def
4558 	|| dt == vect_nested_cycle
4559 	|| ((dt == vect_internal_def || dt == vect_external_def
4560 	     || dt == vect_constant_def || dt == vect_induction_def)
4561 	    && nested_cycle && found_nested_cycle_def)))
4562     {
4563       /* For pattern recognized stmts, orig_stmt might be a reduction,
4564 	 but some helper statements for the pattern might not, or
4565 	 might be COND_EXPRs with reduction uses in the condition.  */
4566       gcc_assert (orig_stmt);
4567       return false;
4568     }
4569   if (!found_nested_cycle_def)
4570     reduc_def_stmt = def_stmt;
4571 
4572   gcc_assert (gimple_code (reduc_def_stmt) == GIMPLE_PHI);
4573   if (orig_stmt)
4574     gcc_assert (orig_stmt == vect_is_simple_reduction (loop_vinfo,
4575                                                        reduc_def_stmt,
4576                                                        !nested_cycle,
4577                                                        &dummy));
4578   else
4579     {
4580       gimple tmp = vect_is_simple_reduction (loop_vinfo, reduc_def_stmt,
4581                                              !nested_cycle, &dummy);
4582       /* We changed STMT to be the first stmt in reduction chain, hence we
4583          check that in this case the first element in the chain is STMT.  */
4584       gcc_assert (stmt == tmp
4585                   || GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) == stmt);
4586     }
4587 
4588   if (STMT_VINFO_LIVE_P (vinfo_for_stmt (reduc_def_stmt)))
4589     return false;
4590 
4591   if (slp_node || PURE_SLP_STMT (stmt_info))
4592     ncopies = 1;
4593   else
4594     ncopies = (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
4595                / TYPE_VECTOR_SUBPARTS (vectype_in));
4596 
4597   gcc_assert (ncopies >= 1);
4598 
4599   vec_mode = TYPE_MODE (vectype_in);
4600 
4601   if (code == COND_EXPR)
4602     {
4603       if (!vectorizable_condition (stmt, gsi, NULL, ops[reduc_index], 0, NULL))
4604         {
4605           if (vect_print_dump_info (REPORT_DETAILS))
4606             fprintf (vect_dump, "unsupported condition in reduction");
4607 
4608             return false;
4609         }
4610     }
4611   else
4612     {
4613       /* 4. Supportable by target?  */
4614 
4615       /* 4.1. check support for the operation in the loop  */
4616       optab = optab_for_tree_code (code, vectype_in, optab_default);
4617       if (!optab)
4618         {
4619           if (vect_print_dump_info (REPORT_DETAILS))
4620             fprintf (vect_dump, "no optab.");
4621 
4622           return false;
4623         }
4624 
4625       if (optab_handler (optab, vec_mode) == CODE_FOR_nothing)
4626         {
4627           if (vect_print_dump_info (REPORT_DETAILS))
4628             fprintf (vect_dump, "op not supported by target.");
4629 
4630           if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4631               || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
4632 	          < vect_min_worthwhile_factor (code))
4633             return false;
4634 
4635           if (vect_print_dump_info (REPORT_DETAILS))
4636   	    fprintf (vect_dump, "proceeding using word mode.");
4637         }
4638 
4639       /* Worthwhile without SIMD support?  */
4640       if (!VECTOR_MODE_P (TYPE_MODE (vectype_in))
4641           && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
4642    	     < vect_min_worthwhile_factor (code))
4643         {
4644           if (vect_print_dump_info (REPORT_DETAILS))
4645 	    fprintf (vect_dump, "not worthwhile without SIMD support.");
4646 
4647           return false;
4648         }
4649     }
4650 
4651   /* 4.2. Check support for the epilog operation.
4652 
4653           If STMT represents a reduction pattern, then the type of the
4654           reduction variable may be different than the type of the rest
4655           of the arguments.  For example, consider the case of accumulation
4656           of shorts into an int accumulator; The original code:
4657                         S1: int_a = (int) short_a;
4658           orig_stmt->   S2: int_acc = plus <int_a ,int_acc>;
4659 
4660           was replaced with:
4661                         STMT: int_acc = widen_sum <short_a, int_acc>
4662 
4663           This means that:
4664           1. The tree-code that is used to create the vector operation in the
4665              epilog code (that reduces the partial results) is not the
4666              tree-code of STMT, but is rather the tree-code of the original
4667              stmt from the pattern that STMT is replacing.  I.e, in the example
4668              above we want to use 'widen_sum' in the loop, but 'plus' in the
4669              epilog.
4670           2. The type (mode) we use to check available target support
4671              for the vector operation to be created in the *epilog*, is
4672              determined by the type of the reduction variable (in the example
4673              above we'd check this: optab_handler (plus_optab, vect_int_mode])).
4674              However the type (mode) we use to check available target support
4675              for the vector operation to be created *inside the loop*, is
4676              determined by the type of the other arguments to STMT (in the
4677              example we'd check this: optab_handler (widen_sum_optab,
4678 	     vect_short_mode)).
4679 
4680           This is contrary to "regular" reductions, in which the types of all
4681           the arguments are the same as the type of the reduction variable.
4682           For "regular" reductions we can therefore use the same vector type
4683           (and also the same tree-code) when generating the epilog code and
4684           when generating the code inside the loop.  */
4685 
4686   if (orig_stmt)
4687     {
4688       /* This is a reduction pattern: get the vectype from the type of the
4689          reduction variable, and get the tree-code from orig_stmt.  */
4690       orig_code = gimple_assign_rhs_code (orig_stmt);
4691       gcc_assert (vectype_out);
4692       vec_mode = TYPE_MODE (vectype_out);
4693     }
4694   else
4695     {
4696       /* Regular reduction: use the same vectype and tree-code as used for
4697          the vector code inside the loop can be used for the epilog code. */
4698       orig_code = code;
4699     }
4700 
4701   if (nested_cycle)
4702     {
4703       def_bb = gimple_bb (reduc_def_stmt);
4704       def_stmt_loop = def_bb->loop_father;
4705       def_arg = PHI_ARG_DEF_FROM_EDGE (reduc_def_stmt,
4706                                        loop_preheader_edge (def_stmt_loop));
4707       if (TREE_CODE (def_arg) == SSA_NAME
4708           && (def_arg_stmt = SSA_NAME_DEF_STMT (def_arg))
4709           && gimple_code (def_arg_stmt) == GIMPLE_PHI
4710           && flow_bb_inside_loop_p (outer_loop, gimple_bb (def_arg_stmt))
4711           && vinfo_for_stmt (def_arg_stmt)
4712           && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_arg_stmt))
4713               == vect_double_reduction_def)
4714         double_reduc = true;
4715     }
4716 
4717   epilog_reduc_code = ERROR_MARK;
4718   if (reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
4719     {
4720       reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype_out,
4721                                          optab_default);
4722       if (!reduc_optab)
4723         {
4724           if (vect_print_dump_info (REPORT_DETAILS))
4725             fprintf (vect_dump, "no optab for reduction.");
4726 
4727           epilog_reduc_code = ERROR_MARK;
4728         }
4729 
4730       if (reduc_optab
4731           && optab_handler (reduc_optab, vec_mode) == CODE_FOR_nothing)
4732         {
4733           if (vect_print_dump_info (REPORT_DETAILS))
4734             fprintf (vect_dump, "reduc op not supported by target.");
4735 
4736           epilog_reduc_code = ERROR_MARK;
4737         }
4738     }
4739   else
4740     {
4741       if (!nested_cycle || double_reduc)
4742         {
4743           if (vect_print_dump_info (REPORT_DETAILS))
4744             fprintf (vect_dump, "no reduc code for scalar code.");
4745 
4746           return false;
4747         }
4748     }
4749 
4750   if (double_reduc && ncopies > 1)
4751     {
4752       if (vect_print_dump_info (REPORT_DETAILS))
4753         fprintf (vect_dump, "multiple types in double reduction");
4754 
4755       return false;
4756     }
4757 
4758   /* In case of widenning multiplication by a constant, we update the type
4759      of the constant to be the type of the other operand.  We check that the
4760      constant fits the type in the pattern recognition pass.  */
4761   if (code == DOT_PROD_EXPR
4762       && !types_compatible_p (TREE_TYPE (ops[0]), TREE_TYPE (ops[1])))
4763     {
4764       if (TREE_CODE (ops[0]) == INTEGER_CST)
4765         ops[0] = fold_convert (TREE_TYPE (ops[1]), ops[0]);
4766       else if (TREE_CODE (ops[1]) == INTEGER_CST)
4767         ops[1] = fold_convert (TREE_TYPE (ops[0]), ops[1]);
4768       else
4769         {
4770           if (vect_print_dump_info (REPORT_DETAILS))
4771             fprintf (vect_dump, "invalid types in dot-prod");
4772 
4773           return false;
4774         }
4775     }
4776 
4777   if (!vec_stmt) /* transformation not required.  */
4778     {
4779       if (!vect_model_reduction_cost (stmt_info, epilog_reduc_code, ncopies))
4780         return false;
4781       STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
4782       return true;
4783     }
4784 
4785   /** Transform.  **/
4786 
4787   if (vect_print_dump_info (REPORT_DETAILS))
4788     fprintf (vect_dump, "transform reduction.");
4789 
4790   /* FORNOW: Multiple types are not supported for condition.  */
4791   if (code == COND_EXPR)
4792     gcc_assert (ncopies == 1);
4793 
4794   /* Create the destination vector  */
4795   vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
4796 
4797   /* In case the vectorization factor (VF) is bigger than the number
4798      of elements that we can fit in a vectype (nunits), we have to generate
4799      more than one vector stmt - i.e - we need to "unroll" the
4800      vector stmt by a factor VF/nunits.  For more details see documentation
4801      in vectorizable_operation.  */
4802 
4803   /* If the reduction is used in an outer loop we need to generate
4804      VF intermediate results, like so (e.g. for ncopies=2):
4805 	r0 = phi (init, r0)
4806 	r1 = phi (init, r1)
4807 	r0 = x0 + r0;
4808         r1 = x1 + r1;
4809     (i.e. we generate VF results in 2 registers).
4810     In this case we have a separate def-use cycle for each copy, and therefore
4811     for each copy we get the vector def for the reduction variable from the
4812     respective phi node created for this copy.
4813 
4814     Otherwise (the reduction is unused in the loop nest), we can combine
4815     together intermediate results, like so (e.g. for ncopies=2):
4816 	r = phi (init, r)
4817 	r = x0 + r;
4818 	r = x1 + r;
4819    (i.e. we generate VF/2 results in a single register).
4820    In this case for each copy we get the vector def for the reduction variable
4821    from the vectorized reduction operation generated in the previous iteration.
4822   */
4823 
4824   if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_scope)
4825     {
4826       single_defuse_cycle = true;
4827       epilog_copies = 1;
4828     }
4829   else
4830     epilog_copies = ncopies;
4831 
4832   prev_stmt_info = NULL;
4833   prev_phi_info = NULL;
4834   if (slp_node)
4835     {
4836       vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
4837       gcc_assert (TYPE_VECTOR_SUBPARTS (vectype_out)
4838                   == TYPE_VECTOR_SUBPARTS (vectype_in));
4839     }
4840   else
4841     {
4842       vec_num = 1;
4843       vec_oprnds0 = VEC_alloc (tree, heap, 1);
4844       if (op_type == ternary_op)
4845         vec_oprnds1 = VEC_alloc (tree, heap, 1);
4846     }
4847 
4848   phis = VEC_alloc (gimple, heap, vec_num);
4849   vect_defs = VEC_alloc (tree, heap, vec_num);
4850   if (!slp_node)
4851     VEC_quick_push (tree, vect_defs, NULL_TREE);
4852 
4853   for (j = 0; j < ncopies; j++)
4854     {
4855       if (j == 0 || !single_defuse_cycle)
4856 	{
4857           for (i = 0; i < vec_num; i++)
4858             {
4859               /* Create the reduction-phi that defines the reduction
4860                  operand.  */
4861               new_phi = create_phi_node (vec_dest, loop->header);
4862               set_vinfo_for_stmt (new_phi,
4863                                   new_stmt_vec_info (new_phi, loop_vinfo,
4864                                                      NULL));
4865                if (j == 0 || slp_node)
4866                  VEC_quick_push (gimple, phis, new_phi);
4867             }
4868         }
4869 
4870       if (code == COND_EXPR)
4871         {
4872           gcc_assert (!slp_node);
4873           vectorizable_condition (stmt, gsi, vec_stmt,
4874                                   PHI_RESULT (VEC_index (gimple, phis, 0)),
4875                                   reduc_index, NULL);
4876           /* Multiple types are not supported for condition.  */
4877           break;
4878         }
4879 
4880       /* Handle uses.  */
4881       if (j == 0)
4882         {
4883           op0 = ops[!reduc_index];
4884           if (op_type == ternary_op)
4885             {
4886               if (reduc_index == 0)
4887                 op1 = ops[2];
4888               else
4889                 op1 = ops[1];
4890             }
4891 
4892           if (slp_node)
4893             vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4894                                slp_node, -1);
4895           else
4896             {
4897               loop_vec_def0 = vect_get_vec_def_for_operand (ops[!reduc_index],
4898                                                             stmt, NULL);
4899               VEC_quick_push (tree, vec_oprnds0, loop_vec_def0);
4900               if (op_type == ternary_op)
4901                {
4902                  loop_vec_def1 = vect_get_vec_def_for_operand (op1, stmt,
4903                                                                NULL);
4904                  VEC_quick_push (tree, vec_oprnds1, loop_vec_def1);
4905                }
4906             }
4907         }
4908       else
4909         {
4910           if (!slp_node)
4911             {
4912               enum vect_def_type dt;
4913               gimple dummy_stmt;
4914               tree dummy;
4915 
4916               vect_is_simple_use (ops[!reduc_index], stmt, loop_vinfo, NULL,
4917                                   &dummy_stmt, &dummy, &dt);
4918               loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt,
4919                                                               loop_vec_def0);
4920               VEC_replace (tree, vec_oprnds0, 0, loop_vec_def0);
4921               if (op_type == ternary_op)
4922                 {
4923                   vect_is_simple_use (op1, stmt, loop_vinfo, NULL, &dummy_stmt,
4924                                       &dummy, &dt);
4925                   loop_vec_def1 = vect_get_vec_def_for_stmt_copy (dt,
4926                                                                 loop_vec_def1);
4927                   VEC_replace (tree, vec_oprnds1, 0, loop_vec_def1);
4928                 }
4929             }
4930 
4931           if (single_defuse_cycle)
4932             reduc_def = gimple_assign_lhs (new_stmt);
4933 
4934           STMT_VINFO_RELATED_STMT (prev_phi_info) = new_phi;
4935         }
4936 
4937       FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, def0)
4938         {
4939           if (slp_node)
4940             reduc_def = PHI_RESULT (VEC_index (gimple, phis, i));
4941           else
4942             {
4943               if (!single_defuse_cycle || j == 0)
4944                 reduc_def = PHI_RESULT (new_phi);
4945             }
4946 
4947           def1 = ((op_type == ternary_op)
4948                   ? VEC_index (tree, vec_oprnds1, i) : NULL);
4949           if (op_type == binary_op)
4950             {
4951               if (reduc_index == 0)
4952                 expr = build2 (code, vectype_out, reduc_def, def0);
4953               else
4954                 expr = build2 (code, vectype_out, def0, reduc_def);
4955             }
4956           else
4957             {
4958               if (reduc_index == 0)
4959                 expr = build3 (code, vectype_out, reduc_def, def0, def1);
4960               else
4961                 {
4962                   if (reduc_index == 1)
4963                     expr = build3 (code, vectype_out, def0, reduc_def, def1);
4964                   else
4965                     expr = build3 (code, vectype_out, def0, def1, reduc_def);
4966                 }
4967             }
4968 
4969           new_stmt = gimple_build_assign (vec_dest, expr);
4970           new_temp = make_ssa_name (vec_dest, new_stmt);
4971           gimple_assign_set_lhs (new_stmt, new_temp);
4972           vect_finish_stmt_generation (stmt, new_stmt, gsi);
4973 
4974           if (slp_node)
4975             {
4976               VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
4977               VEC_quick_push (tree, vect_defs, new_temp);
4978             }
4979           else
4980             VEC_replace (tree, vect_defs, 0, new_temp);
4981         }
4982 
4983       if (slp_node)
4984         continue;
4985 
4986       if (j == 0)
4987 	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4988       else
4989 	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4990 
4991       prev_stmt_info = vinfo_for_stmt (new_stmt);
4992       prev_phi_info = vinfo_for_stmt (new_phi);
4993     }
4994 
4995   /* Finalize the reduction-phi (set its arguments) and create the
4996      epilog reduction code.  */
4997   if ((!single_defuse_cycle || code == COND_EXPR) && !slp_node)
4998     {
4999       new_temp = gimple_assign_lhs (*vec_stmt);
5000       VEC_replace (tree, vect_defs, 0, new_temp);
5001     }
5002 
5003   vect_create_epilog_for_reduction (vect_defs, stmt, epilog_copies,
5004                                     epilog_reduc_code, phis, reduc_index,
5005                                     double_reduc, slp_node);
5006 
5007   VEC_free (gimple, heap, phis);
5008   VEC_free (tree, heap, vec_oprnds0);
5009   if (vec_oprnds1)
5010     VEC_free (tree, heap, vec_oprnds1);
5011 
5012   return true;
5013 }
5014 
5015 /* Function vect_min_worthwhile_factor.
5016 
5017    For a loop where we could vectorize the operation indicated by CODE,
5018    return the minimum vectorization factor that makes it worthwhile
5019    to use generic vectors.  */
5020 int
5021 vect_min_worthwhile_factor (enum tree_code code)
5022 {
5023   switch (code)
5024     {
5025     case PLUS_EXPR:
5026     case MINUS_EXPR:
5027     case NEGATE_EXPR:
5028       return 4;
5029 
5030     case BIT_AND_EXPR:
5031     case BIT_IOR_EXPR:
5032     case BIT_XOR_EXPR:
5033     case BIT_NOT_EXPR:
5034       return 2;
5035 
5036     default:
5037       return INT_MAX;
5038     }
5039 }
5040 
5041 
5042 /* Function vectorizable_induction
5043 
5044    Check if PHI performs an induction computation that can be vectorized.
5045    If VEC_STMT is also passed, vectorize the induction PHI: create a vectorized
5046    phi to replace it, put it in VEC_STMT, and add it to the same basic block.
5047    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
5048 
5049 bool
5050 vectorizable_induction (gimple phi, gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
5051 			gimple *vec_stmt)
5052 {
5053   stmt_vec_info stmt_info = vinfo_for_stmt (phi);
5054   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5055   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5056   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5057   int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5058   int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5059   tree vec_def;
5060 
5061   gcc_assert (ncopies >= 1);
5062   /* FORNOW. These restrictions should be relaxed.  */
5063   if (nested_in_vect_loop_p (loop, phi))
5064     {
5065       imm_use_iterator imm_iter;
5066       use_operand_p use_p;
5067       gimple exit_phi;
5068       edge latch_e;
5069       tree loop_arg;
5070 
5071       if (ncopies > 1)
5072 	{
5073 	  if (vect_print_dump_info (REPORT_DETAILS))
5074 	    fprintf (vect_dump, "multiple types in nested loop.");
5075 	  return false;
5076 	}
5077 
5078       exit_phi = NULL;
5079       latch_e = loop_latch_edge (loop->inner);
5080       loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
5081       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, loop_arg)
5082 	{
5083 	  if (!flow_bb_inside_loop_p (loop->inner,
5084 				      gimple_bb (USE_STMT (use_p))))
5085 	    {
5086 	      exit_phi = USE_STMT (use_p);
5087 	      break;
5088 	    }
5089 	}
5090       if (exit_phi)
5091 	{
5092 	  stmt_vec_info exit_phi_vinfo  = vinfo_for_stmt (exit_phi);
5093 	  if (!(STMT_VINFO_RELEVANT_P (exit_phi_vinfo)
5094 		&& !STMT_VINFO_LIVE_P (exit_phi_vinfo)))
5095 	    {
5096 	      if (vect_print_dump_info (REPORT_DETAILS))
5097 		fprintf (vect_dump, "inner-loop induction only used outside "
5098 			 "of the outer vectorized loop.");
5099 	      return false;
5100 	    }
5101 	}
5102     }
5103 
5104   if (!STMT_VINFO_RELEVANT_P (stmt_info))
5105     return false;
5106 
5107   /* FORNOW: SLP not supported.  */
5108   if (STMT_SLP_TYPE (stmt_info))
5109     return false;
5110 
5111   gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def);
5112 
5113   if (gimple_code (phi) != GIMPLE_PHI)
5114     return false;
5115 
5116   if (!vec_stmt) /* transformation not required.  */
5117     {
5118       STMT_VINFO_TYPE (stmt_info) = induc_vec_info_type;
5119       if (vect_print_dump_info (REPORT_DETAILS))
5120         fprintf (vect_dump, "=== vectorizable_induction ===");
5121       vect_model_induction_cost (stmt_info, ncopies);
5122       return true;
5123     }
5124 
5125   /** Transform.  **/
5126 
5127   if (vect_print_dump_info (REPORT_DETAILS))
5128     fprintf (vect_dump, "transform induction phi.");
5129 
5130   vec_def = get_initial_def_for_induction (phi);
5131   *vec_stmt = SSA_NAME_DEF_STMT (vec_def);
5132   return true;
5133 }
5134 
5135 /* Function vectorizable_live_operation.
5136 
5137    STMT computes a value that is used outside the loop.  Check if
5138    it can be supported.  */
5139 
5140 bool
5141 vectorizable_live_operation (gimple stmt,
5142 			     gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
5143 			     gimple *vec_stmt ATTRIBUTE_UNUSED)
5144 {
5145   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5146   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5147   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5148   int i;
5149   int op_type;
5150   tree op;
5151   tree def;
5152   gimple def_stmt;
5153   enum vect_def_type dt;
5154   enum tree_code code;
5155   enum gimple_rhs_class rhs_class;
5156 
5157   gcc_assert (STMT_VINFO_LIVE_P (stmt_info));
5158 
5159   if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
5160     return false;
5161 
5162   if (!is_gimple_assign (stmt))
5163     return false;
5164 
5165   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5166     return false;
5167 
5168   /* FORNOW. CHECKME. */
5169   if (nested_in_vect_loop_p (loop, stmt))
5170     return false;
5171 
5172   code = gimple_assign_rhs_code (stmt);
5173   op_type = TREE_CODE_LENGTH (code);
5174   rhs_class = get_gimple_rhs_class (code);
5175   gcc_assert (rhs_class != GIMPLE_UNARY_RHS || op_type == unary_op);
5176   gcc_assert (rhs_class != GIMPLE_BINARY_RHS || op_type == binary_op);
5177 
5178   /* FORNOW: support only if all uses are invariant.  This means
5179      that the scalar operations can remain in place, unvectorized.
5180      The original last scalar value that they compute will be used.  */
5181 
5182   for (i = 0; i < op_type; i++)
5183     {
5184       if (rhs_class == GIMPLE_SINGLE_RHS)
5185 	op = TREE_OPERAND (gimple_op (stmt, 1), i);
5186       else
5187 	op = gimple_op (stmt, i + 1);
5188       if (op
5189           && !vect_is_simple_use (op, stmt, loop_vinfo, NULL, &def_stmt, &def,
5190 				  &dt))
5191         {
5192           if (vect_print_dump_info (REPORT_DETAILS))
5193             fprintf (vect_dump, "use not simple.");
5194           return false;
5195         }
5196 
5197       if (dt != vect_external_def && dt != vect_constant_def)
5198         return false;
5199     }
5200 
5201   /* No transformation is required for the cases we currently support.  */
5202   return true;
5203 }
5204 
5205 /* Kill any debug uses outside LOOP of SSA names defined in STMT.  */
5206 
5207 static void
5208 vect_loop_kill_debug_uses (struct loop *loop, gimple stmt)
5209 {
5210   ssa_op_iter op_iter;
5211   imm_use_iterator imm_iter;
5212   def_operand_p def_p;
5213   gimple ustmt;
5214 
5215   FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
5216     {
5217       FOR_EACH_IMM_USE_STMT (ustmt, imm_iter, DEF_FROM_PTR (def_p))
5218 	{
5219 	  basic_block bb;
5220 
5221 	  if (!is_gimple_debug (ustmt))
5222 	    continue;
5223 
5224 	  bb = gimple_bb (ustmt);
5225 
5226 	  if (!flow_bb_inside_loop_p (loop, bb))
5227 	    {
5228 	      if (gimple_debug_bind_p (ustmt))
5229 		{
5230 		  if (vect_print_dump_info (REPORT_DETAILS))
5231 		    fprintf (vect_dump, "killing debug use");
5232 
5233 		  gimple_debug_bind_reset_value (ustmt);
5234 		  update_stmt (ustmt);
5235 		}
5236 	      else
5237 		gcc_unreachable ();
5238 	    }
5239 	}
5240     }
5241 }
5242 
5243 /* Function vect_transform_loop.
5244 
5245    The analysis phase has determined that the loop is vectorizable.
5246    Vectorize the loop - created vectorized stmts to replace the scalar
5247    stmts in the loop, and update the loop exit condition.  */
5248 
5249 void
5250 vect_transform_loop (loop_vec_info loop_vinfo)
5251 {
5252   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5253   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5254   int nbbs = loop->num_nodes;
5255   gimple_stmt_iterator si;
5256   int i;
5257   tree ratio = NULL;
5258   int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5259   bool strided_store;
5260   bool slp_scheduled = false;
5261   unsigned int nunits;
5262   tree cond_expr = NULL_TREE;
5263   gimple_seq cond_expr_stmt_list = NULL;
5264   bool do_peeling_for_loop_bound;
5265   gimple stmt, pattern_stmt;
5266   gimple_seq pattern_def_seq = NULL;
5267   gimple_stmt_iterator pattern_def_si = gsi_start (NULL);
5268   bool transform_pattern_stmt = false;
5269 
5270   if (vect_print_dump_info (REPORT_DETAILS))
5271     fprintf (vect_dump, "=== vec_transform_loop ===");
5272 
5273   /* Peel the loop if there are data refs with unknown alignment.
5274      Only one data ref with unknown store is allowed.  */
5275 
5276   if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
5277     vect_do_peeling_for_alignment (loop_vinfo);
5278 
5279   do_peeling_for_loop_bound
5280     = (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
5281        || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
5282 	   && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
5283        || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo));
5284 
5285   if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
5286       || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
5287     vect_loop_versioning (loop_vinfo,
5288 			  !do_peeling_for_loop_bound,
5289 			  &cond_expr, &cond_expr_stmt_list);
5290 
5291   /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
5292      compile time constant), or it is a constant that doesn't divide by the
5293      vectorization factor, then an epilog loop needs to be created.
5294      We therefore duplicate the loop: the original loop will be vectorized,
5295      and will compute the first (n/VF) iterations.  The second copy of the loop
5296      will remain scalar and will compute the remaining (n%VF) iterations.
5297      (VF is the vectorization factor).  */
5298 
5299   if (do_peeling_for_loop_bound)
5300     vect_do_peeling_for_loop_bound (loop_vinfo, &ratio,
5301 				    cond_expr, cond_expr_stmt_list);
5302   else
5303     ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
5304 		LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
5305 
5306   /* 1) Make sure the loop header has exactly two entries
5307      2) Make sure we have a preheader basic block.  */
5308 
5309   gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
5310 
5311   split_edge (loop_preheader_edge (loop));
5312 
5313   /* FORNOW: the vectorizer supports only loops which body consist
5314      of one basic block (header + empty latch). When the vectorizer will
5315      support more involved loop forms, the order by which the BBs are
5316      traversed need to be reconsidered.  */
5317 
5318   for (i = 0; i < nbbs; i++)
5319     {
5320       basic_block bb = bbs[i];
5321       stmt_vec_info stmt_info;
5322       gimple phi;
5323 
5324       for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
5325         {
5326 	  phi = gsi_stmt (si);
5327 	  if (vect_print_dump_info (REPORT_DETAILS))
5328 	    {
5329 	      fprintf (vect_dump, "------>vectorizing phi: ");
5330 	      print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
5331 	    }
5332 	  stmt_info = vinfo_for_stmt (phi);
5333 	  if (!stmt_info)
5334 	    continue;
5335 
5336 	  if (MAY_HAVE_DEBUG_STMTS && !STMT_VINFO_LIVE_P (stmt_info))
5337 	    vect_loop_kill_debug_uses (loop, phi);
5338 
5339 	  if (!STMT_VINFO_RELEVANT_P (stmt_info)
5340 	      && !STMT_VINFO_LIVE_P (stmt_info))
5341 	    continue;
5342 
5343 	  if ((TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
5344 	        != (unsigned HOST_WIDE_INT) vectorization_factor)
5345 	      && vect_print_dump_info (REPORT_DETAILS))
5346 	    fprintf (vect_dump, "multiple-types.");
5347 
5348 	  if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
5349 	    {
5350 	      if (vect_print_dump_info (REPORT_DETAILS))
5351 		fprintf (vect_dump, "transform phi.");
5352 	      vect_transform_stmt (phi, NULL, NULL, NULL, NULL);
5353 	    }
5354 	}
5355 
5356       pattern_stmt = NULL;
5357       for (si = gsi_start_bb (bb); !gsi_end_p (si) || transform_pattern_stmt;)
5358 	{
5359 	  bool is_store;
5360 
5361           if (transform_pattern_stmt)
5362 	    stmt = pattern_stmt;
5363           else
5364             stmt = gsi_stmt (si);
5365 
5366 	  if (vect_print_dump_info (REPORT_DETAILS))
5367 	    {
5368 	      fprintf (vect_dump, "------>vectorizing statement: ");
5369 	      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5370 	    }
5371 
5372 	  stmt_info = vinfo_for_stmt (stmt);
5373 
5374 	  /* vector stmts created in the outer-loop during vectorization of
5375 	     stmts in an inner-loop may not have a stmt_info, and do not
5376 	     need to be vectorized.  */
5377 	  if (!stmt_info)
5378 	    {
5379 	      gsi_next (&si);
5380 	      continue;
5381 	    }
5382 
5383 	  if (MAY_HAVE_DEBUG_STMTS && !STMT_VINFO_LIVE_P (stmt_info))
5384 	    vect_loop_kill_debug_uses (loop, stmt);
5385 
5386 	  if (!STMT_VINFO_RELEVANT_P (stmt_info)
5387 	      && !STMT_VINFO_LIVE_P (stmt_info))
5388             {
5389               if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5390                   && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
5391                   && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5392                       || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5393                 {
5394                   stmt = pattern_stmt;
5395                   stmt_info = vinfo_for_stmt (stmt);
5396                 }
5397               else
5398 	        {
5399    	          gsi_next (&si);
5400 	          continue;
5401                 }
5402 	    }
5403           else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5404                    && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
5405                    && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5406                        || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5407             transform_pattern_stmt = true;
5408 
5409 	  /* If pattern statement has def stmts, vectorize them too.  */
5410 	  if (is_pattern_stmt_p (stmt_info))
5411 	    {
5412 	      if (pattern_def_seq == NULL)
5413 		{
5414 		  pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info);
5415 		  pattern_def_si = gsi_start (pattern_def_seq);
5416 		}
5417 	      else if (!gsi_end_p (pattern_def_si))
5418 		gsi_next (&pattern_def_si);
5419 	      if (pattern_def_seq != NULL)
5420 		{
5421 		  gimple pattern_def_stmt = NULL;
5422 		  stmt_vec_info pattern_def_stmt_info = NULL;
5423 
5424 		  while (!gsi_end_p (pattern_def_si))
5425 		    {
5426 		      pattern_def_stmt = gsi_stmt (pattern_def_si);
5427 		      pattern_def_stmt_info
5428 			= vinfo_for_stmt (pattern_def_stmt);
5429 		      if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
5430 			  || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
5431 			break;
5432 		      gsi_next (&pattern_def_si);
5433 		    }
5434 
5435 		  if (!gsi_end_p (pattern_def_si))
5436 		    {
5437 		      if (vect_print_dump_info (REPORT_DETAILS))
5438 			{
5439 			  fprintf (vect_dump, "==> vectorizing pattern def"
5440 					      " stmt: ");
5441 			  print_gimple_stmt (vect_dump, pattern_def_stmt, 0,
5442 					     TDF_SLIM);
5443 			}
5444 
5445 		      stmt = pattern_def_stmt;
5446 		      stmt_info = pattern_def_stmt_info;
5447 		    }
5448 		  else
5449 		    {
5450 		      pattern_def_si = gsi_start (NULL);
5451 		      transform_pattern_stmt = false;
5452 		    }
5453 		}
5454 	      else
5455 		transform_pattern_stmt = false;
5456             }
5457 
5458 	  gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
5459 	  nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (
5460                                                STMT_VINFO_VECTYPE (stmt_info));
5461 	  if (!STMT_SLP_TYPE (stmt_info)
5462 	      && nunits != (unsigned int) vectorization_factor
5463               && vect_print_dump_info (REPORT_DETAILS))
5464 	    /* For SLP VF is set according to unrolling factor, and not to
5465 	       vector size, hence for SLP this print is not valid.  */
5466             fprintf (vect_dump, "multiple-types.");
5467 
5468 	  /* SLP. Schedule all the SLP instances when the first SLP stmt is
5469 	     reached.  */
5470 	  if (STMT_SLP_TYPE (stmt_info))
5471 	    {
5472 	      if (!slp_scheduled)
5473 		{
5474 		  slp_scheduled = true;
5475 
5476 		  if (vect_print_dump_info (REPORT_DETAILS))
5477 		    fprintf (vect_dump, "=== scheduling SLP instances ===");
5478 
5479 		  vect_schedule_slp (loop_vinfo, NULL);
5480 		}
5481 
5482 	      /* Hybrid SLP stmts must be vectorized in addition to SLP.  */
5483 	      if (!vinfo_for_stmt (stmt) || PURE_SLP_STMT (stmt_info))
5484 		{
5485 		  if (!transform_pattern_stmt && gsi_end_p (pattern_def_si))
5486 		    {
5487 		      pattern_def_seq = NULL;
5488 		      gsi_next (&si);
5489 		    }
5490 		  continue;
5491 		}
5492 	    }
5493 
5494 	  /* -------- vectorize statement ------------ */
5495 	  if (vect_print_dump_info (REPORT_DETAILS))
5496 	    fprintf (vect_dump, "transform statement.");
5497 
5498 	  strided_store = false;
5499 	  is_store = vect_transform_stmt (stmt, &si, &strided_store, NULL, NULL);
5500           if (is_store)
5501             {
5502 	      if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
5503 		{
5504 		  /* Interleaving. If IS_STORE is TRUE, the vectorization of the
5505 		     interleaving chain was completed - free all the stores in
5506 		     the chain.  */
5507 		  gsi_next (&si);
5508 		  vect_remove_stores (GROUP_FIRST_ELEMENT (stmt_info));
5509  		  continue;
5510 		}
5511 	      else
5512 		{
5513 		  /* Free the attached stmt_vec_info and remove the stmt.  */
5514 		  free_stmt_vec_info (gsi_stmt (si));
5515 		  gsi_remove (&si, true);
5516 		  continue;
5517 		}
5518 	    }
5519 
5520 	  if (!transform_pattern_stmt && gsi_end_p (pattern_def_si))
5521 	    {
5522 	      pattern_def_seq = NULL;
5523 	      gsi_next (&si);
5524 	    }
5525 	}		        /* stmts in BB */
5526     }				/* BBs in loop */
5527 
5528   slpeel_make_loop_iterate_ntimes (loop, ratio);
5529 
5530   /* The memory tags and pointers in vectorized statements need to
5531      have their SSA forms updated.  FIXME, why can't this be delayed
5532      until all the loops have been transformed?  */
5533   update_ssa (TODO_update_ssa);
5534 
5535   if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS))
5536     fprintf (vect_dump, "LOOP VECTORIZED.");
5537   if (loop->inner && vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS))
5538     fprintf (vect_dump, "OUTER LOOP VECTORIZED.");
5539 }
5540