1 /* Loop Vectorization
2    Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
3    Free Software Foundation, Inc.
4    Contributed by Dorit Naishlos <dorit@il.ibm.com> and
5    Ira Rosen <irar@il.ibm.com>
6 
7 This file is part of GCC.
8 
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13 
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
17 for more details.
18 
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3.  If not see
21 <http://www.gnu.org/licenses/>.  */
22 
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "basic-block.h"
30 #include "tree-pretty-print.h"
31 #include "gimple-pretty-print.h"
32 #include "tree-flow.h"
33 #include "tree-dump.h"
34 #include "cfgloop.h"
35 #include "cfglayout.h"
36 #include "expr.h"
37 #include "recog.h"
38 #include "optabs.h"
39 #include "params.h"
40 #include "diagnostic-core.h"
41 #include "tree-chrec.h"
42 #include "tree-scalar-evolution.h"
43 #include "tree-vectorizer.h"
44 #include "target.h"
45 
46 /* Loop Vectorization Pass.
47 
48    This pass tries to vectorize loops.
49 
50    For example, the vectorizer transforms the following simple loop:
51 
52         short a[N]; short b[N]; short c[N]; int i;
53 
54         for (i=0; i<N; i++){
55           a[i] = b[i] + c[i];
56         }
57 
58    as if it was manually vectorized by rewriting the source code into:
59 
60         typedef int __attribute__((mode(V8HI))) v8hi;
61         short a[N];  short b[N]; short c[N];   int i;
62         v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
63         v8hi va, vb, vc;
64 
65         for (i=0; i<N/8; i++){
66           vb = pb[i];
67           vc = pc[i];
68           va = vb + vc;
69           pa[i] = va;
70         }
71 
72         The main entry to this pass is vectorize_loops(), in which
73    the vectorizer applies a set of analyses on a given set of loops,
74    followed by the actual vectorization transformation for the loops that
75    had successfully passed the analysis phase.
76         Throughout this pass we make a distinction between two types of
77    data: scalars (which are represented by SSA_NAMES), and memory references
78    ("data-refs").  These two types of data require different handling both
79    during analysis and transformation. The types of data-refs that the
80    vectorizer currently supports are ARRAY_REFS which base is an array DECL
81    (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
82    accesses are required to have a simple (consecutive) access pattern.
83 
84    Analysis phase:
85    ===============
86         The driver for the analysis phase is vect_analyze_loop().
87    It applies a set of analyses, some of which rely on the scalar evolution
88    analyzer (scev) developed by Sebastian Pop.
89 
90         During the analysis phase the vectorizer records some information
91    per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
92    loop, as well as general information about the loop as a whole, which is
93    recorded in a "loop_vec_info" struct attached to each loop.
94 
95    Transformation phase:
96    =====================
97         The loop transformation phase scans all the stmts in the loop, and
98    creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
99    the loop that needs to be vectorized.  It inserts the vector code sequence
100    just before the scalar stmt S, and records a pointer to the vector code
101    in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
102    attached to S).  This pointer will be used for the vectorization of following
103    stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
104    otherwise, we rely on dead code elimination for removing it.
105 
106         For example, say stmt S1 was vectorized into stmt VS1:
107 
108    VS1: vb = px[i];
109    S1:  b = x[i];    STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
110    S2:  a = b;
111 
112    To vectorize stmt S2, the vectorizer first finds the stmt that defines
113    the operand 'b' (S1), and gets the relevant vector def 'vb' from the
114    vector stmt VS1 pointed to by STMT_VINFO_VEC_STMT (stmt_info (S1)).  The
115    resulting sequence would be:
116 
117    VS1: vb = px[i];
118    S1:  b = x[i];       STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
119    VS2: va = vb;
120    S2:  a = b;          STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
121 
122         Operands that are not SSA_NAMEs, are data-refs that appear in
123    load/store operations (like 'x[i]' in S1), and are handled differently.
124 
125    Target modeling:
126    =================
127         Currently the only target specific information that is used is the
128    size of the vector (in bytes) - "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD".
129    Targets that can support different sizes of vectors, for now will need
130    to specify one value for "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD".  More
131    flexibility will be added in the future.
132 
133         Since we only vectorize operations which vector form can be
134    expressed using existing tree codes, to verify that an operation is
135    supported, the vectorizer checks the relevant optab at the relevant
136    machine_mode (e.g, optab_handler (add_optab, V8HImode)).  If
137    the value found is CODE_FOR_nothing, then there's no target support, and
138    we can't vectorize the stmt.
139 
140    For additional information on this project see:
141    http://gcc.gnu.org/projects/tree-ssa/vectorization.html
142 */
143 
144 /* Function vect_determine_vectorization_factor
145 
146    Determine the vectorization factor (VF).  VF is the number of data elements
147    that are operated upon in parallel in a single iteration of the vectorized
148    loop.  For example, when vectorizing a loop that operates on 4byte elements,
149    on a target with vector size (VS) 16byte, the VF is set to 4, since 4
150    elements can fit in a single vector register.
151 
152    We currently support vectorization of loops in which all types operated upon
153    are of the same size.  Therefore this function currently sets VF according to
154    the size of the types operated upon, and fails if there are multiple sizes
155    in the loop.
156 
157    VF is also the factor by which the loop iterations are strip-mined, e.g.:
158    original loop:
159         for (i=0; i<N; i++){
160           a[i] = b[i] + c[i];
161         }
162 
163    vectorized loop:
164         for (i=0; i<N; i+=VF){
165           a[i:VF] = b[i:VF] + c[i:VF];
166         }
167 */
168 
169 static bool
170 vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
171 {
172   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
173   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
174   int nbbs = loop->num_nodes;
175   gimple_stmt_iterator si;
176   unsigned int vectorization_factor = 0;
177   tree scalar_type;
178   gimple phi;
179   tree vectype;
180   unsigned int nunits;
181   stmt_vec_info stmt_info;
182   int i;
183   HOST_WIDE_INT dummy;
184   gimple stmt, pattern_stmt = NULL;
185   gimple_seq pattern_def_seq = NULL;
186   gimple_stmt_iterator pattern_def_si = gsi_start (NULL);
187   bool analyze_pattern_stmt = false;
188 
189   if (vect_print_dump_info (REPORT_DETAILS))
190     fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");
191 
192   for (i = 0; i < nbbs; i++)
193     {
194       basic_block bb = bbs[i];
195 
196       for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
197 	{
198 	  phi = gsi_stmt (si);
199 	  stmt_info = vinfo_for_stmt (phi);
200 	  if (vect_print_dump_info (REPORT_DETAILS))
201 	    {
202 	      fprintf (vect_dump, "==> examining phi: ");
203 	      print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
204 	    }
205 
206 	  gcc_assert (stmt_info);
207 
208 	  if (STMT_VINFO_RELEVANT_P (stmt_info))
209             {
210 	      gcc_assert (!STMT_VINFO_VECTYPE (stmt_info));
211               scalar_type = TREE_TYPE (PHI_RESULT (phi));
212 
213 	      if (vect_print_dump_info (REPORT_DETAILS))
214 		{
215 		  fprintf (vect_dump, "get vectype for scalar type:  ");
216 		  print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
217 		}
218 
219 	      vectype = get_vectype_for_scalar_type (scalar_type);
220 	      if (!vectype)
221 		{
222 		  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
223 		    {
224 		      fprintf (vect_dump,
225 		               "not vectorized: unsupported data-type ");
226 		      print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
227 		    }
228 		  return false;
229 		}
230 	      STMT_VINFO_VECTYPE (stmt_info) = vectype;
231 
232 	      if (vect_print_dump_info (REPORT_DETAILS))
233 		{
234 		  fprintf (vect_dump, "vectype: ");
235 		  print_generic_expr (vect_dump, vectype, TDF_SLIM);
236 		}
237 
238 	      nunits = TYPE_VECTOR_SUBPARTS (vectype);
239 	      if (vect_print_dump_info (REPORT_DETAILS))
240 		fprintf (vect_dump, "nunits = %d", nunits);
241 
242 	      if (!vectorization_factor
243 		  || (nunits > vectorization_factor))
244 		vectorization_factor = nunits;
245 	    }
246 	}
247 
248       for (si = gsi_start_bb (bb); !gsi_end_p (si) || analyze_pattern_stmt;)
249         {
250           tree vf_vectype;
251 
252           if (analyze_pattern_stmt)
253 	    stmt = pattern_stmt;
254           else
255             stmt = gsi_stmt (si);
256 
257           stmt_info = vinfo_for_stmt (stmt);
258 
259 	  if (vect_print_dump_info (REPORT_DETAILS))
260 	    {
261 	      fprintf (vect_dump, "==> examining statement: ");
262 	      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
263 	    }
264 
265 	  gcc_assert (stmt_info);
266 
267 	  /* Skip stmts which do not need to be vectorized.  */
268 	  if (!STMT_VINFO_RELEVANT_P (stmt_info)
269 	      && !STMT_VINFO_LIVE_P (stmt_info))
270             {
271               if (STMT_VINFO_IN_PATTERN_P (stmt_info)
272                   && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
273                   && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
274                       || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
275                 {
276                   stmt = pattern_stmt;
277                   stmt_info = vinfo_for_stmt (pattern_stmt);
278                   if (vect_print_dump_info (REPORT_DETAILS))
279                     {
280                       fprintf (vect_dump, "==> examining pattern statement: ");
281                       print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
282                     }
283                 }
284               else
285 	        {
286 	          if (vect_print_dump_info (REPORT_DETAILS))
287 	            fprintf (vect_dump, "skip.");
288                   gsi_next (&si);
289 	          continue;
290                 }
291 	    }
292           else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
293                    && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
294                    && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
295                        || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
296             analyze_pattern_stmt = true;
297 
298 	  /* If a pattern statement has def stmts, analyze them too.  */
299 	  if (is_pattern_stmt_p (stmt_info))
300 	    {
301 	      if (pattern_def_seq == NULL)
302 		{
303 		  pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info);
304 		  pattern_def_si = gsi_start (pattern_def_seq);
305 		}
306 	      else if (!gsi_end_p (pattern_def_si))
307 		gsi_next (&pattern_def_si);
308 	      if (pattern_def_seq != NULL)
309 		{
310 		  gimple pattern_def_stmt = NULL;
311 		  stmt_vec_info pattern_def_stmt_info = NULL;
312 
313 		  while (!gsi_end_p (pattern_def_si))
314 		    {
315 		      pattern_def_stmt = gsi_stmt (pattern_def_si);
316 		      pattern_def_stmt_info
317 			= vinfo_for_stmt (pattern_def_stmt);
318 		      if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
319 			  || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
320 			break;
321 		      gsi_next (&pattern_def_si);
322 		    }
323 
324 		  if (!gsi_end_p (pattern_def_si))
325 		    {
326 		      if (vect_print_dump_info (REPORT_DETAILS))
327 			{
328 			  fprintf (vect_dump,
329 				   "==> examining pattern def stmt: ");
330 			  print_gimple_stmt (vect_dump, pattern_def_stmt, 0,
331 					     TDF_SLIM);
332 			}
333 
334 		      stmt = pattern_def_stmt;
335 		      stmt_info = pattern_def_stmt_info;
336 		    }
337 		  else
338 		    {
339 		      pattern_def_si = gsi_start (NULL);
340 		      analyze_pattern_stmt = false;
341 		    }
342 		}
343 	      else
344 		analyze_pattern_stmt = false;
345 	    }
346 
347 	  if (gimple_get_lhs (stmt) == NULL_TREE)
348 	    {
349 	      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
350 		{
351 	          fprintf (vect_dump, "not vectorized: irregular stmt.");
352 		  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
353 		}
354 	      return false;
355 	    }
356 
357 	  if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
358 	    {
359 	      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
360 	        {
361 	          fprintf (vect_dump, "not vectorized: vector stmt in loop:");
362 	          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
363 	        }
364 	      return false;
365 	    }
366 
367 	  if (STMT_VINFO_VECTYPE (stmt_info))
368 	    {
369 	      /* The only case when a vectype had been already set is for stmts
370 	         that contain a dataref, or for "pattern-stmts" (stmts
371 		 generated by the vectorizer to represent/replace a certain
372 		 idiom).  */
373 	      gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
374 			  || is_pattern_stmt_p (stmt_info)
375 			  || !gsi_end_p (pattern_def_si));
376 	      vectype = STMT_VINFO_VECTYPE (stmt_info);
377 	    }
378 	  else
379 	    {
380 	      gcc_assert (!STMT_VINFO_DATA_REF (stmt_info));
381 	      scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
382 	      if (vect_print_dump_info (REPORT_DETAILS))
383 		{
384 		  fprintf (vect_dump, "get vectype for scalar type:  ");
385 		  print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
386 		}
387 	      vectype = get_vectype_for_scalar_type (scalar_type);
388 	      if (!vectype)
389 		{
390 		  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
391 		    {
392 		      fprintf (vect_dump,
393 			       "not vectorized: unsupported data-type ");
394 		      print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
395 		    }
396 		  return false;
397 		}
398 
399 	      STMT_VINFO_VECTYPE (stmt_info) = vectype;
400             }
401 
402 	  /* The vectorization factor is according to the smallest
403 	     scalar type (or the largest vector size, but we only
404 	     support one vector size per loop).  */
405 	  scalar_type = vect_get_smallest_scalar_type (stmt, &dummy,
406 						       &dummy);
407 	  if (vect_print_dump_info (REPORT_DETAILS))
408 	    {
409 	      fprintf (vect_dump, "get vectype for scalar type:  ");
410 	      print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
411 	    }
412 	  vf_vectype = get_vectype_for_scalar_type (scalar_type);
413 	  if (!vf_vectype)
414 	    {
415 	      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
416 		{
417 		  fprintf (vect_dump,
418 			   "not vectorized: unsupported data-type ");
419 		  print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
420 		}
421 	      return false;
422 	    }
423 
424 	  if ((GET_MODE_SIZE (TYPE_MODE (vectype))
425 	       != GET_MODE_SIZE (TYPE_MODE (vf_vectype))))
426 	    {
427 	      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
428 		{
429 		  fprintf (vect_dump,
430 			   "not vectorized: different sized vector "
431 			   "types in statement, ");
432 		  print_generic_expr (vect_dump, vectype, TDF_SLIM);
433 		  fprintf (vect_dump, " and ");
434 		  print_generic_expr (vect_dump, vf_vectype, TDF_SLIM);
435 		}
436 	      return false;
437 	    }
438 
439 	  if (vect_print_dump_info (REPORT_DETAILS))
440 	    {
441 	      fprintf (vect_dump, "vectype: ");
442 	      print_generic_expr (vect_dump, vf_vectype, TDF_SLIM);
443 	    }
444 
445 	  nunits = TYPE_VECTOR_SUBPARTS (vf_vectype);
446 	  if (vect_print_dump_info (REPORT_DETAILS))
447 	    fprintf (vect_dump, "nunits = %d", nunits);
448 
449 	  if (!vectorization_factor
450 	      || (nunits > vectorization_factor))
451 	    vectorization_factor = nunits;
452 
453 	  if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si))
454 	    {
455 	      pattern_def_seq = NULL;
456 	      gsi_next (&si);
457 	    }
458         }
459     }
460 
461   /* TODO: Analyze cost. Decide if worth while to vectorize.  */
462   if (vect_print_dump_info (REPORT_DETAILS))
463     fprintf (vect_dump, "vectorization factor = %d", vectorization_factor);
464   if (vectorization_factor <= 1)
465     {
466       if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
467         fprintf (vect_dump, "not vectorized: unsupported data-type");
468       return false;
469     }
470   LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
471 
472   return true;
473 }
474 
475 
476 /* Function vect_is_simple_iv_evolution.
477 
478    FORNOW: A simple evolution of an induction variables in the loop is
479    considered a polynomial evolution with constant step.  */
480 
481 static bool
482 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
483                              tree * step)
484 {
485   tree init_expr;
486   tree step_expr;
487   tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
488 
489   /* When there is no evolution in this loop, the evolution function
490      is not "simple".  */
491   if (evolution_part == NULL_TREE)
492     return false;
493 
494   /* When the evolution is a polynomial of degree >= 2
495      the evolution function is not "simple".  */
496   if (tree_is_chrec (evolution_part))
497     return false;
498 
499   step_expr = evolution_part;
500   init_expr = unshare_expr (initial_condition_in_loop_num (access_fn, loop_nb));
501 
502   if (vect_print_dump_info (REPORT_DETAILS))
503     {
504       fprintf (vect_dump, "step: ");
505       print_generic_expr (vect_dump, step_expr, TDF_SLIM);
506       fprintf (vect_dump, ",  init: ");
507       print_generic_expr (vect_dump, init_expr, TDF_SLIM);
508     }
509 
510   *init = init_expr;
511   *step = step_expr;
512 
513   if (TREE_CODE (step_expr) != INTEGER_CST)
514     {
515       if (vect_print_dump_info (REPORT_DETAILS))
516         fprintf (vect_dump, "step unknown.");
517       return false;
518     }
519 
520   return true;
521 }
522 
523 /* Function vect_analyze_scalar_cycles_1.
524 
525    Examine the cross iteration def-use cycles of scalar variables
526    in LOOP.  LOOP_VINFO represents the loop that is now being
527    considered for vectorization (can be LOOP, or an outer-loop
528    enclosing LOOP).  */
529 
530 static void
531 vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
532 {
533   basic_block bb = loop->header;
534   tree dumy;
535   VEC(gimple,heap) *worklist = VEC_alloc (gimple, heap, 64);
536   gimple_stmt_iterator gsi;
537   bool double_reduc;
538 
539   if (vect_print_dump_info (REPORT_DETAILS))
540     fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");
541 
542   /* First - identify all inductions.  Reduction detection assumes that all the
543      inductions have been identified, therefore, this order must not be
544      changed.  */
545   for (gsi = gsi_start_phis  (bb); !gsi_end_p (gsi); gsi_next (&gsi))
546     {
547       gimple phi = gsi_stmt (gsi);
548       tree access_fn = NULL;
549       tree def = PHI_RESULT (phi);
550       stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
551 
552       if (vect_print_dump_info (REPORT_DETAILS))
553 	{
554 	  fprintf (vect_dump, "Analyze phi: ");
555 	  print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
556 	}
557 
558       /* Skip virtual phi's.  The data dependences that are associated with
559          virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
560       if (!is_gimple_reg (SSA_NAME_VAR (def)))
561 	continue;
562 
563       STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;
564 
565       /* Analyze the evolution function.  */
566       access_fn = analyze_scalar_evolution (loop, def);
567       if (access_fn)
568 	{
569 	  STRIP_NOPS (access_fn);
570 	  if (vect_print_dump_info (REPORT_DETAILS))
571 	    {
572 	      fprintf (vect_dump, "Access function of PHI: ");
573 	      print_generic_expr (vect_dump, access_fn, TDF_SLIM);
574 	    }
575 	  STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo)
576 	    = evolution_part_in_loop_num (access_fn, loop->num);
577 	}
578 
579       if (!access_fn
580 	  || !vect_is_simple_iv_evolution (loop->num, access_fn, &dumy, &dumy))
581 	{
582 	  VEC_safe_push (gimple, heap, worklist, phi);
583 	  continue;
584 	}
585 
586       gcc_assert (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo) != NULL_TREE);
587 
588       if (vect_print_dump_info (REPORT_DETAILS))
589 	fprintf (vect_dump, "Detected induction.");
590       STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
591     }
592 
593 
594   /* Second - identify all reductions and nested cycles.  */
595   while (VEC_length (gimple, worklist) > 0)
596     {
597       gimple phi = VEC_pop (gimple, worklist);
598       tree def = PHI_RESULT (phi);
599       stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
600       gimple reduc_stmt;
601       bool nested_cycle;
602 
603       if (vect_print_dump_info (REPORT_DETAILS))
604         {
605           fprintf (vect_dump, "Analyze phi: ");
606           print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
607         }
608 
609       gcc_assert (is_gimple_reg (SSA_NAME_VAR (def)));
610       gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type);
611 
612       nested_cycle = (loop != LOOP_VINFO_LOOP (loop_vinfo));
613       reduc_stmt = vect_force_simple_reduction (loop_vinfo, phi, !nested_cycle,
614 						&double_reduc);
615       if (reduc_stmt)
616         {
617           if (double_reduc)
618             {
619               if (vect_print_dump_info (REPORT_DETAILS))
620                 fprintf (vect_dump, "Detected double reduction.");
621 
622               STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_double_reduction_def;
623               STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
624                                                     vect_double_reduction_def;
625             }
626           else
627             {
628               if (nested_cycle)
629                 {
630                   if (vect_print_dump_info (REPORT_DETAILS))
631                     fprintf (vect_dump, "Detected vectorizable nested cycle.");
632 
633                   STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_nested_cycle;
634                   STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
635                                                              vect_nested_cycle;
636                 }
637               else
638                 {
639                   if (vect_print_dump_info (REPORT_DETAILS))
640                     fprintf (vect_dump, "Detected reduction.");
641 
642                   STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
643                   STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
644                                                            vect_reduction_def;
645                   /* Store the reduction cycles for possible vectorization in
646                      loop-aware SLP.  */
647                   VEC_safe_push (gimple, heap,
648                                  LOOP_VINFO_REDUCTIONS (loop_vinfo),
649                                  reduc_stmt);
650                 }
651             }
652         }
653       else
654         if (vect_print_dump_info (REPORT_DETAILS))
655           fprintf (vect_dump, "Unknown def-use cycle pattern.");
656     }
657 
658   VEC_free (gimple, heap, worklist);
659 }
660 
661 
662 /* Function vect_analyze_scalar_cycles.
663 
664    Examine the cross iteration def-use cycles of scalar variables, by
665    analyzing the loop-header PHIs of scalar variables.  Classify each
666    cycle as one of the following: invariant, induction, reduction, unknown.
667    We do that for the loop represented by LOOP_VINFO, and also to its
668    inner-loop, if exists.
669    Examples for scalar cycles:
670 
671    Example1: reduction:
672 
673               loop1:
674               for (i=0; i<N; i++)
675                  sum += a[i];
676 
677    Example2: induction:
678 
679               loop2:
680               for (i=0; i<N; i++)
681                  a[i] = i;  */
682 
683 static void
684 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
685 {
686   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
687 
688   vect_analyze_scalar_cycles_1 (loop_vinfo, loop);
689 
690   /* When vectorizing an outer-loop, the inner-loop is executed sequentially.
691      Reductions in such inner-loop therefore have different properties than
692      the reductions in the nest that gets vectorized:
693      1. When vectorized, they are executed in the same order as in the original
694         scalar loop, so we can't change the order of computation when
695         vectorizing them.
696      2. FIXME: Inner-loop reductions can be used in the inner-loop, so the
697         current checks are too strict.  */
698 
699   if (loop->inner)
700     vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner);
701 }
702 
703 /* Function vect_get_loop_niters.
704 
705    Determine how many iterations the loop is executed.
706    If an expression that represents the number of iterations
707    can be constructed, place it in NUMBER_OF_ITERATIONS.
708    Return the loop exit condition.  */
709 
710 static gimple
711 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
712 {
713   tree niters;
714 
715   if (vect_print_dump_info (REPORT_DETAILS))
716     fprintf (vect_dump, "=== get_loop_niters ===");
717 
718   niters = number_of_exit_cond_executions (loop);
719 
720   if (niters != NULL_TREE
721       && niters != chrec_dont_know)
722     {
723       *number_of_iterations = niters;
724 
725       if (vect_print_dump_info (REPORT_DETAILS))
726         {
727           fprintf (vect_dump, "==> get_loop_niters:" );
728           print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM);
729         }
730     }
731 
732   return get_loop_exit_condition (loop);
733 }
734 
735 
736 /* Function bb_in_loop_p
737 
738    Used as predicate for dfs order traversal of the loop bbs.  */
739 
740 static bool
741 bb_in_loop_p (const_basic_block bb, const void *data)
742 {
743   const struct loop *const loop = (const struct loop *)data;
744   if (flow_bb_inside_loop_p (loop, bb))
745     return true;
746   return false;
747 }
748 
749 
750 /* Function new_loop_vec_info.
751 
752    Create and initialize a new loop_vec_info struct for LOOP, as well as
753    stmt_vec_info structs for all the stmts in LOOP.  */
754 
755 static loop_vec_info
756 new_loop_vec_info (struct loop *loop)
757 {
758   loop_vec_info res;
759   basic_block *bbs;
760   gimple_stmt_iterator si;
761   unsigned int i, nbbs;
762 
763   res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
764   LOOP_VINFO_LOOP (res) = loop;
765 
766   bbs = get_loop_body (loop);
767 
768   /* Create/Update stmt_info for all stmts in the loop.  */
769   for (i = 0; i < loop->num_nodes; i++)
770     {
771       basic_block bb = bbs[i];
772 
773       /* BBs in a nested inner-loop will have been already processed (because
774          we will have called vect_analyze_loop_form for any nested inner-loop).
775          Therefore, for stmts in an inner-loop we just want to update the
776          STMT_VINFO_LOOP_VINFO field of their stmt_info to point to the new
777          loop_info of the outer-loop we are currently considering to vectorize
778          (instead of the loop_info of the inner-loop).
779          For stmts in other BBs we need to create a stmt_info from scratch.  */
780       if (bb->loop_father != loop)
781         {
782           /* Inner-loop bb.  */
783           gcc_assert (loop->inner && bb->loop_father == loop->inner);
784           for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
785             {
786               gimple phi = gsi_stmt (si);
787               stmt_vec_info stmt_info = vinfo_for_stmt (phi);
788               loop_vec_info inner_loop_vinfo =
789                 STMT_VINFO_LOOP_VINFO (stmt_info);
790               gcc_assert (loop->inner == LOOP_VINFO_LOOP (inner_loop_vinfo));
791               STMT_VINFO_LOOP_VINFO (stmt_info) = res;
792             }
793           for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
794            {
795               gimple stmt = gsi_stmt (si);
796               stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
797               loop_vec_info inner_loop_vinfo =
798                  STMT_VINFO_LOOP_VINFO (stmt_info);
799               gcc_assert (loop->inner == LOOP_VINFO_LOOP (inner_loop_vinfo));
800               STMT_VINFO_LOOP_VINFO (stmt_info) = res;
801            }
802         }
803       else
804         {
805           /* bb in current nest.  */
806           for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
807             {
808               gimple phi = gsi_stmt (si);
809               gimple_set_uid (phi, 0);
810               set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, res, NULL));
811             }
812 
813           for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
814             {
815               gimple stmt = gsi_stmt (si);
816               gimple_set_uid (stmt, 0);
817               set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, res, NULL));
818             }
819         }
820     }
821 
822   /* CHECKME: We want to visit all BBs before their successors (except for
823      latch blocks, for which this assertion wouldn't hold).  In the simple
824      case of the loop forms we allow, a dfs order of the BBs would the same
825      as reversed postorder traversal, so we are safe.  */
826 
827    free (bbs);
828    bbs = XCNEWVEC (basic_block, loop->num_nodes);
829    nbbs = dfs_enumerate_from (loop->header, 0, bb_in_loop_p,
830                               bbs, loop->num_nodes, loop);
831    gcc_assert (nbbs == loop->num_nodes);
832 
833   LOOP_VINFO_BBS (res) = bbs;
834   LOOP_VINFO_NITERS (res) = NULL;
835   LOOP_VINFO_NITERS_UNCHANGED (res) = NULL;
836   LOOP_VINFO_COST_MODEL_MIN_ITERS (res) = 0;
837   LOOP_VINFO_VECTORIZABLE_P (res) = 0;
838   LOOP_PEELING_FOR_ALIGNMENT (res) = 0;
839   LOOP_VINFO_VECT_FACTOR (res) = 0;
840   LOOP_VINFO_LOOP_NEST (res) = VEC_alloc (loop_p, heap, 3);
841   LOOP_VINFO_DATAREFS (res) = VEC_alloc (data_reference_p, heap, 10);
842   LOOP_VINFO_DDRS (res) = VEC_alloc (ddr_p, heap, 10 * 10);
843   LOOP_VINFO_UNALIGNED_DR (res) = NULL;
844   LOOP_VINFO_MAY_MISALIGN_STMTS (res) =
845     VEC_alloc (gimple, heap,
846                PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS));
847   LOOP_VINFO_MAY_ALIAS_DDRS (res) =
848     VEC_alloc (ddr_p, heap,
849                PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS));
850   LOOP_VINFO_STRIDED_STORES (res) = VEC_alloc (gimple, heap, 10);
851   LOOP_VINFO_REDUCTIONS (res) = VEC_alloc (gimple, heap, 10);
852   LOOP_VINFO_REDUCTION_CHAINS (res) = VEC_alloc (gimple, heap, 10);
853   LOOP_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 10);
854   LOOP_VINFO_SLP_UNROLLING_FACTOR (res) = 1;
855   LOOP_VINFO_PEELING_HTAB (res) = NULL;
856   LOOP_VINFO_PEELING_FOR_GAPS (res) = false;
857 
858   return res;
859 }
860 
861 
862 /* Function destroy_loop_vec_info.
863 
864    Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
865    stmts in the loop.  */
866 
867 void
868 destroy_loop_vec_info (loop_vec_info loop_vinfo, bool clean_stmts)
869 {
870   struct loop *loop;
871   basic_block *bbs;
872   int nbbs;
873   gimple_stmt_iterator si;
874   int j;
875   VEC (slp_instance, heap) *slp_instances;
876   slp_instance instance;
877 
878   if (!loop_vinfo)
879     return;
880 
881   loop = LOOP_VINFO_LOOP (loop_vinfo);
882 
883   bbs = LOOP_VINFO_BBS (loop_vinfo);
884   nbbs = loop->num_nodes;
885 
886   if (!clean_stmts)
887     {
888       free (LOOP_VINFO_BBS (loop_vinfo));
889       free_data_refs (LOOP_VINFO_DATAREFS (loop_vinfo));
890       free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo));
891       VEC_free (loop_p, heap, LOOP_VINFO_LOOP_NEST (loop_vinfo));
892       VEC_free (gimple, heap, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo));
893       VEC_free (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo));
894 
895       free (loop_vinfo);
896       loop->aux = NULL;
897       return;
898     }
899 
900   for (j = 0; j < nbbs; j++)
901     {
902       basic_block bb = bbs[j];
903       for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
904         free_stmt_vec_info (gsi_stmt (si));
905 
906       for (si = gsi_start_bb (bb); !gsi_end_p (si); )
907         {
908           gimple stmt = gsi_stmt (si);
909 	  /* Free stmt_vec_info.  */
910 	  free_stmt_vec_info (stmt);
911           gsi_next (&si);
912         }
913     }
914 
915   free (LOOP_VINFO_BBS (loop_vinfo));
916   free_data_refs (LOOP_VINFO_DATAREFS (loop_vinfo));
917   free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo));
918   VEC_free (loop_p, heap, LOOP_VINFO_LOOP_NEST (loop_vinfo));
919   VEC_free (gimple, heap, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo));
920   VEC_free (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo));
921   slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
922   FOR_EACH_VEC_ELT (slp_instance, slp_instances, j, instance)
923     vect_free_slp_instance (instance);
924 
925   VEC_free (slp_instance, heap, LOOP_VINFO_SLP_INSTANCES (loop_vinfo));
926   VEC_free (gimple, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo));
927   VEC_free (gimple, heap, LOOP_VINFO_REDUCTIONS (loop_vinfo));
928   VEC_free (gimple, heap, LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo));
929 
930   if (LOOP_VINFO_PEELING_HTAB (loop_vinfo))
931     htab_delete (LOOP_VINFO_PEELING_HTAB (loop_vinfo));
932 
933   free (loop_vinfo);
934   loop->aux = NULL;
935 }
936 
937 
938 /* Function vect_analyze_loop_1.
939 
940    Apply a set of analyses on LOOP, and create a loop_vec_info struct
941    for it. The different analyses will record information in the
942    loop_vec_info struct.  This is a subset of the analyses applied in
943    vect_analyze_loop, to be applied on an inner-loop nested in the loop
944    that is now considered for (outer-loop) vectorization.  */
945 
946 static loop_vec_info
947 vect_analyze_loop_1 (struct loop *loop)
948 {
949   loop_vec_info loop_vinfo;
950 
951   if (vect_print_dump_info (REPORT_DETAILS))
952     fprintf (vect_dump, "===== analyze_loop_nest_1 =====");
953 
954   /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.  */
955 
956   loop_vinfo = vect_analyze_loop_form (loop);
957   if (!loop_vinfo)
958     {
959       if (vect_print_dump_info (REPORT_DETAILS))
960         fprintf (vect_dump, "bad inner-loop form.");
961       return NULL;
962     }
963 
964   return loop_vinfo;
965 }
966 
967 
968 /* Function vect_analyze_loop_form.
969 
970    Verify that certain CFG restrictions hold, including:
971    - the loop has a pre-header
972    - the loop has a single entry and exit
973    - the loop exit condition is simple enough, and the number of iterations
974      can be analyzed (a countable loop).  */
975 
976 loop_vec_info
977 vect_analyze_loop_form (struct loop *loop)
978 {
979   loop_vec_info loop_vinfo;
980   gimple loop_cond;
981   tree number_of_iterations = NULL;
982   loop_vec_info inner_loop_vinfo = NULL;
983 
984   if (vect_print_dump_info (REPORT_DETAILS))
985     fprintf (vect_dump, "=== vect_analyze_loop_form ===");
986 
987   /* Different restrictions apply when we are considering an inner-most loop,
988      vs. an outer (nested) loop.
989      (FORNOW. May want to relax some of these restrictions in the future).  */
990 
991   if (!loop->inner)
992     {
993       /* Inner-most loop.  We currently require that the number of BBs is
994 	 exactly 2 (the header and latch).  Vectorizable inner-most loops
995 	 look like this:
996 
997                         (pre-header)
998                            |
999                           header <--------+
1000                            | |            |
1001                            | +--> latch --+
1002                            |
1003                         (exit-bb)  */
1004 
1005       if (loop->num_nodes != 2)
1006         {
1007           if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1008             fprintf (vect_dump, "not vectorized: control flow in loop.");
1009           return NULL;
1010         }
1011 
1012       if (empty_block_p (loop->header))
1013     {
1014           if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1015             fprintf (vect_dump, "not vectorized: empty loop.");
1016       return NULL;
1017     }
1018     }
1019   else
1020     {
1021       struct loop *innerloop = loop->inner;
1022       edge entryedge;
1023 
1024       /* Nested loop. We currently require that the loop is doubly-nested,
1025 	 contains a single inner loop, and the number of BBs is exactly 5.
1026 	 Vectorizable outer-loops look like this:
1027 
1028 			(pre-header)
1029 			   |
1030 			  header <---+
1031 			   |         |
1032 		          inner-loop |
1033 			   |         |
1034 			  tail ------+
1035 			   |
1036 		        (exit-bb)
1037 
1038 	 The inner-loop has the properties expected of inner-most loops
1039 	 as described above.  */
1040 
1041       if ((loop->inner)->inner || (loop->inner)->next)
1042 	{
1043 	  if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1044 	    fprintf (vect_dump, "not vectorized: multiple nested loops.");
1045 	  return NULL;
1046 	}
1047 
1048       /* Analyze the inner-loop.  */
1049       inner_loop_vinfo = vect_analyze_loop_1 (loop->inner);
1050       if (!inner_loop_vinfo)
1051 	{
1052 	  if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1053             fprintf (vect_dump, "not vectorized: Bad inner loop.");
1054 	  return NULL;
1055 	}
1056 
1057       if (!expr_invariant_in_loop_p (loop,
1058 					LOOP_VINFO_NITERS (inner_loop_vinfo)))
1059 	{
1060 	  if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1061 	    fprintf (vect_dump,
1062 		     "not vectorized: inner-loop count not invariant.");
1063 	  destroy_loop_vec_info (inner_loop_vinfo, true);
1064 	  return NULL;
1065 	}
1066 
1067       if (loop->num_nodes != 5)
1068         {
1069 	  if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1070 	    fprintf (vect_dump, "not vectorized: control flow in loop.");
1071 	  destroy_loop_vec_info (inner_loop_vinfo, true);
1072 	  return NULL;
1073         }
1074 
1075       gcc_assert (EDGE_COUNT (innerloop->header->preds) == 2);
1076       entryedge = EDGE_PRED (innerloop->header, 0);
1077       if (EDGE_PRED (innerloop->header, 0)->src == innerloop->latch)
1078 	entryedge = EDGE_PRED (innerloop->header, 1);
1079 
1080       if (entryedge->src != loop->header
1081 	  || !single_exit (innerloop)
1082 	  || single_exit (innerloop)->dest !=  EDGE_PRED (loop->latch, 0)->src)
1083 	{
1084 	  if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1085 	    fprintf (vect_dump, "not vectorized: unsupported outerloop form.");
1086 	  destroy_loop_vec_info (inner_loop_vinfo, true);
1087 	  return NULL;
1088 	}
1089 
1090       if (vect_print_dump_info (REPORT_DETAILS))
1091         fprintf (vect_dump, "Considering outer-loop vectorization.");
1092     }
1093 
1094   if (!single_exit (loop)
1095       || EDGE_COUNT (loop->header->preds) != 2)
1096     {
1097       if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1098         {
1099           if (!single_exit (loop))
1100             fprintf (vect_dump, "not vectorized: multiple exits.");
1101           else if (EDGE_COUNT (loop->header->preds) != 2)
1102             fprintf (vect_dump, "not vectorized: too many incoming edges.");
1103         }
1104       if (inner_loop_vinfo)
1105 	destroy_loop_vec_info (inner_loop_vinfo, true);
1106       return NULL;
1107     }
1108 
1109   /* We assume that the loop exit condition is at the end of the loop. i.e,
1110      that the loop is represented as a do-while (with a proper if-guard
1111      before the loop if needed), where the loop header contains all the
1112      executable statements, and the latch is empty.  */
1113   if (!empty_block_p (loop->latch)
1114         || !gimple_seq_empty_p (phi_nodes (loop->latch)))
1115     {
1116       if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1117         fprintf (vect_dump, "not vectorized: unexpected loop form.");
1118       if (inner_loop_vinfo)
1119 	destroy_loop_vec_info (inner_loop_vinfo, true);
1120       return NULL;
1121     }
1122 
1123   /* Make sure there exists a single-predecessor exit bb:  */
1124   if (!single_pred_p (single_exit (loop)->dest))
1125     {
1126       edge e = single_exit (loop);
1127       if (!(e->flags & EDGE_ABNORMAL))
1128 	{
1129 	  split_loop_exit_edge (e);
1130 	  if (vect_print_dump_info (REPORT_DETAILS))
1131 	    fprintf (vect_dump, "split exit edge.");
1132 	}
1133       else
1134 	{
1135 	  if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1136 	    fprintf (vect_dump, "not vectorized: abnormal loop exit edge.");
1137 	  if (inner_loop_vinfo)
1138 	    destroy_loop_vec_info (inner_loop_vinfo, true);
1139 	  return NULL;
1140 	}
1141     }
1142 
1143   loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
1144   if (!loop_cond)
1145     {
1146       if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1147 	fprintf (vect_dump, "not vectorized: complicated exit condition.");
1148       if (inner_loop_vinfo)
1149 	destroy_loop_vec_info (inner_loop_vinfo, true);
1150       return NULL;
1151     }
1152 
1153   if (!number_of_iterations)
1154     {
1155       if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1156 	fprintf (vect_dump,
1157 		 "not vectorized: number of iterations cannot be computed.");
1158       if (inner_loop_vinfo)
1159 	destroy_loop_vec_info (inner_loop_vinfo, true);
1160       return NULL;
1161     }
1162 
1163   if (chrec_contains_undetermined (number_of_iterations))
1164     {
1165       if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1166         fprintf (vect_dump, "Infinite number of iterations.");
1167       if (inner_loop_vinfo)
1168 	destroy_loop_vec_info (inner_loop_vinfo, true);
1169       return NULL;
1170     }
1171 
1172   if (!NITERS_KNOWN_P (number_of_iterations))
1173     {
1174       if (vect_print_dump_info (REPORT_DETAILS))
1175         {
1176           fprintf (vect_dump, "Symbolic number of iterations is ");
1177           print_generic_expr (vect_dump, number_of_iterations, TDF_DETAILS);
1178         }
1179     }
1180   else if (TREE_INT_CST_LOW (number_of_iterations) == 0)
1181     {
1182       if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1183         fprintf (vect_dump, "not vectorized: number of iterations = 0.");
1184       if (inner_loop_vinfo)
1185         destroy_loop_vec_info (inner_loop_vinfo, false);
1186       return NULL;
1187     }
1188 
1189   loop_vinfo = new_loop_vec_info (loop);
1190   LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
1191   LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = number_of_iterations;
1192 
1193   STMT_VINFO_TYPE (vinfo_for_stmt (loop_cond)) = loop_exit_ctrl_vec_info_type;
1194 
1195   /* CHECKME: May want to keep it around it in the future.  */
1196   if (inner_loop_vinfo)
1197     destroy_loop_vec_info (inner_loop_vinfo, false);
1198 
1199   gcc_assert (!loop->aux);
1200   loop->aux = loop_vinfo;
1201   return loop_vinfo;
1202 }
1203 
1204 
1205 /* Get cost by calling cost target builtin.  */
1206 
1207 static inline int
1208 vect_get_cost (enum vect_cost_for_stmt type_of_cost)
1209 {
1210   tree dummy_type = NULL;
1211   int dummy = 0;
1212 
1213   return targetm.vectorize.builtin_vectorization_cost (type_of_cost,
1214                                                        dummy_type, dummy);
1215 }
1216 
1217 
1218 /* Function vect_analyze_loop_operations.
1219 
1220    Scan the loop stmts and make sure they are all vectorizable.  */
1221 
1222 static bool
1223 vect_analyze_loop_operations (loop_vec_info loop_vinfo, bool slp)
1224 {
1225   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1226   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1227   int nbbs = loop->num_nodes;
1228   gimple_stmt_iterator si;
1229   unsigned int vectorization_factor = 0;
1230   int i;
1231   gimple phi;
1232   stmt_vec_info stmt_info;
1233   bool need_to_vectorize = false;
1234   int min_profitable_iters;
1235   int min_scalar_loop_bound;
1236   unsigned int th;
1237   bool only_slp_in_loop = true, ok;
1238 
1239   if (vect_print_dump_info (REPORT_DETAILS))
1240     fprintf (vect_dump, "=== vect_analyze_loop_operations ===");
1241 
1242   gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
1243   vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1244   if (slp)
1245     {
1246       /* If all the stmts in the loop can be SLPed, we perform only SLP, and
1247 	 vectorization factor of the loop is the unrolling factor required by
1248 	 the SLP instances.  If that unrolling factor is 1, we say, that we
1249 	 perform pure SLP on loop - cross iteration parallelism is not
1250 	 exploited.  */
1251       for (i = 0; i < nbbs; i++)
1252 	{
1253 	  basic_block bb = bbs[i];
1254 	  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1255 	    {
1256 	      gimple stmt = gsi_stmt (si);
1257 	      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1258 	      gcc_assert (stmt_info);
1259 	      if ((STMT_VINFO_RELEVANT_P (stmt_info)
1260 		   || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
1261 		  && !PURE_SLP_STMT (stmt_info))
1262 		/* STMT needs both SLP and loop-based vectorization.  */
1263 		only_slp_in_loop = false;
1264 	    }
1265 	}
1266 
1267       if (only_slp_in_loop)
1268 	vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
1269       else
1270 	vectorization_factor = least_common_multiple (vectorization_factor,
1271 				LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
1272 
1273       LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
1274       if (vect_print_dump_info (REPORT_DETAILS))
1275 	fprintf (vect_dump, "Updating vectorization factor to %d ",
1276 	 		    vectorization_factor);
1277     }
1278 
1279   for (i = 0; i < nbbs; i++)
1280     {
1281       basic_block bb = bbs[i];
1282 
1283       for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
1284         {
1285           phi = gsi_stmt (si);
1286           ok = true;
1287 
1288           stmt_info = vinfo_for_stmt (phi);
1289           if (vect_print_dump_info (REPORT_DETAILS))
1290             {
1291               fprintf (vect_dump, "examining phi: ");
1292               print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
1293             }
1294 
1295           /* Inner-loop loop-closed exit phi in outer-loop vectorization
1296              (i.e., a phi in the tail of the outer-loop).  */
1297           if (! is_loop_header_bb_p (bb))
1298             {
1299               /* FORNOW: we currently don't support the case that these phis
1300                  are not used in the outerloop (unless it is double reduction,
1301                  i.e., this phi is vect_reduction_def), cause this case
1302                  requires to actually do something here.  */
1303               if ((!STMT_VINFO_RELEVANT_P (stmt_info)
1304                    || STMT_VINFO_LIVE_P (stmt_info))
1305                   && STMT_VINFO_DEF_TYPE (stmt_info)
1306                      != vect_double_reduction_def)
1307                 {
1308                   if (vect_print_dump_info (REPORT_DETAILS))
1309                     fprintf (vect_dump,
1310                              "Unsupported loop-closed phi in outer-loop.");
1311                   return false;
1312                 }
1313 
1314               /* If PHI is used in the outer loop, we check that its operand
1315                  is defined in the inner loop.  */
1316               if (STMT_VINFO_RELEVANT_P (stmt_info))
1317                 {
1318                   tree phi_op;
1319                   gimple op_def_stmt;
1320 
1321                   if (gimple_phi_num_args (phi) != 1)
1322                     return false;
1323 
1324                   phi_op = PHI_ARG_DEF (phi, 0);
1325                   if (TREE_CODE (phi_op) != SSA_NAME)
1326                     return false;
1327 
1328                   op_def_stmt = SSA_NAME_DEF_STMT (phi_op);
1329 		  if (!op_def_stmt
1330 		      || !flow_bb_inside_loop_p (loop, gimple_bb (op_def_stmt))
1331 		      || !vinfo_for_stmt (op_def_stmt))
1332                     return false;
1333 
1334                   if (STMT_VINFO_RELEVANT (vinfo_for_stmt (op_def_stmt))
1335                         != vect_used_in_outer
1336                       && STMT_VINFO_RELEVANT (vinfo_for_stmt (op_def_stmt))
1337                            != vect_used_in_outer_by_reduction)
1338                     return false;
1339                 }
1340 
1341               continue;
1342             }
1343 
1344           gcc_assert (stmt_info);
1345 
1346           if (STMT_VINFO_LIVE_P (stmt_info))
1347             {
1348               /* FORNOW: not yet supported.  */
1349               if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1350                 fprintf (vect_dump, "not vectorized: value used after loop.");
1351               return false;
1352             }
1353 
1354           if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope
1355               && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def)
1356             {
1357               /* A scalar-dependence cycle that we don't support.  */
1358               if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1359                 fprintf (vect_dump, "not vectorized: scalar dependence cycle.");
1360               return false;
1361             }
1362 
1363           if (STMT_VINFO_RELEVANT_P (stmt_info))
1364             {
1365               need_to_vectorize = true;
1366               if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
1367                 ok = vectorizable_induction (phi, NULL, NULL);
1368             }
1369 
1370           if (!ok)
1371             {
1372               if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1373                 {
1374                   fprintf (vect_dump,
1375                            "not vectorized: relevant phi not supported: ");
1376                   print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
1377                 }
1378               return false;
1379             }
1380         }
1381 
1382       for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1383         {
1384           gimple stmt = gsi_stmt (si);
1385 	  if (!vect_analyze_stmt (stmt, &need_to_vectorize, NULL))
1386 	    return false;
1387         }
1388     } /* bbs */
1389 
1390   /* All operations in the loop are either irrelevant (deal with loop
1391      control, or dead), or only used outside the loop and can be moved
1392      out of the loop (e.g. invariants, inductions).  The loop can be
1393      optimized away by scalar optimizations.  We're better off not
1394      touching this loop.  */
1395   if (!need_to_vectorize)
1396     {
1397       if (vect_print_dump_info (REPORT_DETAILS))
1398         fprintf (vect_dump,
1399                  "All the computation can be taken out of the loop.");
1400       if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1401         fprintf (vect_dump,
1402                  "not vectorized: redundant loop. no profit to vectorize.");
1403       return false;
1404     }
1405 
1406   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1407       && vect_print_dump_info (REPORT_DETAILS))
1408     fprintf (vect_dump,
1409         "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
1410         vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
1411 
1412   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1413       && (LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor))
1414     {
1415       if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1416         fprintf (vect_dump, "not vectorized: iteration count too small.");
1417       if (vect_print_dump_info (REPORT_DETAILS))
1418         fprintf (vect_dump,"not vectorized: iteration count smaller than "
1419                  "vectorization factor.");
1420       return false;
1421     }
1422 
1423   /* Analyze cost.  Decide if worth while to vectorize.  */
1424 
1425   /* Once VF is set, SLP costs should be updated since the number of created
1426      vector stmts depends on VF.  */
1427   vect_update_slp_costs_according_to_vf (loop_vinfo);
1428 
1429   min_profitable_iters = vect_estimate_min_profitable_iters (loop_vinfo);
1430   LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo) = min_profitable_iters;
1431 
1432   if (min_profitable_iters < 0)
1433     {
1434       if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1435         fprintf (vect_dump, "not vectorized: vectorization not profitable.");
1436       if (vect_print_dump_info (REPORT_DETAILS))
1437         fprintf (vect_dump, "not vectorized: vector version will never be "
1438                  "profitable.");
1439       return false;
1440     }
1441 
1442   min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
1443                             * vectorization_factor) - 1);
1444 
1445   /* Use the cost model only if it is more conservative than user specified
1446      threshold.  */
1447 
1448   th = (unsigned) min_scalar_loop_bound;
1449   if (min_profitable_iters
1450       && (!min_scalar_loop_bound
1451           || min_profitable_iters > min_scalar_loop_bound))
1452     th = (unsigned) min_profitable_iters;
1453 
1454   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1455       && LOOP_VINFO_INT_NITERS (loop_vinfo) <= th)
1456     {
1457       if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1458         fprintf (vect_dump, "not vectorized: vectorization not "
1459                  "profitable.");
1460       if (vect_print_dump_info (REPORT_DETAILS))
1461         fprintf (vect_dump, "not vectorized: iteration count smaller than "
1462                  "user specified loop bound parameter or minimum "
1463                  "profitable iterations (whichever is more conservative).");
1464       return false;
1465     }
1466 
1467   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1468       || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0
1469       || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
1470     {
1471       if (vect_print_dump_info (REPORT_DETAILS))
1472         fprintf (vect_dump, "epilog loop required.");
1473       if (!vect_can_advance_ivs_p (loop_vinfo))
1474         {
1475           if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1476             fprintf (vect_dump,
1477                      "not vectorized: can't create epilog loop 1.");
1478           return false;
1479         }
1480       if (!slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1481         {
1482           if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1483             fprintf (vect_dump,
1484                      "not vectorized: can't create epilog loop 2.");
1485           return false;
1486         }
1487     }
1488 
1489   return true;
1490 }
1491 
1492 
1493 /* Function vect_analyze_loop_2.
1494 
1495    Apply a set of analyses on LOOP, and create a loop_vec_info struct
1496    for it.  The different analyses will record information in the
1497    loop_vec_info struct.  */
1498 static bool
1499 vect_analyze_loop_2 (loop_vec_info loop_vinfo)
1500 {
1501   bool ok, slp = false;
1502   int max_vf = MAX_VECTORIZATION_FACTOR;
1503   int min_vf = 2;
1504 
1505   /* Find all data references in the loop (which correspond to vdefs/vuses)
1506      and analyze their evolution in the loop.  Also adjust the minimal
1507      vectorization factor according to the loads and stores.
1508 
1509      FORNOW: Handle only simple, array references, which
1510      alignment can be forced, and aligned pointer-references.  */
1511 
1512   ok = vect_analyze_data_refs (loop_vinfo, NULL, &min_vf);
1513   if (!ok)
1514     {
1515       if (vect_print_dump_info (REPORT_DETAILS))
1516 	fprintf (vect_dump, "bad data references.");
1517       return false;
1518     }
1519 
1520   /* Classify all cross-iteration scalar data-flow cycles.
1521      Cross-iteration cycles caused by virtual phis are analyzed separately.  */
1522 
1523   vect_analyze_scalar_cycles (loop_vinfo);
1524 
1525   vect_pattern_recog (loop_vinfo);
1526 
1527   /* Data-flow analysis to detect stmts that do not need to be vectorized.  */
1528 
1529   ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
1530   if (!ok)
1531     {
1532       if (vect_print_dump_info (REPORT_DETAILS))
1533 	fprintf (vect_dump, "unexpected pattern.");
1534       return false;
1535     }
1536 
1537   /* Analyze data dependences between the data-refs in the loop
1538      and adjust the maximum vectorization factor according to
1539      the dependences.
1540      FORNOW: fail at the first data dependence that we encounter.  */
1541 
1542   ok = vect_analyze_data_ref_dependences (loop_vinfo, NULL, &max_vf);
1543   if (!ok
1544       || max_vf < min_vf)
1545     {
1546       if (vect_print_dump_info (REPORT_DETAILS))
1547 	fprintf (vect_dump, "bad data dependence.");
1548       return false;
1549     }
1550 
1551   ok = vect_determine_vectorization_factor (loop_vinfo);
1552   if (!ok)
1553     {
1554       if (vect_print_dump_info (REPORT_DETAILS))
1555         fprintf (vect_dump, "can't determine vectorization factor.");
1556       return false;
1557     }
1558   if (max_vf < LOOP_VINFO_VECT_FACTOR (loop_vinfo))
1559     {
1560       if (vect_print_dump_info (REPORT_DETAILS))
1561 	fprintf (vect_dump, "bad data dependence.");
1562       return false;
1563     }
1564 
1565   /* Analyze the alignment of the data-refs in the loop.
1566      Fail if a data reference is found that cannot be vectorized.  */
1567 
1568   ok = vect_analyze_data_refs_alignment (loop_vinfo, NULL);
1569   if (!ok)
1570     {
1571       if (vect_print_dump_info (REPORT_DETAILS))
1572 	fprintf (vect_dump, "bad data alignment.");
1573       return false;
1574     }
1575 
1576   /* Analyze the access patterns of the data-refs in the loop (consecutive,
1577      complex, etc.). FORNOW: Only handle consecutive access pattern.  */
1578 
1579   ok = vect_analyze_data_ref_accesses (loop_vinfo, NULL);
1580   if (!ok)
1581     {
1582       if (vect_print_dump_info (REPORT_DETAILS))
1583 	fprintf (vect_dump, "bad data access.");
1584       return false;
1585     }
1586 
1587   /* Prune the list of ddrs to be tested at run-time by versioning for alias.
1588      It is important to call pruning after vect_analyze_data_ref_accesses,
1589      since we use grouping information gathered by interleaving analysis.  */
1590   ok = vect_prune_runtime_alias_test_list (loop_vinfo);
1591   if (!ok)
1592     {
1593       if (vect_print_dump_info (REPORT_DETAILS))
1594 	fprintf (vect_dump, "too long list of versioning for alias "
1595 			    "run-time tests.");
1596       return false;
1597     }
1598 
1599   /* This pass will decide on using loop versioning and/or loop peeling in
1600      order to enhance the alignment of data references in the loop.  */
1601 
1602   ok = vect_enhance_data_refs_alignment (loop_vinfo);
1603   if (!ok)
1604     {
1605       if (vect_print_dump_info (REPORT_DETAILS))
1606         fprintf (vect_dump, "bad data alignment.");
1607       return false;
1608     }
1609 
1610   /* Check the SLP opportunities in the loop, analyze and build SLP trees.  */
1611   ok = vect_analyze_slp (loop_vinfo, NULL);
1612   if (ok)
1613     {
1614       /* Decide which possible SLP instances to SLP.  */
1615       slp = vect_make_slp_decision (loop_vinfo);
1616 
1617       /* Find stmts that need to be both vectorized and SLPed.  */
1618       vect_detect_hybrid_slp (loop_vinfo);
1619     }
1620   else
1621     return false;
1622 
1623   /* Scan all the operations in the loop and make sure they are
1624      vectorizable.  */
1625 
1626   ok = vect_analyze_loop_operations (loop_vinfo, slp);
1627   if (!ok)
1628     {
1629       if (vect_print_dump_info (REPORT_DETAILS))
1630 	fprintf (vect_dump, "bad operation or unsupported loop bound.");
1631       return false;
1632     }
1633 
1634   return true;
1635 }
1636 
1637 /* Function vect_analyze_loop.
1638 
1639    Apply a set of analyses on LOOP, and create a loop_vec_info struct
1640    for it.  The different analyses will record information in the
1641    loop_vec_info struct.  */
1642 loop_vec_info
1643 vect_analyze_loop (struct loop *loop)
1644 {
1645   loop_vec_info loop_vinfo;
1646   unsigned int vector_sizes;
1647 
1648   /* Autodetect first vector size we try.  */
1649   current_vector_size = 0;
1650   vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
1651 
1652   if (vect_print_dump_info (REPORT_DETAILS))
1653     fprintf (vect_dump, "===== analyze_loop_nest =====");
1654 
1655   if (loop_outer (loop)
1656       && loop_vec_info_for_loop (loop_outer (loop))
1657       && LOOP_VINFO_VECTORIZABLE_P (loop_vec_info_for_loop (loop_outer (loop))))
1658     {
1659       if (vect_print_dump_info (REPORT_DETAILS))
1660 	fprintf (vect_dump, "outer-loop already vectorized.");
1661       return NULL;
1662     }
1663 
1664   while (1)
1665     {
1666       /* Check the CFG characteristics of the loop (nesting, entry/exit).  */
1667       loop_vinfo = vect_analyze_loop_form (loop);
1668       if (!loop_vinfo)
1669 	{
1670 	  if (vect_print_dump_info (REPORT_DETAILS))
1671 	    fprintf (vect_dump, "bad loop form.");
1672 	  return NULL;
1673 	}
1674 
1675       if (vect_analyze_loop_2 (loop_vinfo))
1676 	{
1677 	  LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
1678 
1679 	  return loop_vinfo;
1680 	}
1681 
1682       destroy_loop_vec_info (loop_vinfo, true);
1683 
1684       vector_sizes &= ~current_vector_size;
1685       if (vector_sizes == 0
1686 	  || current_vector_size == 0)
1687 	return NULL;
1688 
1689       /* Try the next biggest vector size.  */
1690       current_vector_size = 1 << floor_log2 (vector_sizes);
1691       if (vect_print_dump_info (REPORT_DETAILS))
1692 	fprintf (vect_dump, "***** Re-trying analysis with "
1693 		 "vector size %d\n", current_vector_size);
1694     }
1695 }
1696 
1697 
1698 /* Function reduction_code_for_scalar_code
1699 
1700    Input:
1701    CODE - tree_code of a reduction operations.
1702 
1703    Output:
1704    REDUC_CODE - the corresponding tree-code to be used to reduce the
1705       vector of partial results into a single scalar result (which
1706       will also reside in a vector) or ERROR_MARK if the operation is
1707       a supported reduction operation, but does not have such tree-code.
1708 
1709    Return FALSE if CODE currently cannot be vectorized as reduction.  */
1710 
1711 static bool
1712 reduction_code_for_scalar_code (enum tree_code code,
1713                                 enum tree_code *reduc_code)
1714 {
1715   switch (code)
1716     {
1717       case MAX_EXPR:
1718         *reduc_code = REDUC_MAX_EXPR;
1719         return true;
1720 
1721       case MIN_EXPR:
1722         *reduc_code = REDUC_MIN_EXPR;
1723         return true;
1724 
1725       case PLUS_EXPR:
1726         *reduc_code = REDUC_PLUS_EXPR;
1727         return true;
1728 
1729       case MULT_EXPR:
1730       case MINUS_EXPR:
1731       case BIT_IOR_EXPR:
1732       case BIT_XOR_EXPR:
1733       case BIT_AND_EXPR:
1734         *reduc_code = ERROR_MARK;
1735         return true;
1736 
1737       default:
1738        return false;
1739     }
1740 }
1741 
1742 
1743 /* Error reporting helper for vect_is_simple_reduction below.  GIMPLE statement
1744    STMT is printed with a message MSG. */
1745 
1746 static void
1747 report_vect_op (gimple stmt, const char *msg)
1748 {
1749   fprintf (vect_dump, "%s", msg);
1750   print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1751 }
1752 
1753 
1754 /* Detect SLP reduction of the form:
1755 
1756    #a1 = phi <a5, a0>
1757    a2 = operation (a1)
1758    a3 = operation (a2)
1759    a4 = operation (a3)
1760    a5 = operation (a4)
1761 
1762    #a = phi <a5>
1763 
1764    PHI is the reduction phi node (#a1 = phi <a5, a0> above)
1765    FIRST_STMT is the first reduction stmt in the chain
1766    (a2 = operation (a1)).
1767 
1768    Return TRUE if a reduction chain was detected.  */
1769 
1770 static bool
1771 vect_is_slp_reduction (loop_vec_info loop_info, gimple phi, gimple first_stmt)
1772 {
1773   struct loop *loop = (gimple_bb (phi))->loop_father;
1774   struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
1775   enum tree_code code;
1776   gimple current_stmt = NULL, loop_use_stmt = NULL, first, next_stmt;
1777   stmt_vec_info use_stmt_info, current_stmt_info;
1778   tree lhs;
1779   imm_use_iterator imm_iter;
1780   use_operand_p use_p;
1781   int nloop_uses, size = 0, n_out_of_loop_uses;
1782   bool found = false;
1783 
1784   if (loop != vect_loop)
1785     return false;
1786 
1787   lhs = PHI_RESULT (phi);
1788   code = gimple_assign_rhs_code (first_stmt);
1789   while (1)
1790     {
1791       nloop_uses = 0;
1792       n_out_of_loop_uses = 0;
1793       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
1794         {
1795 	  gimple use_stmt = USE_STMT (use_p);
1796           if (is_gimple_debug (use_stmt))
1797             continue;
1798 
1799 	  use_stmt = USE_STMT (use_p);
1800 
1801           /* Check if we got back to the reduction phi.  */
1802 	  if (use_stmt == phi)
1803             {
1804 	      loop_use_stmt = use_stmt;
1805               found = true;
1806               break;
1807             }
1808 
1809           if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
1810             {
1811               if (vinfo_for_stmt (use_stmt)
1812                   && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
1813                 {
1814                   loop_use_stmt = use_stmt;
1815                   nloop_uses++;
1816                 }
1817             }
1818            else
1819              n_out_of_loop_uses++;
1820 
1821            /* There are can be either a single use in the loop or two uses in
1822               phi nodes.  */
1823            if (nloop_uses > 1 || (n_out_of_loop_uses && nloop_uses))
1824              return false;
1825         }
1826 
1827       if (found)
1828         break;
1829 
1830       /* We reached a statement with no loop uses.  */
1831       if (nloop_uses == 0)
1832 	return false;
1833 
1834       /* This is a loop exit phi, and we haven't reached the reduction phi.  */
1835       if (gimple_code (loop_use_stmt) == GIMPLE_PHI)
1836         return false;
1837 
1838       if (!is_gimple_assign (loop_use_stmt)
1839 	  || code != gimple_assign_rhs_code (loop_use_stmt)
1840 	  || !flow_bb_inside_loop_p (loop, gimple_bb (loop_use_stmt)))
1841         return false;
1842 
1843       /* Insert USE_STMT into reduction chain.  */
1844       use_stmt_info = vinfo_for_stmt (loop_use_stmt);
1845       if (current_stmt)
1846         {
1847           current_stmt_info = vinfo_for_stmt (current_stmt);
1848 	  GROUP_NEXT_ELEMENT (current_stmt_info) = loop_use_stmt;
1849           GROUP_FIRST_ELEMENT (use_stmt_info)
1850             = GROUP_FIRST_ELEMENT (current_stmt_info);
1851         }
1852       else
1853 	GROUP_FIRST_ELEMENT (use_stmt_info) = loop_use_stmt;
1854 
1855       lhs = gimple_assign_lhs (loop_use_stmt);
1856       current_stmt = loop_use_stmt;
1857       size++;
1858    }
1859 
1860   if (!found || loop_use_stmt != phi || size < 2)
1861     return false;
1862 
1863   /* Swap the operands, if needed, to make the reduction operand be the second
1864      operand.  */
1865   lhs = PHI_RESULT (phi);
1866   next_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (current_stmt));
1867   while (next_stmt)
1868     {
1869       if (gimple_assign_rhs2 (next_stmt) == lhs)
1870 	{
1871 	  tree op = gimple_assign_rhs1 (next_stmt);
1872           gimple def_stmt = NULL;
1873 
1874           if (TREE_CODE (op) == SSA_NAME)
1875             def_stmt = SSA_NAME_DEF_STMT (op);
1876 
1877 	  /* Check that the other def is either defined in the loop
1878 	     ("vect_internal_def"), or it's an induction (defined by a
1879 	     loop-header phi-node).  */
1880           if (def_stmt
1881               && gimple_bb (def_stmt)
1882 	      && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
1883               && (is_gimple_assign (def_stmt)
1884                   || is_gimple_call (def_stmt)
1885                   || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
1886                            == vect_induction_def
1887                   || (gimple_code (def_stmt) == GIMPLE_PHI
1888                       && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
1889                                   == vect_internal_def
1890                       && !is_loop_header_bb_p (gimple_bb (def_stmt)))))
1891 	    {
1892 	      lhs = gimple_assign_lhs (next_stmt);
1893 	      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
1894  	      continue;
1895 	    }
1896 
1897 	  return false;
1898 	}
1899       else
1900 	{
1901           tree op = gimple_assign_rhs2 (next_stmt);
1902           gimple def_stmt = NULL;
1903 
1904           if (TREE_CODE (op) == SSA_NAME)
1905             def_stmt = SSA_NAME_DEF_STMT (op);
1906 
1907           /* Check that the other def is either defined in the loop
1908             ("vect_internal_def"), or it's an induction (defined by a
1909             loop-header phi-node).  */
1910           if (def_stmt
1911               && gimple_bb (def_stmt)
1912 	      && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
1913               && (is_gimple_assign (def_stmt)
1914                   || is_gimple_call (def_stmt)
1915                   || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
1916                               == vect_induction_def
1917                   || (gimple_code (def_stmt) == GIMPLE_PHI
1918                       && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
1919                                   == vect_internal_def
1920                       && !is_loop_header_bb_p (gimple_bb (def_stmt)))))
1921   	    {
1922 	      if (vect_print_dump_info (REPORT_DETAILS))
1923 		{
1924 		  fprintf (vect_dump, "swapping oprnds: ");
1925 		  print_gimple_stmt (vect_dump, next_stmt, 0, TDF_SLIM);
1926 		}
1927 
1928 	      swap_tree_operands (next_stmt,
1929 	 		          gimple_assign_rhs1_ptr (next_stmt),
1930                                   gimple_assign_rhs2_ptr (next_stmt));
1931 	      mark_symbols_for_renaming (next_stmt);
1932 	    }
1933 	  else
1934 	    return false;
1935         }
1936 
1937       lhs = gimple_assign_lhs (next_stmt);
1938       next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
1939     }
1940 
1941   /* Save the chain for further analysis in SLP detection.  */
1942   first = GROUP_FIRST_ELEMENT (vinfo_for_stmt (current_stmt));
1943   VEC_safe_push (gimple, heap, LOOP_VINFO_REDUCTION_CHAINS (loop_info), first);
1944   GROUP_SIZE (vinfo_for_stmt (first)) = size;
1945 
1946   return true;
1947 }
1948 
1949 
1950 /* Function vect_is_simple_reduction_1
1951 
1952    (1) Detect a cross-iteration def-use cycle that represents a simple
1953    reduction computation.  We look for the following pattern:
1954 
1955    loop_header:
1956      a1 = phi < a0, a2 >
1957      a3 = ...
1958      a2 = operation (a3, a1)
1959 
1960    such that:
1961    1. operation is commutative and associative and it is safe to
1962       change the order of the computation (if CHECK_REDUCTION is true)
1963    2. no uses for a2 in the loop (a2 is used out of the loop)
1964    3. no uses of a1 in the loop besides the reduction operation
1965    4. no uses of a1 outside the loop.
1966 
1967    Conditions 1,4 are tested here.
1968    Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized.
1969 
1970    (2) Detect a cross-iteration def-use cycle in nested loops, i.e.,
1971    nested cycles, if CHECK_REDUCTION is false.
1972 
1973    (3) Detect cycles of phi nodes in outer-loop vectorization, i.e., double
1974    reductions:
1975 
1976      a1 = phi < a0, a2 >
1977      inner loop (def of a3)
1978      a2 = phi < a3 >
1979 
1980    If MODIFY is true it tries also to rework the code in-place to enable
1981    detection of more reduction patterns.  For the time being we rewrite
1982    "res -= RHS" into "rhs += -RHS" when it seems worthwhile.
1983 */
1984 
1985 static gimple
1986 vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple phi,
1987 			    bool check_reduction, bool *double_reduc,
1988 			    bool modify)
1989 {
1990   struct loop *loop = (gimple_bb (phi))->loop_father;
1991   struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
1992   edge latch_e = loop_latch_edge (loop);
1993   tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
1994   gimple def_stmt, def1 = NULL, def2 = NULL;
1995   enum tree_code orig_code, code;
1996   tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE;
1997   tree type;
1998   int nloop_uses;
1999   tree name;
2000   imm_use_iterator imm_iter;
2001   use_operand_p use_p;
2002   bool phi_def;
2003 
2004   *double_reduc = false;
2005 
2006   /* If CHECK_REDUCTION is true, we assume inner-most loop vectorization,
2007      otherwise, we assume outer loop vectorization.  */
2008   gcc_assert ((check_reduction && loop == vect_loop)
2009               || (!check_reduction && flow_loop_nested_p (vect_loop, loop)));
2010 
2011   name = PHI_RESULT (phi);
2012   /* ???  If there are no uses of the PHI result the inner loop reduction
2013      won't be detected as possibly double-reduction by vectorizable_reduction
2014      because that tries to walk the PHI arg from the preheader edge which
2015      can be constant.  See PR60382.  */
2016   if (has_zero_uses (name))
2017     return NULL;
2018   nloop_uses = 0;
2019   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
2020     {
2021       gimple use_stmt = USE_STMT (use_p);
2022       if (is_gimple_debug (use_stmt))
2023 	continue;
2024 
2025       if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
2026         {
2027           if (vect_print_dump_info (REPORT_DETAILS))
2028             fprintf (vect_dump, "intermediate value used outside loop.");
2029 
2030           return NULL;
2031         }
2032 
2033       if (vinfo_for_stmt (use_stmt)
2034 	  && !is_pattern_stmt_p (vinfo_for_stmt (use_stmt)))
2035         nloop_uses++;
2036       if (nloop_uses > 1)
2037         {
2038           if (vect_print_dump_info (REPORT_DETAILS))
2039             fprintf (vect_dump, "reduction used in loop.");
2040           return NULL;
2041         }
2042     }
2043 
2044   if (TREE_CODE (loop_arg) != SSA_NAME)
2045     {
2046       if (vect_print_dump_info (REPORT_DETAILS))
2047 	{
2048 	  fprintf (vect_dump, "reduction: not ssa_name: ");
2049 	  print_generic_expr (vect_dump, loop_arg, TDF_SLIM);
2050 	}
2051       return NULL;
2052     }
2053 
2054   def_stmt = SSA_NAME_DEF_STMT (loop_arg);
2055   if (!def_stmt)
2056     {
2057       if (vect_print_dump_info (REPORT_DETAILS))
2058 	fprintf (vect_dump, "reduction: no def_stmt.");
2059       return NULL;
2060     }
2061 
2062   if (!is_gimple_assign (def_stmt) && gimple_code (def_stmt) != GIMPLE_PHI)
2063     {
2064       if (vect_print_dump_info (REPORT_DETAILS))
2065         print_gimple_stmt (vect_dump, def_stmt, 0, TDF_SLIM);
2066       return NULL;
2067     }
2068 
2069   if (is_gimple_assign (def_stmt))
2070     {
2071       name = gimple_assign_lhs (def_stmt);
2072       phi_def = false;
2073     }
2074   else
2075     {
2076       name = PHI_RESULT (def_stmt);
2077       phi_def = true;
2078     }
2079 
2080   nloop_uses = 0;
2081   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
2082     {
2083       gimple use_stmt = USE_STMT (use_p);
2084       if (is_gimple_debug (use_stmt))
2085 	continue;
2086       if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))
2087 	  && vinfo_for_stmt (use_stmt)
2088 	  && !is_pattern_stmt_p (vinfo_for_stmt (use_stmt)))
2089 	nloop_uses++;
2090       if (nloop_uses > 1)
2091 	{
2092 	  if (vect_print_dump_info (REPORT_DETAILS))
2093 	    fprintf (vect_dump, "reduction used in loop.");
2094 	  return NULL;
2095 	}
2096     }
2097 
2098   /* If DEF_STMT is a phi node itself, we expect it to have a single argument
2099      defined in the inner loop.  */
2100   if (phi_def)
2101     {
2102       op1 = PHI_ARG_DEF (def_stmt, 0);
2103 
2104       if (gimple_phi_num_args (def_stmt) != 1
2105           || TREE_CODE (op1) != SSA_NAME)
2106         {
2107           if (vect_print_dump_info (REPORT_DETAILS))
2108             fprintf (vect_dump, "unsupported phi node definition.");
2109 
2110           return NULL;
2111         }
2112 
2113       def1 = SSA_NAME_DEF_STMT (op1);
2114       if (flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
2115           && loop->inner
2116           && flow_bb_inside_loop_p (loop->inner, gimple_bb (def1))
2117           && is_gimple_assign (def1))
2118         {
2119           if (vect_print_dump_info (REPORT_DETAILS))
2120             report_vect_op (def_stmt, "detected double reduction: ");
2121 
2122           *double_reduc = true;
2123           return def_stmt;
2124         }
2125 
2126       return NULL;
2127     }
2128 
2129   code = orig_code = gimple_assign_rhs_code (def_stmt);
2130 
2131   /* We can handle "res -= x[i]", which is non-associative by
2132      simply rewriting this into "res += -x[i]".  Avoid changing
2133      gimple instruction for the first simple tests and only do this
2134      if we're allowed to change code at all.  */
2135   if (code == MINUS_EXPR
2136       && modify
2137       && (op1 = gimple_assign_rhs1 (def_stmt))
2138       && TREE_CODE (op1) == SSA_NAME
2139       && SSA_NAME_DEF_STMT (op1) == phi)
2140     code = PLUS_EXPR;
2141 
2142   if (check_reduction
2143       && (!commutative_tree_code (code) || !associative_tree_code (code)))
2144     {
2145       if (vect_print_dump_info (REPORT_DETAILS))
2146         report_vect_op (def_stmt, "reduction: not commutative/associative: ");
2147       return NULL;
2148     }
2149 
2150   if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
2151     {
2152       if (code != COND_EXPR)
2153         {
2154           if (vect_print_dump_info (REPORT_DETAILS))
2155 	    report_vect_op (def_stmt, "reduction: not binary operation: ");
2156 
2157           return NULL;
2158         }
2159 
2160       op3 = gimple_assign_rhs1 (def_stmt);
2161       if (COMPARISON_CLASS_P (op3))
2162         {
2163           op4 = TREE_OPERAND (op3, 1);
2164           op3 = TREE_OPERAND (op3, 0);
2165         }
2166 
2167       op1 = gimple_assign_rhs2 (def_stmt);
2168       op2 = gimple_assign_rhs3 (def_stmt);
2169 
2170       if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
2171         {
2172           if (vect_print_dump_info (REPORT_DETAILS))
2173             report_vect_op (def_stmt, "reduction: uses not ssa_names: ");
2174 
2175           return NULL;
2176         }
2177     }
2178   else
2179     {
2180       op1 = gimple_assign_rhs1 (def_stmt);
2181       op2 = gimple_assign_rhs2 (def_stmt);
2182 
2183       if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
2184         {
2185           if (vect_print_dump_info (REPORT_DETAILS))
2186 	    report_vect_op (def_stmt, "reduction: uses not ssa_names: ");
2187 
2188           return NULL;
2189         }
2190    }
2191 
2192   type = TREE_TYPE (gimple_assign_lhs (def_stmt));
2193   if ((TREE_CODE (op1) == SSA_NAME
2194        && !types_compatible_p (type,TREE_TYPE (op1)))
2195       || (TREE_CODE (op2) == SSA_NAME
2196           && !types_compatible_p (type, TREE_TYPE (op2)))
2197       || (op3 && TREE_CODE (op3) == SSA_NAME
2198           && !types_compatible_p (type, TREE_TYPE (op3)))
2199       || (op4 && TREE_CODE (op4) == SSA_NAME
2200           && !types_compatible_p (type, TREE_TYPE (op4))))
2201     {
2202       if (vect_print_dump_info (REPORT_DETAILS))
2203         {
2204           fprintf (vect_dump, "reduction: multiple types: operation type: ");
2205           print_generic_expr (vect_dump, type, TDF_SLIM);
2206           fprintf (vect_dump, ", operands types: ");
2207           print_generic_expr (vect_dump, TREE_TYPE (op1), TDF_SLIM);
2208           fprintf (vect_dump, ",");
2209           print_generic_expr (vect_dump, TREE_TYPE (op2), TDF_SLIM);
2210           if (op3)
2211             {
2212               fprintf (vect_dump, ",");
2213               print_generic_expr (vect_dump, TREE_TYPE (op3), TDF_SLIM);
2214             }
2215 
2216           if (op4)
2217             {
2218               fprintf (vect_dump, ",");
2219               print_generic_expr (vect_dump, TREE_TYPE (op4), TDF_SLIM);
2220             }
2221         }
2222 
2223       return NULL;
2224     }
2225 
2226   /* Check that it's ok to change the order of the computation.
2227      Generally, when vectorizing a reduction we change the order of the
2228      computation.  This may change the behavior of the program in some
2229      cases, so we need to check that this is ok.  One exception is when
2230      vectorizing an outer-loop: the inner-loop is executed sequentially,
2231      and therefore vectorizing reductions in the inner-loop during
2232      outer-loop vectorization is safe.  */
2233 
2234   /* CHECKME: check for !flag_finite_math_only too?  */
2235   if (SCALAR_FLOAT_TYPE_P (type) && !flag_associative_math
2236       && check_reduction)
2237     {
2238       /* Changing the order of operations changes the semantics.  */
2239       if (vect_print_dump_info (REPORT_DETAILS))
2240 	report_vect_op (def_stmt, "reduction: unsafe fp math optimization: ");
2241       return NULL;
2242     }
2243   else if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type)
2244 	   && check_reduction)
2245     {
2246       /* Changing the order of operations changes the semantics.  */
2247       if (vect_print_dump_info (REPORT_DETAILS))
2248 	report_vect_op (def_stmt, "reduction: unsafe int math optimization: ");
2249       return NULL;
2250     }
2251   else if (SAT_FIXED_POINT_TYPE_P (type) && check_reduction)
2252     {
2253       /* Changing the order of operations changes the semantics.  */
2254       if (vect_print_dump_info (REPORT_DETAILS))
2255 	report_vect_op (def_stmt,
2256 			"reduction: unsafe fixed-point math optimization: ");
2257       return NULL;
2258     }
2259 
2260   /* If we detected "res -= x[i]" earlier, rewrite it into
2261      "res += -x[i]" now.  If this turns out to be useless reassoc
2262      will clean it up again.  */
2263   if (orig_code == MINUS_EXPR)
2264     {
2265       tree rhs = gimple_assign_rhs2 (def_stmt);
2266       tree var = TREE_CODE (rhs) == SSA_NAME
2267 		 ? SSA_NAME_VAR (rhs)
2268 		 : create_tmp_reg (TREE_TYPE (rhs), NULL);
2269       tree negrhs = make_ssa_name (var, NULL);
2270       gimple negate_stmt = gimple_build_assign_with_ops (NEGATE_EXPR, negrhs,
2271 							 rhs, NULL);
2272       gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
2273       set_vinfo_for_stmt (negate_stmt, new_stmt_vec_info (negate_stmt,
2274 							  loop_info, NULL));
2275       gsi_insert_before (&gsi, negate_stmt, GSI_NEW_STMT);
2276       gimple_assign_set_rhs2 (def_stmt, negrhs);
2277       gimple_assign_set_rhs_code (def_stmt, PLUS_EXPR);
2278       update_stmt (def_stmt);
2279     }
2280 
2281   /* Reduction is safe. We're dealing with one of the following:
2282      1) integer arithmetic and no trapv
2283      2) floating point arithmetic, and special flags permit this optimization
2284      3) nested cycle (i.e., outer loop vectorization).  */
2285   if (TREE_CODE (op1) == SSA_NAME)
2286     def1 = SSA_NAME_DEF_STMT (op1);
2287 
2288   if (TREE_CODE (op2) == SSA_NAME)
2289     def2 = SSA_NAME_DEF_STMT (op2);
2290 
2291   if (code != COND_EXPR
2292       && ((!def1 || gimple_nop_p (def1)) && (!def2 || gimple_nop_p (def2))))
2293     {
2294       if (vect_print_dump_info (REPORT_DETAILS))
2295 	report_vect_op (def_stmt, "reduction: no defs for operands: ");
2296       return NULL;
2297     }
2298 
2299   /* Check that one def is the reduction def, defined by PHI,
2300      the other def is either defined in the loop ("vect_internal_def"),
2301      or it's an induction (defined by a loop-header phi-node).  */
2302 
2303   if (def2 && def2 == phi
2304       && (code == COND_EXPR
2305 	  || !def1 || gimple_nop_p (def1)
2306           || (def1 && flow_bb_inside_loop_p (loop, gimple_bb (def1))
2307               && (is_gimple_assign (def1)
2308 		  || is_gimple_call (def1)
2309   	          || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1))
2310                       == vect_induction_def
2311    	          || (gimple_code (def1) == GIMPLE_PHI
2312 	              && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1))
2313                           == vect_internal_def
2314  	              && !is_loop_header_bb_p (gimple_bb (def1)))))))
2315     {
2316       if (vect_print_dump_info (REPORT_DETAILS))
2317 	report_vect_op (def_stmt, "detected reduction: ");
2318       return def_stmt;
2319     }
2320 
2321   if (def1 && def1 == phi
2322       && (code == COND_EXPR
2323 	  || !def2 || gimple_nop_p (def2)
2324           || (def2 && flow_bb_inside_loop_p (loop, gimple_bb (def2))
2325  	      && (is_gimple_assign (def2)
2326 		  || is_gimple_call (def2)
2327 	          || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
2328                       == vect_induction_def
2329  	          || (gimple_code (def2) == GIMPLE_PHI
2330 		      && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
2331                           == vect_internal_def
2332 		      && !is_loop_header_bb_p (gimple_bb (def2)))))))
2333     {
2334       if (check_reduction)
2335         {
2336           /* Swap operands (just for simplicity - so that the rest of the code
2337 	     can assume that the reduction variable is always the last (second)
2338 	     argument).  */
2339           if (vect_print_dump_info (REPORT_DETAILS))
2340 	    report_vect_op (def_stmt,
2341 	  	            "detected reduction: need to swap operands: ");
2342 
2343           swap_tree_operands (def_stmt, gimple_assign_rhs1_ptr (def_stmt),
2344  			      gimple_assign_rhs2_ptr (def_stmt));
2345         }
2346       else
2347         {
2348           if (vect_print_dump_info (REPORT_DETAILS))
2349             report_vect_op (def_stmt, "detected reduction: ");
2350         }
2351 
2352       return def_stmt;
2353     }
2354 
2355   /* Try to find SLP reduction chain.  */
2356   if (check_reduction && vect_is_slp_reduction (loop_info, phi, def_stmt))
2357     {
2358       if (vect_print_dump_info (REPORT_DETAILS))
2359         report_vect_op (def_stmt, "reduction: detected reduction chain: ");
2360 
2361       return def_stmt;
2362     }
2363 
2364   if (vect_print_dump_info (REPORT_DETAILS))
2365     report_vect_op (def_stmt, "reduction: unknown pattern: ");
2366 
2367   return NULL;
2368 }
2369 
2370 /* Wrapper around vect_is_simple_reduction_1, that won't modify code
2371    in-place.  Arguments as there.  */
2372 
2373 static gimple
2374 vect_is_simple_reduction (loop_vec_info loop_info, gimple phi,
2375                           bool check_reduction, bool *double_reduc)
2376 {
2377   return vect_is_simple_reduction_1 (loop_info, phi, check_reduction,
2378 				     double_reduc, false);
2379 }
2380 
2381 /* Wrapper around vect_is_simple_reduction_1, which will modify code
2382    in-place if it enables detection of more reductions.  Arguments
2383    as there.  */
2384 
2385 gimple
2386 vect_force_simple_reduction (loop_vec_info loop_info, gimple phi,
2387                           bool check_reduction, bool *double_reduc)
2388 {
2389   return vect_is_simple_reduction_1 (loop_info, phi, check_reduction,
2390 				     double_reduc, true);
2391 }
2392 
2393 /* Calculate the cost of one scalar iteration of the loop.  */
2394 int
2395 vect_get_single_scalar_iteration_cost (loop_vec_info loop_vinfo)
2396 {
2397   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2398   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2399   int nbbs = loop->num_nodes, factor, scalar_single_iter_cost = 0;
2400   int innerloop_iters, i, stmt_cost;
2401 
2402   /* Count statements in scalar loop.  Using this as scalar cost for a single
2403      iteration for now.
2404 
2405      TODO: Add outer loop support.
2406 
2407      TODO: Consider assigning different costs to different scalar
2408      statements.  */
2409 
2410   /* FORNOW.  */
2411   innerloop_iters = 1;
2412   if (loop->inner)
2413     innerloop_iters = 50; /* FIXME */
2414 
2415   for (i = 0; i < nbbs; i++)
2416     {
2417       gimple_stmt_iterator si;
2418       basic_block bb = bbs[i];
2419 
2420       if (bb->loop_father == loop->inner)
2421         factor = innerloop_iters;
2422       else
2423         factor = 1;
2424 
2425       for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2426         {
2427           gimple stmt = gsi_stmt (si);
2428           stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2429 
2430           if (!is_gimple_assign (stmt) && !is_gimple_call (stmt))
2431             continue;
2432 
2433           /* Skip stmts that are not vectorized inside the loop.  */
2434           if (stmt_info
2435               && !STMT_VINFO_RELEVANT_P (stmt_info)
2436               && (!STMT_VINFO_LIVE_P (stmt_info)
2437                   || !VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
2438 	      && !STMT_VINFO_IN_PATTERN_P (stmt_info))
2439             continue;
2440 
2441           if (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
2442             {
2443               if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
2444                stmt_cost = vect_get_cost (scalar_load);
2445              else
2446                stmt_cost = vect_get_cost (scalar_store);
2447             }
2448           else
2449             stmt_cost = vect_get_cost (scalar_stmt);
2450 
2451           scalar_single_iter_cost += stmt_cost * factor;
2452         }
2453     }
2454   return scalar_single_iter_cost;
2455 }
2456 
2457 /* Calculate cost of peeling the loop PEEL_ITERS_PROLOGUE times.  */
2458 int
2459 vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue,
2460                              int *peel_iters_epilogue,
2461                              int scalar_single_iter_cost)
2462 {
2463   int peel_guard_costs = 0;
2464   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2465 
2466   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
2467     {
2468       *peel_iters_epilogue = vf/2;
2469       if (vect_print_dump_info (REPORT_COST))
2470         fprintf (vect_dump, "cost model: "
2471                             "epilogue peel iters set to vf/2 because "
2472                             "loop iterations are unknown .");
2473 
2474       /* If peeled iterations are known but number of scalar loop
2475          iterations are unknown, count a taken branch per peeled loop.  */
2476       peel_guard_costs =  2 * vect_get_cost (cond_branch_taken);
2477     }
2478   else
2479     {
2480       int niters = LOOP_VINFO_INT_NITERS (loop_vinfo);
2481       peel_iters_prologue = niters < peel_iters_prologue ?
2482                             niters : peel_iters_prologue;
2483       *peel_iters_epilogue = (niters - peel_iters_prologue) % vf;
2484       /* If we need to peel for gaps, but no peeling is required, we have to
2485 	 peel VF iterations.  */
2486       if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) && !*peel_iters_epilogue)
2487         *peel_iters_epilogue = vf;
2488     }
2489 
2490    return (peel_iters_prologue * scalar_single_iter_cost)
2491             + (*peel_iters_epilogue * scalar_single_iter_cost)
2492            + peel_guard_costs;
2493 }
2494 
2495 /* Function vect_estimate_min_profitable_iters
2496 
2497    Return the number of iterations required for the vector version of the
2498    loop to be profitable relative to the cost of the scalar version of the
2499    loop.
2500 
2501    TODO: Take profile info into account before making vectorization
2502    decisions, if available.  */
2503 
2504 int
2505 vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
2506 {
2507   int i;
2508   int min_profitable_iters;
2509   int peel_iters_prologue;
2510   int peel_iters_epilogue;
2511   int vec_inside_cost = 0;
2512   int vec_outside_cost = 0;
2513   int scalar_single_iter_cost = 0;
2514   int scalar_outside_cost = 0;
2515   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2516   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2517   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2518   int nbbs = loop->num_nodes;
2519   int npeel = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
2520   int peel_guard_costs = 0;
2521   int innerloop_iters = 0, factor;
2522   VEC (slp_instance, heap) *slp_instances;
2523   slp_instance instance;
2524 
2525   /* Cost model disabled.  */
2526   if (!flag_vect_cost_model)
2527     {
2528       if (vect_print_dump_info (REPORT_COST))
2529         fprintf (vect_dump, "cost model disabled.");
2530       return 0;
2531     }
2532 
2533   /* Requires loop versioning tests to handle misalignment.  */
2534   if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
2535     {
2536       /*  FIXME: Make cost depend on complexity of individual check.  */
2537       vec_outside_cost +=
2538 	VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo));
2539       if (vect_print_dump_info (REPORT_COST))
2540         fprintf (vect_dump, "cost model: Adding cost of checks for loop "
2541                  "versioning to treat misalignment.\n");
2542     }
2543 
2544   /* Requires loop versioning with alias checks.  */
2545   if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2546     {
2547       /*  FIXME: Make cost depend on complexity of individual check.  */
2548       vec_outside_cost +=
2549         VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo));
2550       if (vect_print_dump_info (REPORT_COST))
2551         fprintf (vect_dump, "cost model: Adding cost of checks for loop "
2552                  "versioning aliasing.\n");
2553     }
2554 
2555   if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
2556       || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2557     vec_outside_cost += vect_get_cost (cond_branch_taken);
2558 
2559   /* Count statements in scalar loop.  Using this as scalar cost for a single
2560      iteration for now.
2561 
2562      TODO: Add outer loop support.
2563 
2564      TODO: Consider assigning different costs to different scalar
2565      statements.  */
2566 
2567   /* FORNOW.  */
2568   if (loop->inner)
2569     innerloop_iters = 50; /* FIXME */
2570 
2571   for (i = 0; i < nbbs; i++)
2572     {
2573       gimple_stmt_iterator si;
2574       basic_block bb = bbs[i];
2575 
2576       if (bb->loop_father == loop->inner)
2577  	factor = innerloop_iters;
2578       else
2579  	factor = 1;
2580 
2581       for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2582 	{
2583 	  gimple stmt = gsi_stmt (si);
2584 	  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2585 
2586 	  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2587 	    {
2588 	      stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2589 	      stmt_info = vinfo_for_stmt (stmt);
2590 	    }
2591 
2592 	  /* Skip stmts that are not vectorized inside the loop.  */
2593 	  if (!STMT_VINFO_RELEVANT_P (stmt_info)
2594 	      && (!STMT_VINFO_LIVE_P (stmt_info)
2595                  || !VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info))))
2596 	    continue;
2597 
2598 	  vec_inside_cost += STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) * factor;
2599 	  /* FIXME: for stmts in the inner-loop in outer-loop vectorization,
2600 	     some of the "outside" costs are generated inside the outer-loop.  */
2601 	  vec_outside_cost += STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info);
2602           if (is_pattern_stmt_p (stmt_info)
2603 	      && STMT_VINFO_PATTERN_DEF_SEQ (stmt_info))
2604             {
2605 	      gimple_stmt_iterator gsi;
2606 
2607 	      for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
2608 		   !gsi_end_p (gsi); gsi_next (&gsi))
2609                 {
2610                   gimple pattern_def_stmt = gsi_stmt (gsi);
2611                   stmt_vec_info pattern_def_stmt_info
2612 		    = vinfo_for_stmt (pattern_def_stmt);
2613                   if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
2614                       || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
2615 		    {
2616                       vec_inside_cost
2617 			+= STMT_VINFO_INSIDE_OF_LOOP_COST
2618 			   (pattern_def_stmt_info) * factor;
2619                       vec_outside_cost
2620 			+= STMT_VINFO_OUTSIDE_OF_LOOP_COST
2621 			   (pattern_def_stmt_info);
2622                     }
2623 		}
2624 	    }
2625 	}
2626     }
2627 
2628   scalar_single_iter_cost = vect_get_single_scalar_iteration_cost (loop_vinfo);
2629 
2630   /* Add additional cost for the peeled instructions in prologue and epilogue
2631      loop.
2632 
2633      FORNOW: If we don't know the value of peel_iters for prologue or epilogue
2634      at compile-time - we assume it's vf/2 (the worst would be vf-1).
2635 
2636      TODO: Build an expression that represents peel_iters for prologue and
2637      epilogue to be used in a run-time test.  */
2638 
2639   if (npeel  < 0)
2640     {
2641       peel_iters_prologue = vf/2;
2642       if (vect_print_dump_info (REPORT_COST))
2643         fprintf (vect_dump, "cost model: "
2644                  "prologue peel iters set to vf/2.");
2645 
2646       /* If peeling for alignment is unknown, loop bound of main loop becomes
2647          unknown.  */
2648       peel_iters_epilogue = vf/2;
2649       if (vect_print_dump_info (REPORT_COST))
2650         fprintf (vect_dump, "cost model: "
2651                  "epilogue peel iters set to vf/2 because "
2652                  "peeling for alignment is unknown .");
2653 
2654       /* If peeled iterations are unknown, count a taken branch and a not taken
2655          branch per peeled loop. Even if scalar loop iterations are known,
2656          vector iterations are not known since peeled prologue iterations are
2657          not known. Hence guards remain the same.  */
2658       peel_guard_costs +=  2 * (vect_get_cost (cond_branch_taken)
2659                                 + vect_get_cost (cond_branch_not_taken));
2660       vec_outside_cost += (peel_iters_prologue * scalar_single_iter_cost)
2661                            + (peel_iters_epilogue * scalar_single_iter_cost)
2662                            + peel_guard_costs;
2663     }
2664   else
2665     {
2666       peel_iters_prologue = npeel;
2667       vec_outside_cost += vect_get_known_peeling_cost (loop_vinfo,
2668                                     peel_iters_prologue, &peel_iters_epilogue,
2669                                     scalar_single_iter_cost);
2670     }
2671 
2672   /* FORNOW: The scalar outside cost is incremented in one of the
2673      following ways:
2674 
2675      1. The vectorizer checks for alignment and aliasing and generates
2676      a condition that allows dynamic vectorization.  A cost model
2677      check is ANDED with the versioning condition.  Hence scalar code
2678      path now has the added cost of the versioning check.
2679 
2680        if (cost > th & versioning_check)
2681          jmp to vector code
2682 
2683      Hence run-time scalar is incremented by not-taken branch cost.
2684 
2685      2. The vectorizer then checks if a prologue is required.  If the
2686      cost model check was not done before during versioning, it has to
2687      be done before the prologue check.
2688 
2689        if (cost <= th)
2690          prologue = scalar_iters
2691        if (prologue == 0)
2692          jmp to vector code
2693        else
2694          execute prologue
2695        if (prologue == num_iters)
2696 	 go to exit
2697 
2698      Hence the run-time scalar cost is incremented by a taken branch,
2699      plus a not-taken branch, plus a taken branch cost.
2700 
2701      3. The vectorizer then checks if an epilogue is required.  If the
2702      cost model check was not done before during prologue check, it
2703      has to be done with the epilogue check.
2704 
2705        if (prologue == 0)
2706          jmp to vector code
2707        else
2708          execute prologue
2709        if (prologue == num_iters)
2710 	 go to exit
2711        vector code:
2712          if ((cost <= th) | (scalar_iters-prologue-epilogue == 0))
2713            jmp to epilogue
2714 
2715      Hence the run-time scalar cost should be incremented by 2 taken
2716      branches.
2717 
2718      TODO: The back end may reorder the BBS's differently and reverse
2719      conditions/branch directions.  Change the estimates below to
2720      something more reasonable.  */
2721 
2722   /* If the number of iterations is known and we do not do versioning, we can
2723      decide whether to vectorize at compile time.  Hence the scalar version
2724      do not carry cost model guard costs.  */
2725   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2726       || LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
2727       || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2728     {
2729       /* Cost model check occurs at versioning.  */
2730       if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
2731           || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2732 	scalar_outside_cost += vect_get_cost (cond_branch_not_taken);
2733       else
2734 	{
2735 	  /* Cost model check occurs at prologue generation.  */
2736 	  if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0)
2737 	    scalar_outside_cost += 2 * vect_get_cost (cond_branch_taken)
2738                                    + vect_get_cost (cond_branch_not_taken);
2739 	  /* Cost model check occurs at epilogue generation.  */
2740 	  else
2741 	    scalar_outside_cost += 2 * vect_get_cost (cond_branch_taken);
2742 	}
2743     }
2744 
2745   /* Add SLP costs.  */
2746   slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2747   FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
2748     {
2749       vec_outside_cost += SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (instance);
2750       vec_inside_cost += SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance);
2751     }
2752 
2753   /* Calculate number of iterations required to make the vector version
2754      profitable, relative to the loop bodies only.  The following condition
2755      must hold true:
2756      SIC * niters + SOC > VIC * ((niters-PL_ITERS-EP_ITERS)/VF) + VOC
2757      where
2758      SIC = scalar iteration cost, VIC = vector iteration cost,
2759      VOC = vector outside cost, VF = vectorization factor,
2760      PL_ITERS = prologue iterations, EP_ITERS= epilogue iterations
2761      SOC = scalar outside cost for run time cost model check.  */
2762 
2763   if ((scalar_single_iter_cost * vf) > vec_inside_cost)
2764     {
2765       if (vec_outside_cost <= 0)
2766         min_profitable_iters = 1;
2767       else
2768         {
2769           min_profitable_iters = ((vec_outside_cost - scalar_outside_cost) * vf
2770 				  - vec_inside_cost * peel_iters_prologue
2771                                   - vec_inside_cost * peel_iters_epilogue)
2772                                  / ((scalar_single_iter_cost * vf)
2773                                     - vec_inside_cost);
2774 
2775           if ((scalar_single_iter_cost * vf * min_profitable_iters)
2776               <= ((vec_inside_cost * min_profitable_iters)
2777                   + ((vec_outside_cost - scalar_outside_cost) * vf)))
2778             min_profitable_iters++;
2779         }
2780     }
2781   /* vector version will never be profitable.  */
2782   else
2783     {
2784       if (vect_print_dump_info (REPORT_COST))
2785         fprintf (vect_dump, "cost model: the vector iteration cost = %d "
2786 		 "divided by the scalar iteration cost = %d "
2787 		 "is greater or equal to the vectorization factor = %d.",
2788                  vec_inside_cost, scalar_single_iter_cost, vf);
2789       return -1;
2790     }
2791 
2792   if (vect_print_dump_info (REPORT_COST))
2793     {
2794       fprintf (vect_dump, "Cost model analysis: \n");
2795       fprintf (vect_dump, "  Vector inside of loop cost: %d\n",
2796 	       vec_inside_cost);
2797       fprintf (vect_dump, "  Vector outside of loop cost: %d\n",
2798 	       vec_outside_cost);
2799       fprintf (vect_dump, "  Scalar iteration cost: %d\n",
2800 	       scalar_single_iter_cost);
2801       fprintf (vect_dump, "  Scalar outside cost: %d\n", scalar_outside_cost);
2802       fprintf (vect_dump, "  prologue iterations: %d\n",
2803                peel_iters_prologue);
2804       fprintf (vect_dump, "  epilogue iterations: %d\n",
2805                peel_iters_epilogue);
2806       fprintf (vect_dump, "  Calculated minimum iters for profitability: %d\n",
2807 	       min_profitable_iters);
2808     }
2809 
2810   min_profitable_iters =
2811 	min_profitable_iters < vf ? vf : min_profitable_iters;
2812 
2813   /* Because the condition we create is:
2814      if (niters <= min_profitable_iters)
2815        then skip the vectorized loop.  */
2816   min_profitable_iters--;
2817 
2818   if (vect_print_dump_info (REPORT_COST))
2819     fprintf (vect_dump, "  Profitability threshold = %d\n",
2820 	     min_profitable_iters);
2821 
2822   return min_profitable_iters;
2823 }
2824 
2825 
2826 /* TODO: Close dependency between vect_model_*_cost and vectorizable_*
2827    functions. Design better to avoid maintenance issues.  */
2828 
2829 /* Function vect_model_reduction_cost.
2830 
2831    Models cost for a reduction operation, including the vector ops
2832    generated within the strip-mine loop, the initial definition before
2833    the loop, and the epilogue code that must be generated.  */
2834 
2835 static bool
2836 vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
2837 			   int ncopies)
2838 {
2839   int outer_cost = 0;
2840   enum tree_code code;
2841   optab optab;
2842   tree vectype;
2843   gimple stmt, orig_stmt;
2844   tree reduction_op;
2845   enum machine_mode mode;
2846   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2847   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2848 
2849 
2850   /* Cost of reduction op inside loop.  */
2851   STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info)
2852     += ncopies * vect_get_cost (vector_stmt);
2853 
2854   stmt = STMT_VINFO_STMT (stmt_info);
2855 
2856   switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
2857     {
2858     case GIMPLE_SINGLE_RHS:
2859       gcc_assert (TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt)) == ternary_op);
2860       reduction_op = TREE_OPERAND (gimple_assign_rhs1 (stmt), 2);
2861       break;
2862     case GIMPLE_UNARY_RHS:
2863       reduction_op = gimple_assign_rhs1 (stmt);
2864       break;
2865     case GIMPLE_BINARY_RHS:
2866       reduction_op = gimple_assign_rhs2 (stmt);
2867       break;
2868     case GIMPLE_TERNARY_RHS:
2869       reduction_op = gimple_assign_rhs3 (stmt);
2870       break;
2871     default:
2872       gcc_unreachable ();
2873     }
2874 
2875   vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
2876   if (!vectype)
2877     {
2878       if (vect_print_dump_info (REPORT_COST))
2879         {
2880           fprintf (vect_dump, "unsupported data-type ");
2881           print_generic_expr (vect_dump, TREE_TYPE (reduction_op), TDF_SLIM);
2882         }
2883       return false;
2884    }
2885 
2886   mode = TYPE_MODE (vectype);
2887   orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2888 
2889   if (!orig_stmt)
2890     orig_stmt = STMT_VINFO_STMT (stmt_info);
2891 
2892   code = gimple_assign_rhs_code (orig_stmt);
2893 
2894   /* Add in cost for initial definition.  */
2895   outer_cost += vect_get_cost (scalar_to_vec);
2896 
2897   /* Determine cost of epilogue code.
2898 
2899      We have a reduction operator that will reduce the vector in one statement.
2900      Also requires scalar extract.  */
2901 
2902   if (!nested_in_vect_loop_p (loop, orig_stmt))
2903     {
2904       if (reduc_code != ERROR_MARK)
2905 	outer_cost += vect_get_cost (vector_stmt)
2906                       + vect_get_cost (vec_to_scalar);
2907       else
2908 	{
2909 	  int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
2910 	  tree bitsize =
2911 	    TYPE_SIZE (TREE_TYPE (gimple_assign_lhs (orig_stmt)));
2912 	  int element_bitsize = tree_low_cst (bitsize, 1);
2913 	  int nelements = vec_size_in_bits / element_bitsize;
2914 
2915 	  optab = optab_for_tree_code (code, vectype, optab_default);
2916 
2917 	  /* We have a whole vector shift available.  */
2918 	  if (VECTOR_MODE_P (mode)
2919 	      && optab_handler (optab, mode) != CODE_FOR_nothing
2920 	      && optab_handler (vec_shr_optab, mode) != CODE_FOR_nothing)
2921 	    /* Final reduction via vector shifts and the reduction operator. Also
2922 	       requires scalar extract.  */
2923 	    outer_cost += ((exact_log2(nelements) * 2)
2924               * vect_get_cost (vector_stmt)
2925   	      + vect_get_cost (vec_to_scalar));
2926 	  else
2927 	    /* Use extracts and reduction op for final reduction.  For N elements,
2928                we have N extracts and N-1 reduction ops.  */
2929 	    outer_cost += ((nelements + nelements - 1)
2930               * vect_get_cost (vector_stmt));
2931 	}
2932     }
2933 
2934   STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = outer_cost;
2935 
2936   if (vect_print_dump_info (REPORT_COST))
2937     fprintf (vect_dump, "vect_model_reduction_cost: inside_cost = %d, "
2938              "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
2939              STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
2940 
2941   return true;
2942 }
2943 
2944 
2945 /* Function vect_model_induction_cost.
2946 
2947    Models cost for induction operations.  */
2948 
2949 static void
2950 vect_model_induction_cost (stmt_vec_info stmt_info, int ncopies)
2951 {
2952   /* loop cost for vec_loop.  */
2953   STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info)
2954     = ncopies * vect_get_cost (vector_stmt);
2955   /* prologue cost for vec_init and vec_step.  */
2956   STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info)
2957     = 2 * vect_get_cost (scalar_to_vec);
2958 
2959   if (vect_print_dump_info (REPORT_COST))
2960     fprintf (vect_dump, "vect_model_induction_cost: inside_cost = %d, "
2961              "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
2962              STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
2963 }
2964 
2965 
2966 /* Function get_initial_def_for_induction
2967 
2968    Input:
2969    STMT - a stmt that performs an induction operation in the loop.
2970    IV_PHI - the initial value of the induction variable
2971 
2972    Output:
2973    Return a vector variable, initialized with the first VF values of
2974    the induction variable.  E.g., for an iv with IV_PHI='X' and
2975    evolution S, for a vector of 4 units, we want to return:
2976    [X, X + S, X + 2*S, X + 3*S].  */
2977 
2978 static tree
2979 get_initial_def_for_induction (gimple iv_phi)
2980 {
2981   stmt_vec_info stmt_vinfo = vinfo_for_stmt (iv_phi);
2982   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2983   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2984   tree scalar_type;
2985   tree vectype;
2986   int nunits;
2987   edge pe = loop_preheader_edge (loop);
2988   struct loop *iv_loop;
2989   basic_block new_bb;
2990   tree vec, vec_init, vec_step, t;
2991   tree access_fn;
2992   tree new_var;
2993   tree new_name;
2994   gimple init_stmt, induction_phi, new_stmt;
2995   tree induc_def, vec_def, vec_dest;
2996   tree init_expr, step_expr;
2997   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2998   int i;
2999   bool ok;
3000   int ncopies;
3001   tree expr;
3002   stmt_vec_info phi_info = vinfo_for_stmt (iv_phi);
3003   bool nested_in_vect_loop = false;
3004   gimple_seq stmts = NULL;
3005   imm_use_iterator imm_iter;
3006   use_operand_p use_p;
3007   gimple exit_phi;
3008   edge latch_e;
3009   tree loop_arg;
3010   gimple_stmt_iterator si;
3011   basic_block bb = gimple_bb (iv_phi);
3012   tree stepvectype;
3013   tree resvectype;
3014 
3015   /* Is phi in an inner-loop, while vectorizing an enclosing outer-loop?  */
3016   if (nested_in_vect_loop_p (loop, iv_phi))
3017     {
3018       nested_in_vect_loop = true;
3019       iv_loop = loop->inner;
3020     }
3021   else
3022     iv_loop = loop;
3023   gcc_assert (iv_loop == (gimple_bb (iv_phi))->loop_father);
3024 
3025   latch_e = loop_latch_edge (iv_loop);
3026   loop_arg = PHI_ARG_DEF_FROM_EDGE (iv_phi, latch_e);
3027 
3028   access_fn = analyze_scalar_evolution (iv_loop, PHI_RESULT (iv_phi));
3029   gcc_assert (access_fn);
3030   STRIP_NOPS (access_fn);
3031   ok = vect_is_simple_iv_evolution (iv_loop->num, access_fn,
3032                                     &init_expr, &step_expr);
3033   gcc_assert (ok);
3034   pe = loop_preheader_edge (iv_loop);
3035 
3036   scalar_type = TREE_TYPE (init_expr);
3037   vectype = get_vectype_for_scalar_type (scalar_type);
3038   resvectype = get_vectype_for_scalar_type (TREE_TYPE (PHI_RESULT (iv_phi)));
3039   gcc_assert (vectype);
3040   nunits = TYPE_VECTOR_SUBPARTS (vectype);
3041   ncopies = vf / nunits;
3042 
3043   gcc_assert (phi_info);
3044   gcc_assert (ncopies >= 1);
3045 
3046   /* Find the first insertion point in the BB.  */
3047   si = gsi_after_labels (bb);
3048 
3049   /* Create the vector that holds the initial_value of the induction.  */
3050   if (nested_in_vect_loop)
3051     {
3052       /* iv_loop is nested in the loop to be vectorized.  init_expr had already
3053 	 been created during vectorization of previous stmts.  We obtain it
3054 	 from the STMT_VINFO_VEC_STMT of the defining stmt.  */
3055       tree iv_def = PHI_ARG_DEF_FROM_EDGE (iv_phi,
3056                                            loop_preheader_edge (iv_loop));
3057       vec_init = vect_get_vec_def_for_operand (iv_def, iv_phi, NULL);
3058     }
3059   else
3060     {
3061       /* iv_loop is the loop to be vectorized. Create:
3062 	 vec_init = [X, X+S, X+2*S, X+3*S] (S = step_expr, X = init_expr)  */
3063       new_var = vect_get_new_vect_var (scalar_type, vect_scalar_var, "var_");
3064       add_referenced_var (new_var);
3065 
3066       new_name = force_gimple_operand (init_expr, &stmts, false, new_var);
3067       if (stmts)
3068 	{
3069 	  new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3070 	  gcc_assert (!new_bb);
3071 	}
3072 
3073       t = NULL_TREE;
3074       t = tree_cons (NULL_TREE, new_name, t);
3075       for (i = 1; i < nunits; i++)
3076 	{
3077 	  /* Create: new_name_i = new_name + step_expr  */
3078 	  enum tree_code code = POINTER_TYPE_P (scalar_type)
3079 				? POINTER_PLUS_EXPR : PLUS_EXPR;
3080 	  init_stmt = gimple_build_assign_with_ops (code, new_var,
3081 						    new_name, step_expr);
3082 	  new_name = make_ssa_name (new_var, init_stmt);
3083 	  gimple_assign_set_lhs (init_stmt, new_name);
3084 
3085 	  new_bb = gsi_insert_on_edge_immediate (pe, init_stmt);
3086 	  gcc_assert (!new_bb);
3087 
3088 	  if (vect_print_dump_info (REPORT_DETAILS))
3089 	    {
3090 	      fprintf (vect_dump, "created new init_stmt: ");
3091 	      print_gimple_stmt (vect_dump, init_stmt, 0, TDF_SLIM);
3092 	    }
3093 	  t = tree_cons (NULL_TREE, new_name, t);
3094 	}
3095       /* Create a vector from [new_name_0, new_name_1, ..., new_name_nunits-1]  */
3096       vec = build_constructor_from_list (vectype, nreverse (t));
3097       vec_init = vect_init_vector (iv_phi, vec, vectype, NULL);
3098     }
3099 
3100 
3101   /* Create the vector that holds the step of the induction.  */
3102   if (nested_in_vect_loop)
3103     /* iv_loop is nested in the loop to be vectorized. Generate:
3104        vec_step = [S, S, S, S]  */
3105     new_name = step_expr;
3106   else
3107     {
3108       /* iv_loop is the loop to be vectorized. Generate:
3109 	  vec_step = [VF*S, VF*S, VF*S, VF*S]  */
3110       expr = build_int_cst (TREE_TYPE (step_expr), vf);
3111       new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
3112 			      expr, step_expr);
3113     }
3114 
3115   t = unshare_expr (new_name);
3116   gcc_assert (CONSTANT_CLASS_P (new_name));
3117   stepvectype = get_vectype_for_scalar_type (TREE_TYPE (new_name));
3118   gcc_assert (stepvectype);
3119   vec = build_vector_from_val (stepvectype, t);
3120   vec_step = vect_init_vector (iv_phi, vec, stepvectype, NULL);
3121 
3122 
3123   /* Create the following def-use cycle:
3124      loop prolog:
3125          vec_init = ...
3126 	 vec_step = ...
3127      loop:
3128          vec_iv = PHI <vec_init, vec_loop>
3129          ...
3130          STMT
3131          ...
3132          vec_loop = vec_iv + vec_step;  */
3133 
3134   /* Create the induction-phi that defines the induction-operand.  */
3135   vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_");
3136   add_referenced_var (vec_dest);
3137   induction_phi = create_phi_node (vec_dest, iv_loop->header);
3138   set_vinfo_for_stmt (induction_phi,
3139 		      new_stmt_vec_info (induction_phi, loop_vinfo, NULL));
3140   induc_def = PHI_RESULT (induction_phi);
3141 
3142   /* Create the iv update inside the loop  */
3143   new_stmt = gimple_build_assign_with_ops (PLUS_EXPR, vec_dest,
3144 					   induc_def, vec_step);
3145   vec_def = make_ssa_name (vec_dest, new_stmt);
3146   gimple_assign_set_lhs (new_stmt, vec_def);
3147   gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
3148   set_vinfo_for_stmt (new_stmt, new_stmt_vec_info (new_stmt, loop_vinfo,
3149                                                    NULL));
3150 
3151   /* Set the arguments of the phi node:  */
3152   add_phi_arg (induction_phi, vec_init, pe, UNKNOWN_LOCATION);
3153   add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop),
3154 	       UNKNOWN_LOCATION);
3155 
3156 
3157   /* In case that vectorization factor (VF) is bigger than the number
3158      of elements that we can fit in a vectype (nunits), we have to generate
3159      more than one vector stmt - i.e - we need to "unroll" the
3160      vector stmt by a factor VF/nunits.  For more details see documentation
3161      in vectorizable_operation.  */
3162 
3163   if (ncopies > 1)
3164     {
3165       stmt_vec_info prev_stmt_vinfo;
3166       /* FORNOW. This restriction should be relaxed.  */
3167       gcc_assert (!nested_in_vect_loop);
3168 
3169       /* Create the vector that holds the step of the induction.  */
3170       expr = build_int_cst (TREE_TYPE (step_expr), nunits);
3171       new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
3172 			      expr, step_expr);
3173       t = unshare_expr (new_name);
3174       gcc_assert (CONSTANT_CLASS_P (new_name));
3175       vec = build_vector_from_val (stepvectype, t);
3176       vec_step = vect_init_vector (iv_phi, vec, stepvectype, NULL);
3177 
3178       vec_def = induc_def;
3179       prev_stmt_vinfo = vinfo_for_stmt (induction_phi);
3180       for (i = 1; i < ncopies; i++)
3181 	{
3182 	  /* vec_i = vec_prev + vec_step  */
3183 	  new_stmt = gimple_build_assign_with_ops (PLUS_EXPR, vec_dest,
3184 						   vec_def, vec_step);
3185 	  vec_def = make_ssa_name (vec_dest, new_stmt);
3186 	  gimple_assign_set_lhs (new_stmt, vec_def);
3187 
3188 	  gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
3189 	  if (!useless_type_conversion_p (resvectype, vectype))
3190 	    {
3191 	      new_stmt = gimple_build_assign_with_ops
3192 		  (VIEW_CONVERT_EXPR,
3193 		   vect_get_new_vect_var (resvectype, vect_simple_var,
3194 					  "vec_iv_"),
3195 		   build1 (VIEW_CONVERT_EXPR, resvectype,
3196 			   gimple_assign_lhs (new_stmt)), NULL_TREE);
3197 	      gimple_assign_set_lhs (new_stmt,
3198 				     make_ssa_name
3199 				       (gimple_assign_lhs (new_stmt), new_stmt));
3200 	      gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
3201 	    }
3202 	  set_vinfo_for_stmt (new_stmt,
3203 			      new_stmt_vec_info (new_stmt, loop_vinfo, NULL));
3204 	  STMT_VINFO_RELATED_STMT (prev_stmt_vinfo) = new_stmt;
3205 	  prev_stmt_vinfo = vinfo_for_stmt (new_stmt);
3206 	}
3207     }
3208 
3209   if (nested_in_vect_loop)
3210     {
3211       /* Find the loop-closed exit-phi of the induction, and record
3212          the final vector of induction results:  */
3213       exit_phi = NULL;
3214       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, loop_arg)
3215         {
3216 	  if (!flow_bb_inside_loop_p (iv_loop, gimple_bb (USE_STMT (use_p))))
3217 	    {
3218 	      exit_phi = USE_STMT (use_p);
3219 	      break;
3220 	    }
3221         }
3222       if (exit_phi)
3223 	{
3224 	  stmt_vec_info stmt_vinfo = vinfo_for_stmt (exit_phi);
3225 	  /* FORNOW. Currently not supporting the case that an inner-loop induction
3226 	     is not used in the outer-loop (i.e. only outside the outer-loop).  */
3227 	  gcc_assert (STMT_VINFO_RELEVANT_P (stmt_vinfo)
3228 		      && !STMT_VINFO_LIVE_P (stmt_vinfo));
3229 
3230 	  STMT_VINFO_VEC_STMT (stmt_vinfo) = new_stmt;
3231 	  if (vect_print_dump_info (REPORT_DETAILS))
3232 	    {
3233 	      fprintf (vect_dump, "vector of inductions after inner-loop:");
3234 	      print_gimple_stmt (vect_dump, new_stmt, 0, TDF_SLIM);
3235 	    }
3236 	}
3237     }
3238 
3239 
3240   if (vect_print_dump_info (REPORT_DETAILS))
3241     {
3242       fprintf (vect_dump, "transform induction: created def-use cycle: ");
3243       print_gimple_stmt (vect_dump, induction_phi, 0, TDF_SLIM);
3244       fprintf (vect_dump, "\n");
3245       print_gimple_stmt (vect_dump, SSA_NAME_DEF_STMT (vec_def), 0, TDF_SLIM);
3246     }
3247 
3248   STMT_VINFO_VEC_STMT (phi_info) = induction_phi;
3249   if (!useless_type_conversion_p (resvectype, vectype))
3250     {
3251       new_stmt = gimple_build_assign_with_ops
3252 	 (VIEW_CONVERT_EXPR,
3253 	  vect_get_new_vect_var (resvectype, vect_simple_var, "vec_iv_"),
3254 	  build1 (VIEW_CONVERT_EXPR, resvectype, induc_def), NULL_TREE);
3255       induc_def = make_ssa_name (gimple_assign_lhs (new_stmt), new_stmt);
3256       gimple_assign_set_lhs (new_stmt, induc_def);
3257       si = gsi_start_bb (bb);
3258       gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
3259       set_vinfo_for_stmt (new_stmt,
3260 			  new_stmt_vec_info (new_stmt, loop_vinfo, NULL));
3261       STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_stmt))
3262 	= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (induction_phi));
3263     }
3264 
3265   return induc_def;
3266 }
3267 
3268 
3269 /* Function get_initial_def_for_reduction
3270 
3271    Input:
3272    STMT - a stmt that performs a reduction operation in the loop.
3273    INIT_VAL - the initial value of the reduction variable
3274 
3275    Output:
3276    ADJUSTMENT_DEF - a tree that holds a value to be added to the final result
3277         of the reduction (used for adjusting the epilog - see below).
3278    Return a vector variable, initialized according to the operation that STMT
3279         performs. This vector will be used as the initial value of the
3280         vector of partial results.
3281 
3282    Option1 (adjust in epilog): Initialize the vector as follows:
3283      add/bit or/xor:    [0,0,...,0,0]
3284      mult/bit and:      [1,1,...,1,1]
3285      min/max/cond_expr: [init_val,init_val,..,init_val,init_val]
3286    and when necessary (e.g. add/mult case) let the caller know
3287    that it needs to adjust the result by init_val.
3288 
3289    Option2: Initialize the vector as follows:
3290      add/bit or/xor:    [init_val,0,0,...,0]
3291      mult/bit and:      [init_val,1,1,...,1]
3292      min/max/cond_expr: [init_val,init_val,...,init_val]
3293    and no adjustments are needed.
3294 
3295    For example, for the following code:
3296 
3297    s = init_val;
3298    for (i=0;i<n;i++)
3299      s = s + a[i];
3300 
3301    STMT is 's = s + a[i]', and the reduction variable is 's'.
3302    For a vector of 4 units, we want to return either [0,0,0,init_val],
3303    or [0,0,0,0] and let the caller know that it needs to adjust
3304    the result at the end by 'init_val'.
3305 
3306    FORNOW, we are using the 'adjust in epilog' scheme, because this way the
3307    initialization vector is simpler (same element in all entries), if
3308    ADJUSTMENT_DEF is not NULL, and Option2 otherwise.
3309 
3310    A cost model should help decide between these two schemes.  */
3311 
3312 tree
3313 get_initial_def_for_reduction (gimple stmt, tree init_val,
3314                                tree *adjustment_def)
3315 {
3316   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
3317   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
3318   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3319   tree scalar_type = TREE_TYPE (init_val);
3320   tree vectype = get_vectype_for_scalar_type (scalar_type);
3321   int nunits;
3322   enum tree_code code = gimple_assign_rhs_code (stmt);
3323   tree def_for_init;
3324   tree init_def;
3325   tree t = NULL_TREE;
3326   int i;
3327   bool nested_in_vect_loop = false;
3328   tree init_value;
3329   REAL_VALUE_TYPE real_init_val = dconst0;
3330   int int_init_val = 0;
3331   gimple def_stmt = NULL;
3332 
3333   gcc_assert (vectype);
3334   nunits = TYPE_VECTOR_SUBPARTS (vectype);
3335 
3336   gcc_assert (POINTER_TYPE_P (scalar_type) || INTEGRAL_TYPE_P (scalar_type)
3337 	      || SCALAR_FLOAT_TYPE_P (scalar_type));
3338 
3339   if (nested_in_vect_loop_p (loop, stmt))
3340     nested_in_vect_loop = true;
3341   else
3342     gcc_assert (loop == (gimple_bb (stmt))->loop_father);
3343 
3344   /* In case of double reduction we only create a vector variable to be put
3345      in the reduction phi node.  The actual statement creation is done in
3346      vect_create_epilog_for_reduction.  */
3347   if (adjustment_def && nested_in_vect_loop
3348       && TREE_CODE (init_val) == SSA_NAME
3349       && (def_stmt = SSA_NAME_DEF_STMT (init_val))
3350       && gimple_code (def_stmt) == GIMPLE_PHI
3351       && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
3352       && vinfo_for_stmt (def_stmt)
3353       && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
3354           == vect_double_reduction_def)
3355     {
3356       *adjustment_def = NULL;
3357       return vect_create_destination_var (init_val, vectype);
3358     }
3359 
3360   if (TREE_CONSTANT (init_val))
3361     {
3362       if (SCALAR_FLOAT_TYPE_P (scalar_type))
3363         init_value = build_real (scalar_type, TREE_REAL_CST (init_val));
3364       else
3365         init_value = build_int_cst (scalar_type, TREE_INT_CST_LOW (init_val));
3366     }
3367   else
3368     init_value = init_val;
3369 
3370   switch (code)
3371     {
3372       case WIDEN_SUM_EXPR:
3373       case DOT_PROD_EXPR:
3374       case PLUS_EXPR:
3375       case MINUS_EXPR:
3376       case BIT_IOR_EXPR:
3377       case BIT_XOR_EXPR:
3378       case MULT_EXPR:
3379       case BIT_AND_EXPR:
3380         /* ADJUSMENT_DEF is NULL when called from
3381            vect_create_epilog_for_reduction to vectorize double reduction.  */
3382         if (adjustment_def)
3383           {
3384             if (nested_in_vect_loop)
3385               *adjustment_def = vect_get_vec_def_for_operand (init_val, stmt,
3386                                                               NULL);
3387             else
3388               *adjustment_def = init_val;
3389           }
3390 
3391         if (code == MULT_EXPR)
3392           {
3393             real_init_val = dconst1;
3394             int_init_val = 1;
3395           }
3396 
3397         if (code == BIT_AND_EXPR)
3398           int_init_val = -1;
3399 
3400         if (SCALAR_FLOAT_TYPE_P (scalar_type))
3401           def_for_init = build_real (scalar_type, real_init_val);
3402         else
3403           def_for_init = build_int_cst (scalar_type, int_init_val);
3404 
3405         /* Create a vector of '0' or '1' except the first element.  */
3406         for (i = nunits - 2; i >= 0; --i)
3407           t = tree_cons (NULL_TREE, def_for_init, t);
3408 
3409         /* Option1: the first element is '0' or '1' as well.  */
3410         if (adjustment_def)
3411           {
3412             t = tree_cons (NULL_TREE, def_for_init, t);
3413             init_def = build_vector (vectype, t);
3414             break;
3415           }
3416 
3417         /* Option2: the first element is INIT_VAL.  */
3418         t = tree_cons (NULL_TREE, init_value, t);
3419         if (TREE_CONSTANT (init_val))
3420           init_def = build_vector (vectype, t);
3421         else
3422           init_def = build_constructor_from_list (vectype, t);
3423 
3424         break;
3425 
3426       case MIN_EXPR:
3427       case MAX_EXPR:
3428       case COND_EXPR:
3429         if (adjustment_def)
3430           {
3431             *adjustment_def = NULL_TREE;
3432             init_def = vect_get_vec_def_for_operand (init_val, stmt, NULL);
3433             break;
3434           }
3435 
3436 	init_def = build_vector_from_val (vectype, init_value);
3437         break;
3438 
3439       default:
3440         gcc_unreachable ();
3441     }
3442 
3443   return init_def;
3444 }
3445 
3446 
3447 /* Function vect_create_epilog_for_reduction
3448 
3449    Create code at the loop-epilog to finalize the result of a reduction
3450    computation.
3451 
3452    VECT_DEFS is list of vector of partial results, i.e., the lhs's of vector
3453      reduction statements.
3454    STMT is the scalar reduction stmt that is being vectorized.
3455    NCOPIES is > 1 in case the vectorization factor (VF) is bigger than the
3456      number of elements that we can fit in a vectype (nunits).  In this case
3457      we have to generate more than one vector stmt - i.e - we need to "unroll"
3458      the vector stmt by a factor VF/nunits.  For more details see documentation
3459      in vectorizable_operation.
3460    REDUC_CODE is the tree-code for the epilog reduction.
3461    REDUCTION_PHIS is a list of the phi-nodes that carry the reduction
3462      computation.
3463    REDUC_INDEX is the index of the operand in the right hand side of the
3464      statement that is defined by REDUCTION_PHI.
3465    DOUBLE_REDUC is TRUE if double reduction phi nodes should be handled.
3466    SLP_NODE is an SLP node containing a group of reduction statements. The
3467      first one in this group is STMT.
3468 
3469    This function:
3470    1. Creates the reduction def-use cycles: sets the arguments for
3471       REDUCTION_PHIS:
3472       The loop-entry argument is the vectorized initial-value of the reduction.
3473       The loop-latch argument is taken from VECT_DEFS - the vector of partial
3474       sums.
3475    2. "Reduces" each vector of partial results VECT_DEFS into a single result,
3476       by applying the operation specified by REDUC_CODE if available, or by
3477       other means (whole-vector shifts or a scalar loop).
3478       The function also creates a new phi node at the loop exit to preserve
3479       loop-closed form, as illustrated below.
3480 
3481      The flow at the entry to this function:
3482 
3483         loop:
3484           vec_def = phi <null, null>            # REDUCTION_PHI
3485           VECT_DEF = vector_stmt                # vectorized form of STMT
3486           s_loop = scalar_stmt                  # (scalar) STMT
3487         loop_exit:
3488           s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
3489           use <s_out0>
3490           use <s_out0>
3491 
3492      The above is transformed by this function into:
3493 
3494         loop:
3495           vec_def = phi <vec_init, VECT_DEF>    # REDUCTION_PHI
3496           VECT_DEF = vector_stmt                # vectorized form of STMT
3497           s_loop = scalar_stmt                  # (scalar) STMT
3498         loop_exit:
3499           s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
3500           v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
3501           v_out2 = reduce <v_out1>
3502           s_out3 = extract_field <v_out2, 0>
3503           s_out4 = adjust_result <s_out3>
3504           use <s_out4>
3505           use <s_out4>
3506 */
3507 
3508 static void
3509 vect_create_epilog_for_reduction (VEC (tree, heap) *vect_defs, gimple stmt,
3510 				  int ncopies, enum tree_code reduc_code,
3511 				  VEC (gimple, heap) *reduction_phis,
3512                                   int reduc_index, bool double_reduc,
3513                                   slp_tree slp_node)
3514 {
3515   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3516   stmt_vec_info prev_phi_info;
3517   tree vectype;
3518   enum machine_mode mode;
3519   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3520   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *outer_loop = NULL;
3521   basic_block exit_bb;
3522   tree scalar_dest;
3523   tree scalar_type;
3524   gimple new_phi = NULL, phi;
3525   gimple_stmt_iterator exit_gsi;
3526   tree vec_dest;
3527   tree new_temp = NULL_TREE, new_dest, new_name, new_scalar_dest;
3528   gimple epilog_stmt = NULL;
3529   enum tree_code code = gimple_assign_rhs_code (stmt);
3530   gimple exit_phi;
3531   tree bitsize, bitpos;
3532   tree adjustment_def = NULL;
3533   tree vec_initial_def = NULL;
3534   tree reduction_op, expr, def;
3535   tree orig_name, scalar_result;
3536   imm_use_iterator imm_iter, phi_imm_iter;
3537   use_operand_p use_p, phi_use_p;
3538   bool extract_scalar_result = false;
3539   gimple use_stmt, orig_stmt, reduction_phi = NULL;
3540   bool nested_in_vect_loop = false;
3541   VEC (gimple, heap) *new_phis = NULL;
3542   VEC (gimple, heap) *inner_phis = NULL;
3543   enum vect_def_type dt = vect_unknown_def_type;
3544   int j, i;
3545   VEC (tree, heap) *scalar_results = NULL;
3546   unsigned int group_size = 1, k, ratio;
3547   VEC (tree, heap) *vec_initial_defs = NULL;
3548   VEC (gimple, heap) *phis;
3549   bool slp_reduc = false;
3550   tree new_phi_result;
3551   gimple inner_phi = NULL;
3552 
3553   if (slp_node)
3554     group_size = VEC_length (gimple, SLP_TREE_SCALAR_STMTS (slp_node));
3555 
3556   if (nested_in_vect_loop_p (loop, stmt))
3557     {
3558       outer_loop = loop;
3559       loop = loop->inner;
3560       nested_in_vect_loop = true;
3561       gcc_assert (!slp_node);
3562     }
3563 
3564   switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
3565     {
3566     case GIMPLE_SINGLE_RHS:
3567       gcc_assert (TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt))
3568 		  == ternary_op);
3569       reduction_op = TREE_OPERAND (gimple_assign_rhs1 (stmt), reduc_index);
3570       break;
3571     case GIMPLE_UNARY_RHS:
3572       reduction_op = gimple_assign_rhs1 (stmt);
3573       break;
3574     case GIMPLE_BINARY_RHS:
3575       reduction_op = reduc_index ?
3576                      gimple_assign_rhs2 (stmt) : gimple_assign_rhs1 (stmt);
3577       break;
3578     case GIMPLE_TERNARY_RHS:
3579       reduction_op = gimple_op (stmt, reduc_index + 1);
3580       break;
3581     default:
3582       gcc_unreachable ();
3583     }
3584 
3585   vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
3586   gcc_assert (vectype);
3587   mode = TYPE_MODE (vectype);
3588 
3589   /* 1. Create the reduction def-use cycle:
3590      Set the arguments of REDUCTION_PHIS, i.e., transform
3591 
3592         loop:
3593           vec_def = phi <null, null>            # REDUCTION_PHI
3594           VECT_DEF = vector_stmt                # vectorized form of STMT
3595           ...
3596 
3597      into:
3598 
3599         loop:
3600           vec_def = phi <vec_init, VECT_DEF>    # REDUCTION_PHI
3601           VECT_DEF = vector_stmt                # vectorized form of STMT
3602           ...
3603 
3604      (in case of SLP, do it for all the phis). */
3605 
3606   /* Get the loop-entry arguments.  */
3607   if (slp_node)
3608     vect_get_vec_defs (reduction_op, NULL_TREE, stmt, &vec_initial_defs,
3609                        NULL, slp_node, reduc_index);
3610   else
3611     {
3612       vec_initial_defs = VEC_alloc (tree, heap, 1);
3613      /* For the case of reduction, vect_get_vec_def_for_operand returns
3614         the scalar def before the loop, that defines the initial value
3615         of the reduction variable.  */
3616       vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt,
3617                                                       &adjustment_def);
3618       VEC_quick_push (tree, vec_initial_defs, vec_initial_def);
3619     }
3620 
3621   /* Set phi nodes arguments.  */
3622   FOR_EACH_VEC_ELT (gimple, reduction_phis, i, phi)
3623     {
3624       tree vec_init_def = VEC_index (tree, vec_initial_defs, i);
3625       tree def = VEC_index (tree, vect_defs, i);
3626       for (j = 0; j < ncopies; j++)
3627         {
3628           /* Set the loop-entry arg of the reduction-phi.  */
3629           add_phi_arg (phi, vec_init_def, loop_preheader_edge (loop),
3630                        UNKNOWN_LOCATION);
3631 
3632           /* Set the loop-latch arg for the reduction-phi.  */
3633           if (j > 0)
3634             def = vect_get_vec_def_for_stmt_copy (vect_unknown_def_type, def);
3635 
3636           add_phi_arg (phi, def, loop_latch_edge (loop), UNKNOWN_LOCATION);
3637 
3638           if (vect_print_dump_info (REPORT_DETAILS))
3639             {
3640               fprintf (vect_dump, "transform reduction: created def-use"
3641                                   " cycle: ");
3642               print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
3643               fprintf (vect_dump, "\n");
3644               print_gimple_stmt (vect_dump, SSA_NAME_DEF_STMT (def), 0,
3645                                  TDF_SLIM);
3646             }
3647 
3648           phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi));
3649         }
3650     }
3651 
3652   VEC_free (tree, heap, vec_initial_defs);
3653 
3654   /* 2. Create epilog code.
3655         The reduction epilog code operates across the elements of the vector
3656         of partial results computed by the vectorized loop.
3657         The reduction epilog code consists of:
3658 
3659         step 1: compute the scalar result in a vector (v_out2)
3660         step 2: extract the scalar result (s_out3) from the vector (v_out2)
3661         step 3: adjust the scalar result (s_out3) if needed.
3662 
3663         Step 1 can be accomplished using one the following three schemes:
3664           (scheme 1) using reduc_code, if available.
3665           (scheme 2) using whole-vector shifts, if available.
3666           (scheme 3) using a scalar loop. In this case steps 1+2 above are
3667                      combined.
3668 
3669           The overall epilog code looks like this:
3670 
3671           s_out0 = phi <s_loop>         # original EXIT_PHI
3672           v_out1 = phi <VECT_DEF>       # NEW_EXIT_PHI
3673           v_out2 = reduce <v_out1>              # step 1
3674           s_out3 = extract_field <v_out2, 0>    # step 2
3675           s_out4 = adjust_result <s_out3>       # step 3
3676 
3677           (step 3 is optional, and steps 1 and 2 may be combined).
3678           Lastly, the uses of s_out0 are replaced by s_out4.  */
3679 
3680 
3681   /* 2.1 Create new loop-exit-phis to preserve loop-closed form:
3682          v_out1 = phi <VECT_DEF>
3683          Store them in NEW_PHIS.  */
3684 
3685   exit_bb = single_exit (loop)->dest;
3686   prev_phi_info = NULL;
3687   new_phis = VEC_alloc (gimple, heap, VEC_length (tree, vect_defs));
3688   FOR_EACH_VEC_ELT (tree, vect_defs, i, def)
3689     {
3690       for (j = 0; j < ncopies; j++)
3691         {
3692           phi = create_phi_node (SSA_NAME_VAR (def), exit_bb);
3693           set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, loop_vinfo, NULL));
3694           if (j == 0)
3695             VEC_quick_push (gimple, new_phis, phi);
3696           else
3697 	    {
3698 	      def = vect_get_vec_def_for_stmt_copy (dt, def);
3699 	      STMT_VINFO_RELATED_STMT (prev_phi_info) = phi;
3700 	    }
3701 
3702           SET_PHI_ARG_DEF (phi, single_exit (loop)->dest_idx, def);
3703           prev_phi_info = vinfo_for_stmt (phi);
3704         }
3705     }
3706 
3707   /* The epilogue is created for the outer-loop, i.e., for the loop being
3708      vectorized.  Create exit phis for the outer loop.  */
3709   if (double_reduc)
3710     {
3711       loop = outer_loop;
3712       exit_bb = single_exit (loop)->dest;
3713       inner_phis = VEC_alloc (gimple, heap, VEC_length (tree, vect_defs));
3714       FOR_EACH_VEC_ELT (gimple, new_phis, i, phi)
3715 	{
3716 	  gimple outer_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (phi)),
3717 					      exit_bb);
3718 	  SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx,
3719 			   PHI_RESULT (phi));
3720 	  set_vinfo_for_stmt (outer_phi, new_stmt_vec_info (outer_phi,
3721 							    loop_vinfo, NULL));
3722 	  VEC_quick_push (gimple, inner_phis, phi);
3723 	  VEC_replace (gimple, new_phis, i, outer_phi);
3724 	  prev_phi_info = vinfo_for_stmt (outer_phi);
3725           while (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi)))
3726             {
3727 	      phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi));
3728 	      outer_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (phi)),
3729 					   exit_bb);
3730 	      SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx,
3731 			       PHI_RESULT (phi));
3732 	      set_vinfo_for_stmt (outer_phi, new_stmt_vec_info (outer_phi,
3733 							loop_vinfo, NULL));
3734 	      STMT_VINFO_RELATED_STMT (prev_phi_info) = outer_phi;
3735 	      prev_phi_info = vinfo_for_stmt (outer_phi);
3736 	    }
3737 	}
3738     }
3739 
3740   exit_gsi = gsi_after_labels (exit_bb);
3741 
3742   /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
3743          (i.e. when reduc_code is not available) and in the final adjustment
3744 	 code (if needed).  Also get the original scalar reduction variable as
3745          defined in the loop.  In case STMT is a "pattern-stmt" (i.e. - it
3746          represents a reduction pattern), the tree-code and scalar-def are
3747          taken from the original stmt that the pattern-stmt (STMT) replaces.
3748          Otherwise (it is a regular reduction) - the tree-code and scalar-def
3749          are taken from STMT.  */
3750 
3751   orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
3752   if (!orig_stmt)
3753     {
3754       /* Regular reduction  */
3755       orig_stmt = stmt;
3756     }
3757   else
3758     {
3759       /* Reduction pattern  */
3760       stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt);
3761       gcc_assert (STMT_VINFO_IN_PATTERN_P (stmt_vinfo));
3762       gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
3763     }
3764 
3765   code = gimple_assign_rhs_code (orig_stmt);
3766   /* For MINUS_EXPR the initial vector is [init_val,0,...,0], therefore,
3767      partial results are added and not subtracted.  */
3768   if (code == MINUS_EXPR)
3769     code = PLUS_EXPR;
3770 
3771   scalar_dest = gimple_assign_lhs (orig_stmt);
3772   scalar_type = TREE_TYPE (scalar_dest);
3773   scalar_results = VEC_alloc (tree, heap, group_size);
3774   new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
3775   bitsize = TYPE_SIZE (scalar_type);
3776 
3777   /* In case this is a reduction in an inner-loop while vectorizing an outer
3778      loop - we don't need to extract a single scalar result at the end of the
3779      inner-loop (unless it is double reduction, i.e., the use of reduction is
3780      outside the outer-loop).  The final vector of partial results will be used
3781      in the vectorized outer-loop, or reduced to a scalar result at the end of
3782      the outer-loop.  */
3783   if (nested_in_vect_loop && !double_reduc)
3784     goto vect_finalize_reduction;
3785 
3786   /* SLP reduction without reduction chain, e.g.,
3787      # a1 = phi <a2, a0>
3788      # b1 = phi <b2, b0>
3789      a2 = operation (a1)
3790      b2 = operation (b1)  */
3791   slp_reduc = (slp_node && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)));
3792 
3793   /* In case of reduction chain, e.g.,
3794      # a1 = phi <a3, a0>
3795      a2 = operation (a1)
3796      a3 = operation (a2),
3797 
3798      we may end up with more than one vector result.  Here we reduce them to
3799      one vector.  */
3800   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
3801     {
3802       tree first_vect = PHI_RESULT (VEC_index (gimple, new_phis, 0));
3803       tree tmp;
3804       gimple new_vec_stmt = NULL;
3805 
3806       vec_dest = vect_create_destination_var (scalar_dest, vectype);
3807       for (k = 1; k < VEC_length (gimple, new_phis); k++)
3808         {
3809           gimple next_phi = VEC_index (gimple, new_phis, k);
3810           tree second_vect = PHI_RESULT (next_phi);
3811 
3812           tmp = build2 (code, vectype,  first_vect, second_vect);
3813           new_vec_stmt = gimple_build_assign (vec_dest, tmp);
3814           first_vect = make_ssa_name (vec_dest, new_vec_stmt);
3815           gimple_assign_set_lhs (new_vec_stmt, first_vect);
3816           gsi_insert_before (&exit_gsi, new_vec_stmt, GSI_SAME_STMT);
3817         }
3818 
3819       new_phi_result = first_vect;
3820       if (new_vec_stmt)
3821         {
3822           VEC_truncate (gimple, new_phis, 0);
3823           VEC_safe_push (gimple, heap, new_phis, new_vec_stmt);
3824         }
3825     }
3826   else
3827     new_phi_result = PHI_RESULT (VEC_index (gimple, new_phis, 0));
3828 
3829   /* 2.3 Create the reduction code, using one of the three schemes described
3830          above. In SLP we simply need to extract all the elements from the
3831          vector (without reducing them), so we use scalar shifts.  */
3832   if (reduc_code != ERROR_MARK && !slp_reduc)
3833     {
3834       tree tmp;
3835 
3836       /*** Case 1:  Create:
3837            v_out2 = reduc_expr <v_out1>  */
3838 
3839       if (vect_print_dump_info (REPORT_DETAILS))
3840         fprintf (vect_dump, "Reduce using direct vector reduction.");
3841 
3842       vec_dest = vect_create_destination_var (scalar_dest, vectype);
3843       tmp = build1 (reduc_code, vectype, new_phi_result);
3844       epilog_stmt = gimple_build_assign (vec_dest, tmp);
3845       new_temp = make_ssa_name (vec_dest, epilog_stmt);
3846       gimple_assign_set_lhs (epilog_stmt, new_temp);
3847       gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
3848 
3849       extract_scalar_result = true;
3850     }
3851   else
3852     {
3853       enum tree_code shift_code = ERROR_MARK;
3854       bool have_whole_vector_shift = true;
3855       int bit_offset;
3856       int element_bitsize = tree_low_cst (bitsize, 1);
3857       int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
3858       tree vec_temp;
3859 
3860       if (optab_handler (vec_shr_optab, mode) != CODE_FOR_nothing)
3861         shift_code = VEC_RSHIFT_EXPR;
3862       else
3863         have_whole_vector_shift = false;
3864 
3865       /* Regardless of whether we have a whole vector shift, if we're
3866          emulating the operation via tree-vect-generic, we don't want
3867          to use it.  Only the first round of the reduction is likely
3868          to still be profitable via emulation.  */
3869       /* ??? It might be better to emit a reduction tree code here, so that
3870          tree-vect-generic can expand the first round via bit tricks.  */
3871       if (!VECTOR_MODE_P (mode))
3872         have_whole_vector_shift = false;
3873       else
3874         {
3875           optab optab = optab_for_tree_code (code, vectype, optab_default);
3876           if (optab_handler (optab, mode) == CODE_FOR_nothing)
3877             have_whole_vector_shift = false;
3878         }
3879 
3880       if (have_whole_vector_shift && !slp_reduc)
3881         {
3882           /*** Case 2: Create:
3883              for (offset = VS/2; offset >= element_size; offset/=2)
3884                 {
3885                   Create:  va' = vec_shift <va, offset>
3886                   Create:  va = vop <va, va'>
3887                 }  */
3888 
3889           if (vect_print_dump_info (REPORT_DETAILS))
3890             fprintf (vect_dump, "Reduce using vector shifts");
3891 
3892           vec_dest = vect_create_destination_var (scalar_dest, vectype);
3893           new_temp = new_phi_result;
3894           for (bit_offset = vec_size_in_bits/2;
3895                bit_offset >= element_bitsize;
3896                bit_offset /= 2)
3897             {
3898               tree bitpos = size_int (bit_offset);
3899 
3900               epilog_stmt = gimple_build_assign_with_ops (shift_code,
3901                                                vec_dest, new_temp, bitpos);
3902               new_name = make_ssa_name (vec_dest, epilog_stmt);
3903               gimple_assign_set_lhs (epilog_stmt, new_name);
3904               gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
3905 
3906               epilog_stmt = gimple_build_assign_with_ops (code, vec_dest,
3907                                                           new_name, new_temp);
3908               new_temp = make_ssa_name (vec_dest, epilog_stmt);
3909               gimple_assign_set_lhs (epilog_stmt, new_temp);
3910               gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
3911             }
3912 
3913           extract_scalar_result = true;
3914         }
3915       else
3916         {
3917           tree rhs;
3918 
3919           /*** Case 3: Create:
3920              s = extract_field <v_out2, 0>
3921              for (offset = element_size;
3922                   offset < vector_size;
3923                   offset += element_size;)
3924                {
3925                  Create:  s' = extract_field <v_out2, offset>
3926                  Create:  s = op <s, s'>  // For non SLP cases
3927                }  */
3928 
3929           if (vect_print_dump_info (REPORT_DETAILS))
3930             fprintf (vect_dump, "Reduce using scalar code. ");
3931 
3932           vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
3933           FOR_EACH_VEC_ELT (gimple, new_phis, i, new_phi)
3934             {
3935               if (gimple_code (new_phi) == GIMPLE_PHI)
3936                 vec_temp = PHI_RESULT (new_phi);
3937               else
3938                 vec_temp = gimple_assign_lhs (new_phi);
3939               rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
3940                             bitsize_zero_node);
3941               epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
3942               new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
3943               gimple_assign_set_lhs (epilog_stmt, new_temp);
3944               gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
3945 
3946               /* In SLP we don't need to apply reduction operation, so we just
3947                  collect s' values in SCALAR_RESULTS.  */
3948               if (slp_reduc)
3949                 VEC_safe_push (tree, heap, scalar_results, new_temp);
3950 
3951               for (bit_offset = element_bitsize;
3952                    bit_offset < vec_size_in_bits;
3953                    bit_offset += element_bitsize)
3954                 {
3955                   tree bitpos = bitsize_int (bit_offset);
3956                   tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp,
3957                                      bitsize, bitpos);
3958 
3959                   epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
3960                   new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
3961                   gimple_assign_set_lhs (epilog_stmt, new_name);
3962                   gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
3963 
3964                   if (slp_reduc)
3965                     {
3966                       /* In SLP we don't need to apply reduction operation, so
3967                          we just collect s' values in SCALAR_RESULTS.  */
3968                       new_temp = new_name;
3969                       VEC_safe_push (tree, heap, scalar_results, new_name);
3970                     }
3971                   else
3972                     {
3973                       epilog_stmt = gimple_build_assign_with_ops (code,
3974                                           new_scalar_dest, new_name, new_temp);
3975                       new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
3976                       gimple_assign_set_lhs (epilog_stmt, new_temp);
3977                       gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
3978                     }
3979                 }
3980             }
3981 
3982           /* The only case where we need to reduce scalar results in SLP, is
3983              unrolling.  If the size of SCALAR_RESULTS is greater than
3984              GROUP_SIZE, we reduce them combining elements modulo
3985              GROUP_SIZE.  */
3986           if (slp_reduc)
3987             {
3988               tree res, first_res, new_res;
3989               gimple new_stmt;
3990 
3991               /* Reduce multiple scalar results in case of SLP unrolling.  */
3992               for (j = group_size; VEC_iterate (tree, scalar_results, j, res);
3993                    j++)
3994                 {
3995                   first_res = VEC_index (tree, scalar_results, j % group_size);
3996                   new_stmt = gimple_build_assign_with_ops (code,
3997                                               new_scalar_dest, first_res, res);
3998                   new_res = make_ssa_name (new_scalar_dest, new_stmt);
3999                   gimple_assign_set_lhs (new_stmt, new_res);
4000                   gsi_insert_before (&exit_gsi, new_stmt, GSI_SAME_STMT);
4001                   VEC_replace (tree, scalar_results, j % group_size, new_res);
4002                 }
4003             }
4004           else
4005             /* Not SLP - we have one scalar to keep in SCALAR_RESULTS.  */
4006             VEC_safe_push (tree, heap, scalar_results, new_temp);
4007 
4008           extract_scalar_result = false;
4009         }
4010     }
4011 
4012   /* 2.4  Extract the final scalar result.  Create:
4013           s_out3 = extract_field <v_out2, bitpos>  */
4014 
4015   if (extract_scalar_result)
4016     {
4017       tree rhs;
4018 
4019       if (vect_print_dump_info (REPORT_DETAILS))
4020         fprintf (vect_dump, "extract scalar result");
4021 
4022       if (BYTES_BIG_ENDIAN)
4023         bitpos = size_binop (MULT_EXPR,
4024                              bitsize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1),
4025                              TYPE_SIZE (scalar_type));
4026       else
4027         bitpos = bitsize_zero_node;
4028 
4029       rhs = build3 (BIT_FIELD_REF, scalar_type, new_temp, bitsize, bitpos);
4030       epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
4031       new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
4032       gimple_assign_set_lhs (epilog_stmt, new_temp);
4033       gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4034       VEC_safe_push (tree, heap, scalar_results, new_temp);
4035     }
4036 
4037 vect_finalize_reduction:
4038 
4039   if (double_reduc)
4040     loop = loop->inner;
4041 
4042   /* 2.5 Adjust the final result by the initial value of the reduction
4043 	 variable. (When such adjustment is not needed, then
4044 	 'adjustment_def' is zero).  For example, if code is PLUS we create:
4045 	 new_temp = loop_exit_def + adjustment_def  */
4046 
4047   if (adjustment_def)
4048     {
4049       gcc_assert (!slp_reduc);
4050       if (nested_in_vect_loop)
4051 	{
4052           new_phi = VEC_index (gimple, new_phis, 0);
4053 	  gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) == VECTOR_TYPE);
4054 	  expr = build2 (code, vectype, PHI_RESULT (new_phi), adjustment_def);
4055 	  new_dest = vect_create_destination_var (scalar_dest, vectype);
4056 	}
4057       else
4058 	{
4059           new_temp = VEC_index (tree, scalar_results, 0);
4060 	  gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) != VECTOR_TYPE);
4061 	  expr = build2 (code, scalar_type, new_temp, adjustment_def);
4062 	  new_dest = vect_create_destination_var (scalar_dest, scalar_type);
4063 	}
4064 
4065       epilog_stmt = gimple_build_assign (new_dest, expr);
4066       new_temp = make_ssa_name (new_dest, epilog_stmt);
4067       gimple_assign_set_lhs (epilog_stmt, new_temp);
4068       SSA_NAME_DEF_STMT (new_temp) = epilog_stmt;
4069       gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4070       if (nested_in_vect_loop)
4071         {
4072           set_vinfo_for_stmt (epilog_stmt,
4073                               new_stmt_vec_info (epilog_stmt, loop_vinfo,
4074                                                  NULL));
4075           STMT_VINFO_RELATED_STMT (vinfo_for_stmt (epilog_stmt)) =
4076                 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_phi));
4077 
4078           if (!double_reduc)
4079             VEC_quick_push (tree, scalar_results, new_temp);
4080           else
4081             VEC_replace (tree, scalar_results, 0, new_temp);
4082         }
4083       else
4084         VEC_replace (tree, scalar_results, 0, new_temp);
4085 
4086       VEC_replace (gimple, new_phis, 0, epilog_stmt);
4087     }
4088 
4089   /* 2.6  Handle the loop-exit phis.  Replace the uses of scalar loop-exit
4090           phis with new adjusted scalar results, i.e., replace use <s_out0>
4091           with use <s_out4>.
4092 
4093      Transform:
4094         loop_exit:
4095           s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
4096           v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
4097           v_out2 = reduce <v_out1>
4098           s_out3 = extract_field <v_out2, 0>
4099           s_out4 = adjust_result <s_out3>
4100           use <s_out0>
4101           use <s_out0>
4102 
4103      into:
4104 
4105         loop_exit:
4106           s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
4107           v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
4108           v_out2 = reduce <v_out1>
4109           s_out3 = extract_field <v_out2, 0>
4110           s_out4 = adjust_result <s_out3>
4111           use <s_out4>
4112           use <s_out4> */
4113 
4114 
4115   /* In SLP reduction chain we reduce vector results into one vector if
4116      necessary, hence we set here GROUP_SIZE to 1.  SCALAR_DEST is the LHS of
4117      the last stmt in the reduction chain, since we are looking for the loop
4118      exit phi node.  */
4119   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
4120     {
4121       scalar_dest = gimple_assign_lhs (VEC_index (gimple,
4122                                        SLP_TREE_SCALAR_STMTS (slp_node),
4123                                        group_size - 1));
4124       group_size = 1;
4125     }
4126 
4127   /* In SLP we may have several statements in NEW_PHIS and REDUCTION_PHIS (in
4128      case that GROUP_SIZE is greater than vectorization factor).  Therefore, we
4129      need to match SCALAR_RESULTS with corresponding statements.  The first
4130      (GROUP_SIZE / number of new vector stmts) scalar results correspond to
4131      the first vector stmt, etc.
4132      (RATIO is equal to (GROUP_SIZE / number of new vector stmts)).  */
4133   if (group_size > VEC_length (gimple, new_phis))
4134     {
4135       ratio = group_size / VEC_length (gimple, new_phis);
4136       gcc_assert (!(group_size % VEC_length (gimple, new_phis)));
4137     }
4138   else
4139     ratio = 1;
4140 
4141   for (k = 0; k < group_size; k++)
4142     {
4143       if (k % ratio == 0)
4144         {
4145           epilog_stmt = VEC_index (gimple, new_phis, k / ratio);
4146           reduction_phi = VEC_index (gimple, reduction_phis, k / ratio);
4147 	  if (double_reduc)
4148 	    inner_phi = VEC_index (gimple, inner_phis, k / ratio);
4149         }
4150 
4151       if (slp_reduc)
4152         {
4153           gimple current_stmt = VEC_index (gimple,
4154                                        SLP_TREE_SCALAR_STMTS (slp_node), k);
4155 
4156           orig_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (current_stmt));
4157           /* SLP statements can't participate in patterns.  */
4158           gcc_assert (!orig_stmt);
4159           scalar_dest = gimple_assign_lhs (current_stmt);
4160         }
4161 
4162       phis = VEC_alloc (gimple, heap, 3);
4163       /* Find the loop-closed-use at the loop exit of the original scalar
4164          result.  (The reduction result is expected to have two immediate uses -
4165          one at the latch block, and one at the loop exit).  */
4166       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
4167         if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p)))
4168 	    && !is_gimple_debug (USE_STMT (use_p)))
4169           VEC_safe_push (gimple, heap, phis, USE_STMT (use_p));
4170 
4171       /* We expect to have found an exit_phi because of loop-closed-ssa
4172          form.  */
4173       gcc_assert (!VEC_empty (gimple, phis));
4174 
4175       FOR_EACH_VEC_ELT (gimple, phis, i, exit_phi)
4176         {
4177           if (outer_loop)
4178             {
4179               stmt_vec_info exit_phi_vinfo = vinfo_for_stmt (exit_phi);
4180               gimple vect_phi;
4181 
4182               /* FORNOW. Currently not supporting the case that an inner-loop
4183                  reduction is not used in the outer-loop (but only outside the
4184                  outer-loop), unless it is double reduction.  */
4185               gcc_assert ((STMT_VINFO_RELEVANT_P (exit_phi_vinfo)
4186                            && !STMT_VINFO_LIVE_P (exit_phi_vinfo))
4187                           || double_reduc);
4188 
4189               STMT_VINFO_VEC_STMT (exit_phi_vinfo) = epilog_stmt;
4190               if (!double_reduc
4191                   || STMT_VINFO_DEF_TYPE (exit_phi_vinfo)
4192                       != vect_double_reduction_def)
4193                 continue;
4194 
4195               /* Handle double reduction:
4196 
4197                  stmt1: s1 = phi <s0, s2>  - double reduction phi (outer loop)
4198                  stmt2:   s3 = phi <s1, s4> - (regular) reduc phi (inner loop)
4199                  stmt3:   s4 = use (s3)     - (regular) reduc stmt (inner loop)
4200                  stmt4: s2 = phi <s4>      - double reduction stmt (outer loop)
4201 
4202                  At that point the regular reduction (stmt2 and stmt3) is
4203                  already vectorized, as well as the exit phi node, stmt4.
4204                  Here we vectorize the phi node of double reduction, stmt1, and
4205                  update all relevant statements.  */
4206 
4207               /* Go through all the uses of s2 to find double reduction phi
4208                  node, i.e., stmt1 above.  */
4209               orig_name = PHI_RESULT (exit_phi);
4210               FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
4211                 {
4212                   stmt_vec_info use_stmt_vinfo = vinfo_for_stmt (use_stmt);
4213                   stmt_vec_info new_phi_vinfo;
4214                   tree vect_phi_init, preheader_arg, vect_phi_res, init_def;
4215                   basic_block bb = gimple_bb (use_stmt);
4216                   gimple use;
4217 
4218                   /* Check that USE_STMT is really double reduction phi
4219                      node.  */
4220                   if (gimple_code (use_stmt) != GIMPLE_PHI
4221                       || gimple_phi_num_args (use_stmt) != 2
4222                       || !use_stmt_vinfo
4223                       || STMT_VINFO_DEF_TYPE (use_stmt_vinfo)
4224                           != vect_double_reduction_def
4225                       || bb->loop_father != outer_loop)
4226                     continue;
4227 
4228                   /* Create vector phi node for double reduction:
4229                      vs1 = phi <vs0, vs2>
4230                      vs1 was created previously in this function by a call to
4231                        vect_get_vec_def_for_operand and is stored in
4232                        vec_initial_def;
4233                      vs2 is defined by INNER_PHI, the vectorized EXIT_PHI;
4234                      vs0 is created here.  */
4235 
4236                   /* Create vector phi node.  */
4237                   vect_phi = create_phi_node (vec_initial_def, bb);
4238                   new_phi_vinfo = new_stmt_vec_info (vect_phi,
4239                                     loop_vec_info_for_loop (outer_loop), NULL);
4240                   set_vinfo_for_stmt (vect_phi, new_phi_vinfo);
4241 
4242                   /* Create vs0 - initial def of the double reduction phi.  */
4243                   preheader_arg = PHI_ARG_DEF_FROM_EDGE (use_stmt,
4244                                              loop_preheader_edge (outer_loop));
4245                   init_def = get_initial_def_for_reduction (stmt,
4246                                                           preheader_arg, NULL);
4247                   vect_phi_init = vect_init_vector (use_stmt, init_def,
4248                                                     vectype, NULL);
4249 
4250                   /* Update phi node arguments with vs0 and vs2.  */
4251                   add_phi_arg (vect_phi, vect_phi_init,
4252                                loop_preheader_edge (outer_loop),
4253                                UNKNOWN_LOCATION);
4254                   add_phi_arg (vect_phi, PHI_RESULT (inner_phi),
4255                                loop_latch_edge (outer_loop), UNKNOWN_LOCATION);
4256                   if (vect_print_dump_info (REPORT_DETAILS))
4257                     {
4258                       fprintf (vect_dump, "created double reduction phi "
4259                                           "node: ");
4260                       print_gimple_stmt (vect_dump, vect_phi, 0, TDF_SLIM);
4261                     }
4262 
4263                   vect_phi_res = PHI_RESULT (vect_phi);
4264 
4265                   /* Replace the use, i.e., set the correct vs1 in the regular
4266                      reduction phi node.  FORNOW, NCOPIES is always 1, so the
4267                      loop is redundant.  */
4268                   use = reduction_phi;
4269                   for (j = 0; j < ncopies; j++)
4270                     {
4271                       edge pr_edge = loop_preheader_edge (loop);
4272                       SET_PHI_ARG_DEF (use, pr_edge->dest_idx, vect_phi_res);
4273                       use = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use));
4274                     }
4275                 }
4276             }
4277         }
4278 
4279       VEC_free (gimple, heap, phis);
4280       if (nested_in_vect_loop)
4281         {
4282           if (double_reduc)
4283             loop = outer_loop;
4284           else
4285             continue;
4286         }
4287 
4288       phis = VEC_alloc (gimple, heap, 3);
4289       /* Find the loop-closed-use at the loop exit of the original scalar
4290          result.  (The reduction result is expected to have two immediate uses,
4291          one at the latch block, and one at the loop exit).  For double
4292          reductions we are looking for exit phis of the outer loop.  */
4293       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
4294         {
4295           if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
4296 	    {
4297 	      if (!is_gimple_debug (USE_STMT (use_p)))
4298 		VEC_safe_push (gimple, heap, phis, USE_STMT (use_p));
4299 	    }
4300           else
4301             {
4302               if (double_reduc && gimple_code (USE_STMT (use_p)) == GIMPLE_PHI)
4303                 {
4304                   tree phi_res = PHI_RESULT (USE_STMT (use_p));
4305 
4306                   FOR_EACH_IMM_USE_FAST (phi_use_p, phi_imm_iter, phi_res)
4307                     {
4308                       if (!flow_bb_inside_loop_p (loop,
4309                                              gimple_bb (USE_STMT (phi_use_p)))
4310 			  && !is_gimple_debug (USE_STMT (phi_use_p)))
4311                         VEC_safe_push (gimple, heap, phis,
4312                                        USE_STMT (phi_use_p));
4313                     }
4314                 }
4315             }
4316         }
4317 
4318       FOR_EACH_VEC_ELT (gimple, phis, i, exit_phi)
4319         {
4320           /* Replace the uses:  */
4321           orig_name = PHI_RESULT (exit_phi);
4322           scalar_result = VEC_index (tree, scalar_results, k);
4323           FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
4324             FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
4325               SET_USE (use_p, scalar_result);
4326         }
4327 
4328       VEC_free (gimple, heap, phis);
4329     }
4330 
4331   VEC_free (tree, heap, scalar_results);
4332   VEC_free (gimple, heap, new_phis);
4333 }
4334 
4335 
4336 /* Function vectorizable_reduction.
4337 
4338    Check if STMT performs a reduction operation that can be vectorized.
4339    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4340    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4341    Return FALSE if not a vectorizable STMT, TRUE otherwise.
4342 
4343    This function also handles reduction idioms (patterns) that have been
4344    recognized in advance during vect_pattern_recog.  In this case, STMT may be
4345    of this form:
4346      X = pattern_expr (arg0, arg1, ..., X)
4347    and it's STMT_VINFO_RELATED_STMT points to the last stmt in the original
4348    sequence that had been detected and replaced by the pattern-stmt (STMT).
4349 
4350    In some cases of reduction patterns, the type of the reduction variable X is
4351    different than the type of the other arguments of STMT.
4352    In such cases, the vectype that is used when transforming STMT into a vector
4353    stmt is different than the vectype that is used to determine the
4354    vectorization factor, because it consists of a different number of elements
4355    than the actual number of elements that are being operated upon in parallel.
4356 
4357    For example, consider an accumulation of shorts into an int accumulator.
4358    On some targets it's possible to vectorize this pattern operating on 8
4359    shorts at a time (hence, the vectype for purposes of determining the
4360    vectorization factor should be V8HI); on the other hand, the vectype that
4361    is used to create the vector form is actually V4SI (the type of the result).
4362 
4363    Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that
4364    indicates what is the actual level of parallelism (V8HI in the example), so
4365    that the right vectorization factor would be derived.  This vectype
4366    corresponds to the type of arguments to the reduction stmt, and should *NOT*
4367    be used to create the vectorized stmt.  The right vectype for the vectorized
4368    stmt is obtained from the type of the result X:
4369         get_vectype_for_scalar_type (TREE_TYPE (X))
4370 
4371    This means that, contrary to "regular" reductions (or "regular" stmts in
4372    general), the following equation:
4373       STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X))
4374    does *NOT* necessarily hold for reduction patterns.  */
4375 
4376 bool
4377 vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
4378 			gimple *vec_stmt, slp_tree slp_node)
4379 {
4380   tree vec_dest;
4381   tree scalar_dest;
4382   tree loop_vec_def0 = NULL_TREE, loop_vec_def1 = NULL_TREE;
4383   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4384   tree vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4385   tree vectype_in = NULL_TREE;
4386   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4387   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4388   enum tree_code code, orig_code, epilog_reduc_code;
4389   enum machine_mode vec_mode;
4390   int op_type;
4391   optab optab, reduc_optab;
4392   tree new_temp = NULL_TREE;
4393   tree def;
4394   gimple def_stmt;
4395   enum vect_def_type dt;
4396   gimple new_phi = NULL;
4397   tree scalar_type;
4398   bool is_simple_use;
4399   gimple orig_stmt;
4400   stmt_vec_info orig_stmt_info;
4401   tree expr = NULL_TREE;
4402   int i;
4403   int ncopies;
4404   int epilog_copies;
4405   stmt_vec_info prev_stmt_info, prev_phi_info;
4406   bool single_defuse_cycle = false;
4407   tree reduc_def = NULL_TREE;
4408   gimple new_stmt = NULL;
4409   int j;
4410   tree ops[3];
4411   bool nested_cycle = false, found_nested_cycle_def = false;
4412   gimple reduc_def_stmt = NULL;
4413   /* The default is that the reduction variable is the last in statement.  */
4414   int reduc_index = 2;
4415   bool double_reduc = false, dummy;
4416   basic_block def_bb;
4417   struct loop * def_stmt_loop, *outer_loop = NULL;
4418   tree def_arg;
4419   gimple def_arg_stmt;
4420   VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL, *vect_defs = NULL;
4421   VEC (gimple, heap) *phis = NULL;
4422   int vec_num;
4423   tree def0, def1, tem, op0, op1 = NULL_TREE;
4424 
4425   /* In case of reduction chain we switch to the first stmt in the chain, but
4426      we don't update STMT_INFO, since only the last stmt is marked as reduction
4427      and has reduction properties.  */
4428   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
4429     stmt = GROUP_FIRST_ELEMENT (stmt_info);
4430 
4431   if (nested_in_vect_loop_p (loop, stmt))
4432     {
4433       outer_loop = loop;
4434       loop = loop->inner;
4435       nested_cycle = true;
4436     }
4437 
4438   /* 1. Is vectorizable reduction?  */
4439   /* Not supportable if the reduction variable is used in the loop, unless
4440      it's a reduction chain.  */
4441   if (STMT_VINFO_RELEVANT (stmt_info) > vect_used_in_outer
4442       && !GROUP_FIRST_ELEMENT (stmt_info))
4443     return false;
4444 
4445   /* Reductions that are not used even in an enclosing outer-loop,
4446      are expected to be "live" (used out of the loop).  */
4447   if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_scope
4448       && !STMT_VINFO_LIVE_P (stmt_info))
4449     return false;
4450 
4451   /* Make sure it was already recognized as a reduction computation.  */
4452   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def
4453       && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle)
4454     return false;
4455 
4456   /* 2. Has this been recognized as a reduction pattern?
4457 
4458      Check if STMT represents a pattern that has been recognized
4459      in earlier analysis stages.  For stmts that represent a pattern,
4460      the STMT_VINFO_RELATED_STMT field records the last stmt in
4461      the original sequence that constitutes the pattern.  */
4462 
4463   orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
4464   if (orig_stmt)
4465     {
4466       orig_stmt_info = vinfo_for_stmt (orig_stmt);
4467       gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info));
4468       gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info));
4469     }
4470 
4471   /* 3. Check the operands of the operation.  The first operands are defined
4472         inside the loop body. The last operand is the reduction variable,
4473         which is defined by the loop-header-phi.  */
4474 
4475   gcc_assert (is_gimple_assign (stmt));
4476 
4477   /* Flatten RHS.  */
4478   switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
4479     {
4480     case GIMPLE_SINGLE_RHS:
4481       op_type = TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt));
4482       if (op_type == ternary_op)
4483 	{
4484 	  tree rhs = gimple_assign_rhs1 (stmt);
4485 	  ops[0] = TREE_OPERAND (rhs, 0);
4486 	  ops[1] = TREE_OPERAND (rhs, 1);
4487 	  ops[2] = TREE_OPERAND (rhs, 2);
4488 	  code = TREE_CODE (rhs);
4489 	}
4490       else
4491 	return false;
4492       break;
4493 
4494     case GIMPLE_BINARY_RHS:
4495       code = gimple_assign_rhs_code (stmt);
4496       op_type = TREE_CODE_LENGTH (code);
4497       gcc_assert (op_type == binary_op);
4498       ops[0] = gimple_assign_rhs1 (stmt);
4499       ops[1] = gimple_assign_rhs2 (stmt);
4500       break;
4501 
4502     case GIMPLE_TERNARY_RHS:
4503       code = gimple_assign_rhs_code (stmt);
4504       op_type = TREE_CODE_LENGTH (code);
4505       gcc_assert (op_type == ternary_op);
4506       ops[0] = gimple_assign_rhs1 (stmt);
4507       ops[1] = gimple_assign_rhs2 (stmt);
4508       ops[2] = gimple_assign_rhs3 (stmt);
4509       break;
4510 
4511     case GIMPLE_UNARY_RHS:
4512       return false;
4513 
4514     default:
4515       gcc_unreachable ();
4516     }
4517 
4518   if (code == COND_EXPR && slp_node)
4519     return false;
4520 
4521   scalar_dest = gimple_assign_lhs (stmt);
4522   scalar_type = TREE_TYPE (scalar_dest);
4523   if (!POINTER_TYPE_P (scalar_type) && !INTEGRAL_TYPE_P (scalar_type)
4524       && !SCALAR_FLOAT_TYPE_P (scalar_type))
4525     return false;
4526 
4527   /* Do not try to vectorize bit-precision reductions.  */
4528   if ((TYPE_PRECISION (scalar_type)
4529        != GET_MODE_PRECISION (TYPE_MODE (scalar_type))))
4530     return false;
4531 
4532   /* All uses but the last are expected to be defined in the loop.
4533      The last use is the reduction variable.  In case of nested cycle this
4534      assumption is not true: we use reduc_index to record the index of the
4535      reduction variable.  */
4536   for (i = 0; i < op_type - 1; i++)
4537     {
4538       /* The condition of COND_EXPR is checked in vectorizable_condition().  */
4539       if (i == 0 && code == COND_EXPR)
4540         continue;
4541 
4542       is_simple_use = vect_is_simple_use_1 (ops[i], stmt, loop_vinfo, NULL,
4543 					    &def_stmt, &def, &dt, &tem);
4544       if (!vectype_in)
4545 	vectype_in = tem;
4546       gcc_assert (is_simple_use);
4547 
4548       if (dt != vect_internal_def
4549 	  && dt != vect_external_def
4550 	  && dt != vect_constant_def
4551 	  && dt != vect_induction_def
4552           && !(dt == vect_nested_cycle && nested_cycle))
4553 	return false;
4554 
4555       if (dt == vect_nested_cycle)
4556         {
4557           found_nested_cycle_def = true;
4558           reduc_def_stmt = def_stmt;
4559           reduc_index = i;
4560         }
4561     }
4562 
4563   is_simple_use = vect_is_simple_use_1 (ops[i], stmt, loop_vinfo, NULL,
4564 					&def_stmt, &def, &dt, &tem);
4565   if (!vectype_in)
4566     vectype_in = tem;
4567   gcc_assert (is_simple_use);
4568   if (!(dt == vect_reduction_def
4569 	|| dt == vect_nested_cycle
4570 	|| ((dt == vect_internal_def || dt == vect_external_def
4571 	     || dt == vect_constant_def || dt == vect_induction_def)
4572 	    && nested_cycle && found_nested_cycle_def)))
4573     {
4574       /* For pattern recognized stmts, orig_stmt might be a reduction,
4575 	 but some helper statements for the pattern might not, or
4576 	 might be COND_EXPRs with reduction uses in the condition.  */
4577       gcc_assert (orig_stmt);
4578       return false;
4579     }
4580   if (!found_nested_cycle_def)
4581     reduc_def_stmt = def_stmt;
4582 
4583   gcc_assert (gimple_code (reduc_def_stmt) == GIMPLE_PHI);
4584   if (orig_stmt)
4585     gcc_assert (orig_stmt == vect_is_simple_reduction (loop_vinfo,
4586                                                        reduc_def_stmt,
4587                                                        !nested_cycle,
4588                                                        &dummy));
4589   else
4590     {
4591       gimple tmp = vect_is_simple_reduction (loop_vinfo, reduc_def_stmt,
4592                                              !nested_cycle, &dummy);
4593       /* We changed STMT to be the first stmt in reduction chain, hence we
4594          check that in this case the first element in the chain is STMT.  */
4595       gcc_assert (stmt == tmp
4596                   || GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) == stmt);
4597     }
4598 
4599   if (STMT_VINFO_LIVE_P (vinfo_for_stmt (reduc_def_stmt)))
4600     return false;
4601 
4602   if (slp_node || PURE_SLP_STMT (stmt_info))
4603     ncopies = 1;
4604   else
4605     ncopies = (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
4606                / TYPE_VECTOR_SUBPARTS (vectype_in));
4607 
4608   gcc_assert (ncopies >= 1);
4609 
4610   vec_mode = TYPE_MODE (vectype_in);
4611 
4612   if (code == COND_EXPR)
4613     {
4614       if (!vectorizable_condition (stmt, gsi, NULL, ops[reduc_index], 0, NULL))
4615         {
4616           if (vect_print_dump_info (REPORT_DETAILS))
4617             fprintf (vect_dump, "unsupported condition in reduction");
4618 
4619             return false;
4620         }
4621     }
4622   else
4623     {
4624       /* 4. Supportable by target?  */
4625 
4626       /* 4.1. check support for the operation in the loop  */
4627       optab = optab_for_tree_code (code, vectype_in, optab_default);
4628       if (!optab)
4629         {
4630           if (vect_print_dump_info (REPORT_DETAILS))
4631             fprintf (vect_dump, "no optab.");
4632 
4633           return false;
4634         }
4635 
4636       if (optab_handler (optab, vec_mode) == CODE_FOR_nothing)
4637         {
4638           if (vect_print_dump_info (REPORT_DETAILS))
4639             fprintf (vect_dump, "op not supported by target.");
4640 
4641           if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4642               || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
4643 	          < vect_min_worthwhile_factor (code))
4644             return false;
4645 
4646           if (vect_print_dump_info (REPORT_DETAILS))
4647   	    fprintf (vect_dump, "proceeding using word mode.");
4648         }
4649 
4650       /* Worthwhile without SIMD support?  */
4651       if (!VECTOR_MODE_P (TYPE_MODE (vectype_in))
4652           && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
4653    	     < vect_min_worthwhile_factor (code))
4654         {
4655           if (vect_print_dump_info (REPORT_DETAILS))
4656 	    fprintf (vect_dump, "not worthwhile without SIMD support.");
4657 
4658           return false;
4659         }
4660     }
4661 
4662   /* 4.2. Check support for the epilog operation.
4663 
4664           If STMT represents a reduction pattern, then the type of the
4665           reduction variable may be different than the type of the rest
4666           of the arguments.  For example, consider the case of accumulation
4667           of shorts into an int accumulator; The original code:
4668                         S1: int_a = (int) short_a;
4669           orig_stmt->   S2: int_acc = plus <int_a ,int_acc>;
4670 
4671           was replaced with:
4672                         STMT: int_acc = widen_sum <short_a, int_acc>
4673 
4674           This means that:
4675           1. The tree-code that is used to create the vector operation in the
4676              epilog code (that reduces the partial results) is not the
4677              tree-code of STMT, but is rather the tree-code of the original
4678              stmt from the pattern that STMT is replacing.  I.e, in the example
4679              above we want to use 'widen_sum' in the loop, but 'plus' in the
4680              epilog.
4681           2. The type (mode) we use to check available target support
4682              for the vector operation to be created in the *epilog*, is
4683              determined by the type of the reduction variable (in the example
4684              above we'd check this: optab_handler (plus_optab, vect_int_mode])).
4685              However the type (mode) we use to check available target support
4686              for the vector operation to be created *inside the loop*, is
4687              determined by the type of the other arguments to STMT (in the
4688              example we'd check this: optab_handler (widen_sum_optab,
4689 	     vect_short_mode)).
4690 
4691           This is contrary to "regular" reductions, in which the types of all
4692           the arguments are the same as the type of the reduction variable.
4693           For "regular" reductions we can therefore use the same vector type
4694           (and also the same tree-code) when generating the epilog code and
4695           when generating the code inside the loop.  */
4696 
4697   if (orig_stmt)
4698     {
4699       /* This is a reduction pattern: get the vectype from the type of the
4700          reduction variable, and get the tree-code from orig_stmt.  */
4701       orig_code = gimple_assign_rhs_code (orig_stmt);
4702       gcc_assert (vectype_out);
4703       vec_mode = TYPE_MODE (vectype_out);
4704     }
4705   else
4706     {
4707       /* Regular reduction: use the same vectype and tree-code as used for
4708          the vector code inside the loop can be used for the epilog code. */
4709       orig_code = code;
4710     }
4711 
4712   if (nested_cycle)
4713     {
4714       def_bb = gimple_bb (reduc_def_stmt);
4715       def_stmt_loop = def_bb->loop_father;
4716       def_arg = PHI_ARG_DEF_FROM_EDGE (reduc_def_stmt,
4717                                        loop_preheader_edge (def_stmt_loop));
4718       if (TREE_CODE (def_arg) == SSA_NAME
4719           && (def_arg_stmt = SSA_NAME_DEF_STMT (def_arg))
4720           && gimple_code (def_arg_stmt) == GIMPLE_PHI
4721           && flow_bb_inside_loop_p (outer_loop, gimple_bb (def_arg_stmt))
4722           && vinfo_for_stmt (def_arg_stmt)
4723           && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_arg_stmt))
4724               == vect_double_reduction_def)
4725         double_reduc = true;
4726     }
4727 
4728   epilog_reduc_code = ERROR_MARK;
4729   if (reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
4730     {
4731       reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype_out,
4732                                          optab_default);
4733       if (!reduc_optab)
4734         {
4735           if (vect_print_dump_info (REPORT_DETAILS))
4736             fprintf (vect_dump, "no optab for reduction.");
4737 
4738           epilog_reduc_code = ERROR_MARK;
4739         }
4740 
4741       if (reduc_optab
4742           && optab_handler (reduc_optab, vec_mode) == CODE_FOR_nothing)
4743         {
4744           if (vect_print_dump_info (REPORT_DETAILS))
4745             fprintf (vect_dump, "reduc op not supported by target.");
4746 
4747           epilog_reduc_code = ERROR_MARK;
4748         }
4749     }
4750   else
4751     {
4752       if (!nested_cycle || double_reduc)
4753         {
4754           if (vect_print_dump_info (REPORT_DETAILS))
4755             fprintf (vect_dump, "no reduc code for scalar code.");
4756 
4757           return false;
4758         }
4759     }
4760 
4761   if (double_reduc && ncopies > 1)
4762     {
4763       if (vect_print_dump_info (REPORT_DETAILS))
4764         fprintf (vect_dump, "multiple types in double reduction");
4765 
4766       return false;
4767     }
4768 
4769   /* In case of widenning multiplication by a constant, we update the type
4770      of the constant to be the type of the other operand.  We check that the
4771      constant fits the type in the pattern recognition pass.  */
4772   if (code == DOT_PROD_EXPR
4773       && !types_compatible_p (TREE_TYPE (ops[0]), TREE_TYPE (ops[1])))
4774     {
4775       if (TREE_CODE (ops[0]) == INTEGER_CST)
4776         ops[0] = fold_convert (TREE_TYPE (ops[1]), ops[0]);
4777       else if (TREE_CODE (ops[1]) == INTEGER_CST)
4778         ops[1] = fold_convert (TREE_TYPE (ops[0]), ops[1]);
4779       else
4780         {
4781           if (vect_print_dump_info (REPORT_DETAILS))
4782             fprintf (vect_dump, "invalid types in dot-prod");
4783 
4784           return false;
4785         }
4786     }
4787 
4788   if (!vec_stmt) /* transformation not required.  */
4789     {
4790       if (!vect_model_reduction_cost (stmt_info, epilog_reduc_code, ncopies))
4791         return false;
4792       STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
4793       return true;
4794     }
4795 
4796   /** Transform.  **/
4797 
4798   if (vect_print_dump_info (REPORT_DETAILS))
4799     fprintf (vect_dump, "transform reduction.");
4800 
4801   /* FORNOW: Multiple types are not supported for condition.  */
4802   if (code == COND_EXPR)
4803     gcc_assert (ncopies == 1);
4804 
4805   /* Create the destination vector  */
4806   vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
4807 
4808   /* In case the vectorization factor (VF) is bigger than the number
4809      of elements that we can fit in a vectype (nunits), we have to generate
4810      more than one vector stmt - i.e - we need to "unroll" the
4811      vector stmt by a factor VF/nunits.  For more details see documentation
4812      in vectorizable_operation.  */
4813 
4814   /* If the reduction is used in an outer loop we need to generate
4815      VF intermediate results, like so (e.g. for ncopies=2):
4816 	r0 = phi (init, r0)
4817 	r1 = phi (init, r1)
4818 	r0 = x0 + r0;
4819         r1 = x1 + r1;
4820     (i.e. we generate VF results in 2 registers).
4821     In this case we have a separate def-use cycle for each copy, and therefore
4822     for each copy we get the vector def for the reduction variable from the
4823     respective phi node created for this copy.
4824 
4825     Otherwise (the reduction is unused in the loop nest), we can combine
4826     together intermediate results, like so (e.g. for ncopies=2):
4827 	r = phi (init, r)
4828 	r = x0 + r;
4829 	r = x1 + r;
4830    (i.e. we generate VF/2 results in a single register).
4831    In this case for each copy we get the vector def for the reduction variable
4832    from the vectorized reduction operation generated in the previous iteration.
4833   */
4834 
4835   if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_scope)
4836     {
4837       single_defuse_cycle = true;
4838       epilog_copies = 1;
4839     }
4840   else
4841     epilog_copies = ncopies;
4842 
4843   prev_stmt_info = NULL;
4844   prev_phi_info = NULL;
4845   if (slp_node)
4846     {
4847       vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
4848       gcc_assert (TYPE_VECTOR_SUBPARTS (vectype_out)
4849                   == TYPE_VECTOR_SUBPARTS (vectype_in));
4850     }
4851   else
4852     {
4853       vec_num = 1;
4854       vec_oprnds0 = VEC_alloc (tree, heap, 1);
4855       if (op_type == ternary_op)
4856         vec_oprnds1 = VEC_alloc (tree, heap, 1);
4857     }
4858 
4859   phis = VEC_alloc (gimple, heap, vec_num);
4860   vect_defs = VEC_alloc (tree, heap, vec_num);
4861   if (!slp_node)
4862     VEC_quick_push (tree, vect_defs, NULL_TREE);
4863 
4864   for (j = 0; j < ncopies; j++)
4865     {
4866       if (j == 0 || !single_defuse_cycle)
4867 	{
4868           for (i = 0; i < vec_num; i++)
4869             {
4870               /* Create the reduction-phi that defines the reduction
4871                  operand.  */
4872               new_phi = create_phi_node (vec_dest, loop->header);
4873               set_vinfo_for_stmt (new_phi,
4874                                   new_stmt_vec_info (new_phi, loop_vinfo,
4875                                                      NULL));
4876                if (j == 0 || slp_node)
4877                  VEC_quick_push (gimple, phis, new_phi);
4878             }
4879         }
4880 
4881       if (code == COND_EXPR)
4882         {
4883           gcc_assert (!slp_node);
4884           vectorizable_condition (stmt, gsi, vec_stmt,
4885                                   PHI_RESULT (VEC_index (gimple, phis, 0)),
4886                                   reduc_index, NULL);
4887           /* Multiple types are not supported for condition.  */
4888           break;
4889         }
4890 
4891       /* Handle uses.  */
4892       if (j == 0)
4893         {
4894           op0 = ops[!reduc_index];
4895           if (op_type == ternary_op)
4896             {
4897               if (reduc_index == 0)
4898                 op1 = ops[2];
4899               else
4900                 op1 = ops[1];
4901             }
4902 
4903           if (slp_node)
4904             vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4905                                slp_node, -1);
4906           else
4907             {
4908               loop_vec_def0 = vect_get_vec_def_for_operand (ops[!reduc_index],
4909                                                             stmt, NULL);
4910               VEC_quick_push (tree, vec_oprnds0, loop_vec_def0);
4911               if (op_type == ternary_op)
4912                {
4913                  loop_vec_def1 = vect_get_vec_def_for_operand (op1, stmt,
4914                                                                NULL);
4915                  VEC_quick_push (tree, vec_oprnds1, loop_vec_def1);
4916                }
4917             }
4918         }
4919       else
4920         {
4921           if (!slp_node)
4922             {
4923               enum vect_def_type dt;
4924               gimple dummy_stmt;
4925               tree dummy;
4926 
4927               vect_is_simple_use (ops[!reduc_index], stmt, loop_vinfo, NULL,
4928                                   &dummy_stmt, &dummy, &dt);
4929               loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt,
4930                                                               loop_vec_def0);
4931               VEC_replace (tree, vec_oprnds0, 0, loop_vec_def0);
4932               if (op_type == ternary_op)
4933                 {
4934                   vect_is_simple_use (op1, stmt, loop_vinfo, NULL, &dummy_stmt,
4935                                       &dummy, &dt);
4936                   loop_vec_def1 = vect_get_vec_def_for_stmt_copy (dt,
4937                                                                 loop_vec_def1);
4938                   VEC_replace (tree, vec_oprnds1, 0, loop_vec_def1);
4939                 }
4940             }
4941 
4942           if (single_defuse_cycle)
4943             reduc_def = gimple_assign_lhs (new_stmt);
4944 
4945           STMT_VINFO_RELATED_STMT (prev_phi_info) = new_phi;
4946         }
4947 
4948       FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, def0)
4949         {
4950           if (slp_node)
4951             reduc_def = PHI_RESULT (VEC_index (gimple, phis, i));
4952           else
4953             {
4954               if (!single_defuse_cycle || j == 0)
4955                 reduc_def = PHI_RESULT (new_phi);
4956             }
4957 
4958           def1 = ((op_type == ternary_op)
4959                   ? VEC_index (tree, vec_oprnds1, i) : NULL);
4960           if (op_type == binary_op)
4961             {
4962               if (reduc_index == 0)
4963                 expr = build2 (code, vectype_out, reduc_def, def0);
4964               else
4965                 expr = build2 (code, vectype_out, def0, reduc_def);
4966             }
4967           else
4968             {
4969               if (reduc_index == 0)
4970                 expr = build3 (code, vectype_out, reduc_def, def0, def1);
4971               else
4972                 {
4973                   if (reduc_index == 1)
4974                     expr = build3 (code, vectype_out, def0, reduc_def, def1);
4975                   else
4976                     expr = build3 (code, vectype_out, def0, def1, reduc_def);
4977                 }
4978             }
4979 
4980           new_stmt = gimple_build_assign (vec_dest, expr);
4981           new_temp = make_ssa_name (vec_dest, new_stmt);
4982           gimple_assign_set_lhs (new_stmt, new_temp);
4983           vect_finish_stmt_generation (stmt, new_stmt, gsi);
4984 
4985           if (slp_node)
4986             {
4987               VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
4988               VEC_quick_push (tree, vect_defs, new_temp);
4989             }
4990           else
4991             VEC_replace (tree, vect_defs, 0, new_temp);
4992         }
4993 
4994       if (slp_node)
4995         continue;
4996 
4997       if (j == 0)
4998 	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4999       else
5000 	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5001 
5002       prev_stmt_info = vinfo_for_stmt (new_stmt);
5003       prev_phi_info = vinfo_for_stmt (new_phi);
5004     }
5005 
5006   /* Finalize the reduction-phi (set its arguments) and create the
5007      epilog reduction code.  */
5008   if ((!single_defuse_cycle || code == COND_EXPR) && !slp_node)
5009     {
5010       new_temp = gimple_assign_lhs (*vec_stmt);
5011       VEC_replace (tree, vect_defs, 0, new_temp);
5012     }
5013 
5014   vect_create_epilog_for_reduction (vect_defs, stmt, epilog_copies,
5015                                     epilog_reduc_code, phis, reduc_index,
5016                                     double_reduc, slp_node);
5017 
5018   VEC_free (gimple, heap, phis);
5019   VEC_free (tree, heap, vec_oprnds0);
5020   if (vec_oprnds1)
5021     VEC_free (tree, heap, vec_oprnds1);
5022 
5023   return true;
5024 }
5025 
5026 /* Function vect_min_worthwhile_factor.
5027 
5028    For a loop where we could vectorize the operation indicated by CODE,
5029    return the minimum vectorization factor that makes it worthwhile
5030    to use generic vectors.  */
5031 int
5032 vect_min_worthwhile_factor (enum tree_code code)
5033 {
5034   switch (code)
5035     {
5036     case PLUS_EXPR:
5037     case MINUS_EXPR:
5038     case NEGATE_EXPR:
5039       return 4;
5040 
5041     case BIT_AND_EXPR:
5042     case BIT_IOR_EXPR:
5043     case BIT_XOR_EXPR:
5044     case BIT_NOT_EXPR:
5045       return 2;
5046 
5047     default:
5048       return INT_MAX;
5049     }
5050 }
5051 
5052 
5053 /* Function vectorizable_induction
5054 
5055    Check if PHI performs an induction computation that can be vectorized.
5056    If VEC_STMT is also passed, vectorize the induction PHI: create a vectorized
5057    phi to replace it, put it in VEC_STMT, and add it to the same basic block.
5058    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
5059 
5060 bool
5061 vectorizable_induction (gimple phi, gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
5062 			gimple *vec_stmt)
5063 {
5064   stmt_vec_info stmt_info = vinfo_for_stmt (phi);
5065   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5066   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5067   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5068   int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5069   int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5070   tree vec_def;
5071 
5072   gcc_assert (ncopies >= 1);
5073   /* FORNOW. These restrictions should be relaxed.  */
5074   if (nested_in_vect_loop_p (loop, phi))
5075     {
5076       imm_use_iterator imm_iter;
5077       use_operand_p use_p;
5078       gimple exit_phi;
5079       edge latch_e;
5080       tree loop_arg;
5081 
5082       if (ncopies > 1)
5083 	{
5084 	  if (vect_print_dump_info (REPORT_DETAILS))
5085 	    fprintf (vect_dump, "multiple types in nested loop.");
5086 	  return false;
5087 	}
5088 
5089       exit_phi = NULL;
5090       latch_e = loop_latch_edge (loop->inner);
5091       loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
5092       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, loop_arg)
5093 	{
5094 	  if (!flow_bb_inside_loop_p (loop->inner,
5095 				      gimple_bb (USE_STMT (use_p))))
5096 	    {
5097 	      exit_phi = USE_STMT (use_p);
5098 	      break;
5099 	    }
5100 	}
5101       if (exit_phi)
5102 	{
5103 	  stmt_vec_info exit_phi_vinfo  = vinfo_for_stmt (exit_phi);
5104 	  if (!(STMT_VINFO_RELEVANT_P (exit_phi_vinfo)
5105 		&& !STMT_VINFO_LIVE_P (exit_phi_vinfo)))
5106 	    {
5107 	      if (vect_print_dump_info (REPORT_DETAILS))
5108 		fprintf (vect_dump, "inner-loop induction only used outside "
5109 			 "of the outer vectorized loop.");
5110 	      return false;
5111 	    }
5112 	}
5113     }
5114 
5115   if (!STMT_VINFO_RELEVANT_P (stmt_info))
5116     return false;
5117 
5118   /* FORNOW: SLP not supported.  */
5119   if (STMT_SLP_TYPE (stmt_info))
5120     return false;
5121 
5122   gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def);
5123 
5124   if (gimple_code (phi) != GIMPLE_PHI)
5125     return false;
5126 
5127   if (!vec_stmt) /* transformation not required.  */
5128     {
5129       STMT_VINFO_TYPE (stmt_info) = induc_vec_info_type;
5130       if (vect_print_dump_info (REPORT_DETAILS))
5131         fprintf (vect_dump, "=== vectorizable_induction ===");
5132       vect_model_induction_cost (stmt_info, ncopies);
5133       return true;
5134     }
5135 
5136   /** Transform.  **/
5137 
5138   if (vect_print_dump_info (REPORT_DETAILS))
5139     fprintf (vect_dump, "transform induction phi.");
5140 
5141   vec_def = get_initial_def_for_induction (phi);
5142   *vec_stmt = SSA_NAME_DEF_STMT (vec_def);
5143   return true;
5144 }
5145 
5146 /* Function vectorizable_live_operation.
5147 
5148    STMT computes a value that is used outside the loop.  Check if
5149    it can be supported.  */
5150 
5151 bool
5152 vectorizable_live_operation (gimple stmt,
5153 			     gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
5154 			     gimple *vec_stmt ATTRIBUTE_UNUSED)
5155 {
5156   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5157   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5158   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5159   int i;
5160   int op_type;
5161   tree op;
5162   tree def;
5163   gimple def_stmt;
5164   enum vect_def_type dt;
5165   enum tree_code code;
5166   enum gimple_rhs_class rhs_class;
5167 
5168   gcc_assert (STMT_VINFO_LIVE_P (stmt_info));
5169 
5170   if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
5171     return false;
5172 
5173   if (!is_gimple_assign (stmt))
5174     return false;
5175 
5176   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5177     return false;
5178 
5179   /* FORNOW. CHECKME. */
5180   if (nested_in_vect_loop_p (loop, stmt))
5181     return false;
5182 
5183   code = gimple_assign_rhs_code (stmt);
5184   op_type = TREE_CODE_LENGTH (code);
5185   rhs_class = get_gimple_rhs_class (code);
5186   gcc_assert (rhs_class != GIMPLE_UNARY_RHS || op_type == unary_op);
5187   gcc_assert (rhs_class != GIMPLE_BINARY_RHS || op_type == binary_op);
5188 
5189   /* FORNOW: support only if all uses are invariant.  This means
5190      that the scalar operations can remain in place, unvectorized.
5191      The original last scalar value that they compute will be used.  */
5192 
5193   for (i = 0; i < op_type; i++)
5194     {
5195       if (rhs_class == GIMPLE_SINGLE_RHS)
5196 	op = TREE_OPERAND (gimple_op (stmt, 1), i);
5197       else
5198 	op = gimple_op (stmt, i + 1);
5199       if (op
5200           && !vect_is_simple_use (op, stmt, loop_vinfo, NULL, &def_stmt, &def,
5201 				  &dt))
5202         {
5203           if (vect_print_dump_info (REPORT_DETAILS))
5204             fprintf (vect_dump, "use not simple.");
5205           return false;
5206         }
5207 
5208       if (dt != vect_external_def && dt != vect_constant_def)
5209         return false;
5210     }
5211 
5212   /* No transformation is required for the cases we currently support.  */
5213   return true;
5214 }
5215 
5216 /* Kill any debug uses outside LOOP of SSA names defined in STMT.  */
5217 
5218 static void
5219 vect_loop_kill_debug_uses (struct loop *loop, gimple stmt)
5220 {
5221   ssa_op_iter op_iter;
5222   imm_use_iterator imm_iter;
5223   def_operand_p def_p;
5224   gimple ustmt;
5225 
5226   FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
5227     {
5228       FOR_EACH_IMM_USE_STMT (ustmt, imm_iter, DEF_FROM_PTR (def_p))
5229 	{
5230 	  basic_block bb;
5231 
5232 	  if (!is_gimple_debug (ustmt))
5233 	    continue;
5234 
5235 	  bb = gimple_bb (ustmt);
5236 
5237 	  if (!flow_bb_inside_loop_p (loop, bb))
5238 	    {
5239 	      if (gimple_debug_bind_p (ustmt))
5240 		{
5241 		  if (vect_print_dump_info (REPORT_DETAILS))
5242 		    fprintf (vect_dump, "killing debug use");
5243 
5244 		  gimple_debug_bind_reset_value (ustmt);
5245 		  update_stmt (ustmt);
5246 		}
5247 	      else
5248 		gcc_unreachable ();
5249 	    }
5250 	}
5251     }
5252 }
5253 
5254 /* Function vect_transform_loop.
5255 
5256    The analysis phase has determined that the loop is vectorizable.
5257    Vectorize the loop - created vectorized stmts to replace the scalar
5258    stmts in the loop, and update the loop exit condition.  */
5259 
5260 void
5261 vect_transform_loop (loop_vec_info loop_vinfo)
5262 {
5263   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5264   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5265   int nbbs = loop->num_nodes;
5266   gimple_stmt_iterator si;
5267   int i;
5268   tree ratio = NULL;
5269   int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5270   bool strided_store;
5271   bool slp_scheduled = false;
5272   unsigned int nunits;
5273   tree cond_expr = NULL_TREE;
5274   gimple_seq cond_expr_stmt_list = NULL;
5275   bool do_peeling_for_loop_bound;
5276   gimple stmt, pattern_stmt;
5277   gimple_seq pattern_def_seq = NULL;
5278   gimple_stmt_iterator pattern_def_si = gsi_start (NULL);
5279   bool transform_pattern_stmt = false;
5280 
5281   if (vect_print_dump_info (REPORT_DETAILS))
5282     fprintf (vect_dump, "=== vec_transform_loop ===");
5283 
5284   /* Peel the loop if there are data refs with unknown alignment.
5285      Only one data ref with unknown store is allowed.  */
5286 
5287   if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
5288     vect_do_peeling_for_alignment (loop_vinfo);
5289 
5290   do_peeling_for_loop_bound
5291     = (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
5292        || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
5293 	   && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
5294        || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo));
5295 
5296   if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
5297       || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
5298     vect_loop_versioning (loop_vinfo,
5299 			  !do_peeling_for_loop_bound,
5300 			  &cond_expr, &cond_expr_stmt_list);
5301 
5302   /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
5303      compile time constant), or it is a constant that doesn't divide by the
5304      vectorization factor, then an epilog loop needs to be created.
5305      We therefore duplicate the loop: the original loop will be vectorized,
5306      and will compute the first (n/VF) iterations.  The second copy of the loop
5307      will remain scalar and will compute the remaining (n%VF) iterations.
5308      (VF is the vectorization factor).  */
5309 
5310   if (do_peeling_for_loop_bound)
5311     vect_do_peeling_for_loop_bound (loop_vinfo, &ratio,
5312 				    cond_expr, cond_expr_stmt_list);
5313   else
5314     ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
5315 		LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
5316 
5317   /* 1) Make sure the loop header has exactly two entries
5318      2) Make sure we have a preheader basic block.  */
5319 
5320   gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
5321 
5322   split_edge (loop_preheader_edge (loop));
5323 
5324   /* FORNOW: the vectorizer supports only loops which body consist
5325      of one basic block (header + empty latch). When the vectorizer will
5326      support more involved loop forms, the order by which the BBs are
5327      traversed need to be reconsidered.  */
5328 
5329   for (i = 0; i < nbbs; i++)
5330     {
5331       basic_block bb = bbs[i];
5332       stmt_vec_info stmt_info;
5333       gimple phi;
5334 
5335       for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
5336         {
5337 	  phi = gsi_stmt (si);
5338 	  if (vect_print_dump_info (REPORT_DETAILS))
5339 	    {
5340 	      fprintf (vect_dump, "------>vectorizing phi: ");
5341 	      print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
5342 	    }
5343 	  stmt_info = vinfo_for_stmt (phi);
5344 	  if (!stmt_info)
5345 	    continue;
5346 
5347 	  if (MAY_HAVE_DEBUG_STMTS && !STMT_VINFO_LIVE_P (stmt_info))
5348 	    vect_loop_kill_debug_uses (loop, phi);
5349 
5350 	  if (!STMT_VINFO_RELEVANT_P (stmt_info)
5351 	      && !STMT_VINFO_LIVE_P (stmt_info))
5352 	    continue;
5353 
5354 	  if ((TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
5355 	        != (unsigned HOST_WIDE_INT) vectorization_factor)
5356 	      && vect_print_dump_info (REPORT_DETAILS))
5357 	    fprintf (vect_dump, "multiple-types.");
5358 
5359 	  if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
5360 	    {
5361 	      if (vect_print_dump_info (REPORT_DETAILS))
5362 		fprintf (vect_dump, "transform phi.");
5363 	      vect_transform_stmt (phi, NULL, NULL, NULL, NULL);
5364 	    }
5365 	}
5366 
5367       pattern_stmt = NULL;
5368       for (si = gsi_start_bb (bb); !gsi_end_p (si) || transform_pattern_stmt;)
5369 	{
5370 	  bool is_store;
5371 
5372           if (transform_pattern_stmt)
5373 	    stmt = pattern_stmt;
5374           else
5375             stmt = gsi_stmt (si);
5376 
5377 	  if (vect_print_dump_info (REPORT_DETAILS))
5378 	    {
5379 	      fprintf (vect_dump, "------>vectorizing statement: ");
5380 	      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5381 	    }
5382 
5383 	  stmt_info = vinfo_for_stmt (stmt);
5384 
5385 	  /* vector stmts created in the outer-loop during vectorization of
5386 	     stmts in an inner-loop may not have a stmt_info, and do not
5387 	     need to be vectorized.  */
5388 	  if (!stmt_info)
5389 	    {
5390 	      gsi_next (&si);
5391 	      continue;
5392 	    }
5393 
5394 	  if (MAY_HAVE_DEBUG_STMTS && !STMT_VINFO_LIVE_P (stmt_info))
5395 	    vect_loop_kill_debug_uses (loop, stmt);
5396 
5397 	  if (!STMT_VINFO_RELEVANT_P (stmt_info)
5398 	      && !STMT_VINFO_LIVE_P (stmt_info))
5399             {
5400               if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5401                   && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
5402                   && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5403                       || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5404                 {
5405                   stmt = pattern_stmt;
5406                   stmt_info = vinfo_for_stmt (stmt);
5407                 }
5408               else
5409 	        {
5410    	          gsi_next (&si);
5411 	          continue;
5412                 }
5413 	    }
5414           else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5415                    && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
5416                    && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5417                        || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5418             transform_pattern_stmt = true;
5419 
5420 	  /* If pattern statement has def stmts, vectorize them too.  */
5421 	  if (is_pattern_stmt_p (stmt_info))
5422 	    {
5423 	      if (pattern_def_seq == NULL)
5424 		{
5425 		  pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info);
5426 		  pattern_def_si = gsi_start (pattern_def_seq);
5427 		}
5428 	      else if (!gsi_end_p (pattern_def_si))
5429 		gsi_next (&pattern_def_si);
5430 	      if (pattern_def_seq != NULL)
5431 		{
5432 		  gimple pattern_def_stmt = NULL;
5433 		  stmt_vec_info pattern_def_stmt_info = NULL;
5434 
5435 		  while (!gsi_end_p (pattern_def_si))
5436 		    {
5437 		      pattern_def_stmt = gsi_stmt (pattern_def_si);
5438 		      pattern_def_stmt_info
5439 			= vinfo_for_stmt (pattern_def_stmt);
5440 		      if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
5441 			  || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
5442 			break;
5443 		      gsi_next (&pattern_def_si);
5444 		    }
5445 
5446 		  if (!gsi_end_p (pattern_def_si))
5447 		    {
5448 		      if (vect_print_dump_info (REPORT_DETAILS))
5449 			{
5450 			  fprintf (vect_dump, "==> vectorizing pattern def"
5451 					      " stmt: ");
5452 			  print_gimple_stmt (vect_dump, pattern_def_stmt, 0,
5453 					     TDF_SLIM);
5454 			}
5455 
5456 		      stmt = pattern_def_stmt;
5457 		      stmt_info = pattern_def_stmt_info;
5458 		    }
5459 		  else
5460 		    {
5461 		      pattern_def_si = gsi_start (NULL);
5462 		      transform_pattern_stmt = false;
5463 		    }
5464 		}
5465 	      else
5466 		transform_pattern_stmt = false;
5467             }
5468 
5469 	  gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
5470 	  nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (
5471                                                STMT_VINFO_VECTYPE (stmt_info));
5472 	  if (!STMT_SLP_TYPE (stmt_info)
5473 	      && nunits != (unsigned int) vectorization_factor
5474               && vect_print_dump_info (REPORT_DETAILS))
5475 	    /* For SLP VF is set according to unrolling factor, and not to
5476 	       vector size, hence for SLP this print is not valid.  */
5477             fprintf (vect_dump, "multiple-types.");
5478 
5479 	  /* SLP. Schedule all the SLP instances when the first SLP stmt is
5480 	     reached.  */
5481 	  if (STMT_SLP_TYPE (stmt_info))
5482 	    {
5483 	      if (!slp_scheduled)
5484 		{
5485 		  slp_scheduled = true;
5486 
5487 		  if (vect_print_dump_info (REPORT_DETAILS))
5488 		    fprintf (vect_dump, "=== scheduling SLP instances ===");
5489 
5490 		  vect_schedule_slp (loop_vinfo, NULL);
5491 		}
5492 
5493 	      /* Hybrid SLP stmts must be vectorized in addition to SLP.  */
5494 	      if (!vinfo_for_stmt (stmt) || PURE_SLP_STMT (stmt_info))
5495 		{
5496 		  if (!transform_pattern_stmt && gsi_end_p (pattern_def_si))
5497 		    {
5498 		      pattern_def_seq = NULL;
5499 		      gsi_next (&si);
5500 		    }
5501 		  continue;
5502 		}
5503 	    }
5504 
5505 	  /* -------- vectorize statement ------------ */
5506 	  if (vect_print_dump_info (REPORT_DETAILS))
5507 	    fprintf (vect_dump, "transform statement.");
5508 
5509 	  strided_store = false;
5510 	  is_store = vect_transform_stmt (stmt, &si, &strided_store, NULL, NULL);
5511           if (is_store)
5512             {
5513 	      if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
5514 		{
5515 		  /* Interleaving. If IS_STORE is TRUE, the vectorization of the
5516 		     interleaving chain was completed - free all the stores in
5517 		     the chain.  */
5518 		  gsi_next (&si);
5519 		  vect_remove_stores (GROUP_FIRST_ELEMENT (stmt_info));
5520  		  continue;
5521 		}
5522 	      else
5523 		{
5524 		  /* Free the attached stmt_vec_info and remove the stmt.  */
5525 		  free_stmt_vec_info (gsi_stmt (si));
5526 		  gsi_remove (&si, true);
5527 		  continue;
5528 		}
5529 	    }
5530 
5531 	  if (!transform_pattern_stmt && gsi_end_p (pattern_def_si))
5532 	    {
5533 	      pattern_def_seq = NULL;
5534 	      gsi_next (&si);
5535 	    }
5536 	}		        /* stmts in BB */
5537     }				/* BBs in loop */
5538 
5539   slpeel_make_loop_iterate_ntimes (loop, ratio);
5540 
5541   /* The memory tags and pointers in vectorized statements need to
5542      have their SSA forms updated.  FIXME, why can't this be delayed
5543      until all the loops have been transformed?  */
5544   update_ssa (TODO_update_ssa);
5545 
5546   if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS))
5547     fprintf (vect_dump, "LOOP VECTORIZED.");
5548   if (loop->inner && vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS))
5549     fprintf (vect_dump, "OUTER LOOP VECTORIZED.");
5550 }
5551