1 /* Statement Analysis and Transformation for Vectorization
2    Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
3    Free Software Foundation, Inc.
4    Contributed by Dorit Naishlos <dorit@il.ibm.com>
5    and Ira Rosen <irar@il.ibm.com>
6 
7 This file is part of GCC.
8 
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13 
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
17 for more details.
18 
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3.  If not see
21 <http://www.gnu.org/licenses/>.  */
22 
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "target.h"
30 #include "basic-block.h"
31 #include "tree-pretty-print.h"
32 #include "gimple-pretty-print.h"
33 #include "tree-flow.h"
34 #include "tree-dump.h"
35 #include "cfgloop.h"
36 #include "cfglayout.h"
37 #include "expr.h"
38 #include "recog.h"
39 #include "optabs.h"
40 #include "diagnostic-core.h"
41 #include "tree-vectorizer.h"
42 #include "langhooks.h"
43 
44 
45 /* Return a variable of type ELEM_TYPE[NELEMS].  */
46 
47 static tree
48 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
49 {
50   return create_tmp_var (build_array_type_nelts (elem_type, nelems),
51 			 "vect_array");
52 }
53 
54 /* ARRAY is an array of vectors created by create_vector_array.
55    Return an SSA_NAME for the vector at index N.  The reference
56    is part of the vectorization of STMT and the vector is associated
57    with scalar destination SCALAR_DEST.  */
58 
59 static tree
60 read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
61 		   tree array, unsigned HOST_WIDE_INT n)
62 {
63   tree vect_type, vect, vect_name, array_ref;
64   gimple new_stmt;
65 
66   gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
67   vect_type = TREE_TYPE (TREE_TYPE (array));
68   vect = vect_create_destination_var (scalar_dest, vect_type);
69   array_ref = build4 (ARRAY_REF, vect_type, array,
70 		      build_int_cst (size_type_node, n),
71 		      NULL_TREE, NULL_TREE);
72 
73   new_stmt = gimple_build_assign (vect, array_ref);
74   vect_name = make_ssa_name (vect, new_stmt);
75   gimple_assign_set_lhs (new_stmt, vect_name);
76   vect_finish_stmt_generation (stmt, new_stmt, gsi);
77   mark_symbols_for_renaming (new_stmt);
78 
79   return vect_name;
80 }
81 
82 /* ARRAY is an array of vectors created by create_vector_array.
83    Emit code to store SSA_NAME VECT at index N of the array.
84    The store is part of the vectorization of STMT.  */
85 
86 static void
87 write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
88 		    tree array, unsigned HOST_WIDE_INT n)
89 {
90   tree array_ref;
91   gimple new_stmt;
92 
93   array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
94 		      build_int_cst (size_type_node, n),
95 		      NULL_TREE, NULL_TREE);
96 
97   new_stmt = gimple_build_assign (array_ref, vect);
98   vect_finish_stmt_generation (stmt, new_stmt, gsi);
99   mark_symbols_for_renaming (new_stmt);
100 }
101 
102 /* PTR is a pointer to an array of type TYPE.  Return a representation
103    of *PTR.  The memory reference replaces those in FIRST_DR
104    (and its group).  */
105 
106 static tree
107 create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
108 {
109   struct ptr_info_def *pi;
110   tree mem_ref, alias_ptr_type;
111 
112   alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
113   mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
114   /* Arrays have the same alignment as their type.  */
115   pi = get_ptr_info (ptr);
116   pi->align = TYPE_ALIGN_UNIT (type);
117   pi->misalign = 0;
118   return mem_ref;
119 }
120 
121 /* Utility functions used by vect_mark_stmts_to_be_vectorized.  */
122 
123 /* Function vect_mark_relevant.
124 
125    Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */
126 
127 static void
128 vect_mark_relevant (VEC(gimple,heap) **worklist, gimple stmt,
129 		    enum vect_relevant relevant, bool live_p,
130 		    bool used_in_pattern)
131 {
132   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
133   enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
134   bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
135   gimple pattern_stmt;
136 
137   if (vect_print_dump_info (REPORT_DETAILS))
138     fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);
139 
140   /* If this stmt is an original stmt in a pattern, we might need to mark its
141      related pattern stmt instead of the original stmt.  However, such stmts
142      may have their own uses that are not in any pattern; in such cases the
143      stmt itself should be marked.  */
144   if (STMT_VINFO_IN_PATTERN_P (stmt_info))
145     {
146       bool found = false;
147       if (!used_in_pattern)
148         {
149           imm_use_iterator imm_iter;
150           use_operand_p use_p;
151           gimple use_stmt;
152           tree lhs;
153 	  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
154 	  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
155 
156           if (is_gimple_assign (stmt))
157             lhs = gimple_assign_lhs (stmt);
158           else
159             lhs = gimple_call_lhs (stmt);
160 
161           /* This use is outside the pattern.  If LHS has other uses that
162              are pattern uses, we should mark the stmt itself, and not the
163              pattern stmt.  */
164 	  if (TREE_CODE (lhs) == SSA_NAME)
165 	    FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
166 	      {
167 		if (is_gimple_debug (USE_STMT (use_p)))
168 		  continue;
169 		use_stmt = USE_STMT (use_p);
170 
171 		if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
172 		  continue;
173 
174 		if (vinfo_for_stmt (use_stmt)
175 		    && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
176 		  {
177 		    found = true;
178 		    break;
179 		  }
180 	      }
181         }
182 
183       if (!found)
184         {
185           /* This is the last stmt in a sequence that was detected as a
186              pattern that can potentially be vectorized.  Don't mark the stmt
187              as relevant/live because it's not going to be vectorized.
188              Instead mark the pattern-stmt that replaces it.  */
189 
190           pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
191 
192           if (vect_print_dump_info (REPORT_DETAILS))
193             fprintf (vect_dump, "last stmt in pattern. don't mark"
194                                 " relevant/live.");
195           stmt_info = vinfo_for_stmt (pattern_stmt);
196           gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
197           save_relevant = STMT_VINFO_RELEVANT (stmt_info);
198           save_live_p = STMT_VINFO_LIVE_P (stmt_info);
199           stmt = pattern_stmt;
200         }
201     }
202 
203   STMT_VINFO_LIVE_P (stmt_info) |= live_p;
204   if (relevant > STMT_VINFO_RELEVANT (stmt_info))
205     STMT_VINFO_RELEVANT (stmt_info) = relevant;
206 
207   if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
208       && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
209     {
210       if (vect_print_dump_info (REPORT_DETAILS))
211         fprintf (vect_dump, "already marked relevant/live.");
212       return;
213     }
214 
215   VEC_safe_push (gimple, heap, *worklist, stmt);
216 }
217 
218 
219 /* Function vect_stmt_relevant_p.
220 
221    Return true if STMT in loop that is represented by LOOP_VINFO is
222    "relevant for vectorization".
223 
224    A stmt is considered "relevant for vectorization" if:
225    - it has uses outside the loop.
226    - it has vdefs (it alters memory).
227    - it is a control stmt in the loop (except for the exit condition).
228 
229    CHECKME: what other side effects would the vectorizer allow?  */
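/* For example (illustrative): a stmt whose result is used after the loop,
   through a loop-closed exit PHI, is "live" even if nothing inside the loop
   depends on it, while a store is "relevant" because it has a vdef.  */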
230 
231 static bool
232 vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
233 		      enum vect_relevant *relevant, bool *live_p)
234 {
235   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
236   ssa_op_iter op_iter;
237   imm_use_iterator imm_iter;
238   use_operand_p use_p;
239   def_operand_p def_p;
240 
241   *relevant = vect_unused_in_scope;
242   *live_p = false;
243 
244   /* cond stmt other than loop exit cond.  */
245   if (is_ctrl_stmt (stmt)
246       && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
247          != loop_exit_ctrl_vec_info_type)
248     *relevant = vect_used_in_scope;
249 
250   /* changing memory.  */
251   if (gimple_code (stmt) != GIMPLE_PHI)
252     if (gimple_vdef (stmt))
253       {
254 	if (vect_print_dump_info (REPORT_DETAILS))
255 	  fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
256 	*relevant = vect_used_in_scope;
257       }
258 
259   /* uses outside the loop.  */
260   FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
261     {
262       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
263 	{
264 	  basic_block bb = gimple_bb (USE_STMT (use_p));
265 	  if (!flow_bb_inside_loop_p (loop, bb))
266 	    {
267 	      if (vect_print_dump_info (REPORT_DETAILS))
268 		fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
269 
270 	      if (is_gimple_debug (USE_STMT (use_p)))
271 		continue;
272 
273 	      /* We expect all such uses to be in the loop exit phis
274 		 (because of loop closed form)   */
275 	      gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
276 	      gcc_assert (bb == single_exit (loop)->dest);
277 
278               *live_p = true;
279 	    }
280 	}
281     }
282 
283   return (*live_p || *relevant);
284 }
285 
286 
287 /* Function exist_non_indexing_operands_for_use_p
288 
289    USE is one of the uses attached to STMT.  Check if USE is
290    used in STMT for anything other than indexing an array.  */
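/* For example (illustrative): given 'a[i_1] = x_2', the use of x_2 is a
   non-indexing use, while i_1 is used only to compute the address.  */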
291 
292 static bool
293 exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
294 {
295   tree operand;
296   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
297 
298   /* USE corresponds to some operand in STMT.  If there is no data
299      reference in STMT, then any operand that corresponds to USE
300      is not indexing an array.  */
301   if (!STMT_VINFO_DATA_REF (stmt_info))
302     return true;
303 
304   /* STMT has a data_ref.  FORNOW this means that it is of one of
305      the following forms:
306      -1- ARRAY_REF = var
307      -2- var = ARRAY_REF
308      (This should have been verified in analyze_data_refs).
309 
310      'var' in the second case corresponds to a def, not a use,
311      so USE cannot correspond to any operands that are not used
312      for array indexing.
313 
314      Therefore, all we need to check is if STMT falls into the
315      first case, and whether var corresponds to USE.  */
316 
317   if (!gimple_assign_copy_p (stmt))
318     return false;
319   if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
320     return false;
321   operand = gimple_assign_rhs1 (stmt);
322   if (TREE_CODE (operand) != SSA_NAME)
323     return false;
324 
325   if (operand == use)
326     return true;
327 
328   return false;
329 }
330 
331 
332 /*
333    Function process_use.
334 
335    Inputs:
336    - a USE in STMT in a loop represented by LOOP_VINFO
337    - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
338      that defined USE.  This is done by calling mark_relevant and passing it
339      the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
340    - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
341      be performed.
342 
343    Outputs:
344    Generally, LIVE_P and RELEVANT are used to define the liveness and
345    relevance info of the DEF_STMT of this USE:
346        STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
347        STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
348    Exceptions:
349    - case 1: If USE is used only for address computations (e.g. array indexing),
350    which does not need to be directly vectorized, then the liveness/relevance
351    of the respective DEF_STMT is left unchanged.
352    - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
353    skip DEF_STMT because it has already been processed.
354    - case 3: If DEF_STMT and STMT are in different nests, then  "relevant" will
355    be modified accordingly.
356 
357    Return true if everything is as expected. Return false otherwise.  */
358 
359 static bool
360 process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
361 	     enum vect_relevant relevant, VEC(gimple,heap) **worklist,
362 	     bool force)
363 {
364   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
365   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
366   stmt_vec_info dstmt_vinfo;
367   basic_block bb, def_bb;
368   tree def;
369   gimple def_stmt;
370   enum vect_def_type dt;
371 
372   /* case 1: we are only interested in uses that need to be vectorized.  Uses
373      that are used for address computation are not considered relevant.  */
374   if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
375      return true;
376 
377   if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
378     {
379       if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
380         fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
381       return false;
382     }
383 
384   if (!def_stmt || gimple_nop_p (def_stmt))
385     return true;
386 
387   def_bb = gimple_bb (def_stmt);
388   if (!flow_bb_inside_loop_p (loop, def_bb))
389     {
390       if (vect_print_dump_info (REPORT_DETAILS))
391 	fprintf (vect_dump, "def_stmt is out of loop.");
392       return true;
393     }
394 
395   /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
396      DEF_STMT must have already been processed, because this should be the
397      only way that STMT, which is a reduction-phi, was put in the worklist,
398      as there should be no other uses for DEF_STMT in the loop.  So we just
399      check that everything is as expected, and we are done.  */
400   dstmt_vinfo = vinfo_for_stmt (def_stmt);
401   bb = gimple_bb (stmt);
402   if (gimple_code (stmt) == GIMPLE_PHI
403       && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
404       && gimple_code (def_stmt) != GIMPLE_PHI
405       && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
406       && bb->loop_father == def_bb->loop_father)
407     {
408       if (vect_print_dump_info (REPORT_DETAILS))
409 	fprintf (vect_dump, "reduc-stmt defining reduc-phi in the same nest.");
410       if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
411 	dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
412       gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
413       gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
414 		  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
415       return true;
416     }
417 
418   /* case 3a: outer-loop stmt defining an inner-loop stmt:
419 	outer-loop-header-bb:
420 		d = def_stmt
421 	inner-loop:
422 		stmt # use (d)
423 	outer-loop-tail-bb:
424 		...		  */
425   if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
426     {
427       if (vect_print_dump_info (REPORT_DETAILS))
428 	fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt.");
429 
430       switch (relevant)
431 	{
432 	case vect_unused_in_scope:
433 	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
434 		      vect_used_in_scope : vect_unused_in_scope;
435 	  break;
436 
437 	case vect_used_in_outer_by_reduction:
438           gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
439 	  relevant = vect_used_by_reduction;
440 	  break;
441 
442 	case vect_used_in_outer:
443           gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
444 	  relevant = vect_used_in_scope;
445 	  break;
446 
447 	case vect_used_in_scope:
448 	  break;
449 
450 	default:
451 	  gcc_unreachable ();
452 	}
453     }
454 
455   /* case 3b: inner-loop stmt defining an outer-loop stmt:
456 	outer-loop-header-bb:
457 		...
458 	inner-loop:
459 		d = def_stmt
460 	outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
461 		stmt # use (d)		*/
462   else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
463     {
464       if (vect_print_dump_info (REPORT_DETAILS))
465 	fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt.");
466 
467       switch (relevant)
468         {
469         case vect_unused_in_scope:
470           relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
471             || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
472                       vect_used_in_outer_by_reduction : vect_unused_in_scope;
473           break;
474 
475         case vect_used_by_reduction:
476           relevant = vect_used_in_outer_by_reduction;
477           break;
478 
479         case vect_used_in_scope:
480           relevant = vect_used_in_outer;
481           break;
482 
483         default:
484           gcc_unreachable ();
485         }
486     }
487 
488   vect_mark_relevant (worklist, def_stmt, relevant, live_p,
489                       is_pattern_stmt_p (stmt_vinfo));
490   return true;
491 }
492 
493 
494 /* Function vect_mark_stmts_to_be_vectorized.
495 
496    Not all stmts in the loop need to be vectorized. For example:
497 
498      for i...
499        for j...
500    1.    T0 = i + j
501    2.	 T1 = a[T0]
502 
503    3.    j = j + 1
504 
505    Stmts 1 and 3 do not need to be vectorized, because loop control and
506    addressing of vectorized data-refs are handled differently.
507 
508    This pass detects such stmts.  */
509 
510 bool
511 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
512 {
513   VEC(gimple,heap) *worklist;
514   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
515   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
516   unsigned int nbbs = loop->num_nodes;
517   gimple_stmt_iterator si;
518   gimple stmt;
519   unsigned int i;
520   stmt_vec_info stmt_vinfo;
521   basic_block bb;
522   gimple phi;
523   bool live_p;
524   enum vect_relevant relevant, tmp_relevant;
525   enum vect_def_type def_type;
526 
527   if (vect_print_dump_info (REPORT_DETAILS))
528     fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
529 
530   worklist = VEC_alloc (gimple, heap, 64);
531 
532   /* 1. Init worklist.  */
533   for (i = 0; i < nbbs; i++)
534     {
535       bb = bbs[i];
536       for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
537 	{
538 	  phi = gsi_stmt (si);
539 	  if (vect_print_dump_info (REPORT_DETAILS))
540 	    {
541 	      fprintf (vect_dump, "init: phi relevant? ");
542 	      print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
543 	    }
544 
545 	  if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
546 	    vect_mark_relevant (&worklist, phi, relevant, live_p, false);
547 	}
548       for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
549 	{
550 	  stmt = gsi_stmt (si);
551 	  if (vect_print_dump_info (REPORT_DETAILS))
552 	    {
553 	      fprintf (vect_dump, "init: stmt relevant? ");
554 	      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
555 	    }
556 
557 	  if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
558             vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
559 	}
560     }
561 
562   /* 2. Process_worklist */
563   while (VEC_length (gimple, worklist) > 0)
564     {
565       use_operand_p use_p;
566       ssa_op_iter iter;
567 
568       stmt = VEC_pop (gimple, worklist);
569       if (vect_print_dump_info (REPORT_DETAILS))
570 	{
571           fprintf (vect_dump, "worklist: examine stmt: ");
572           print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
573 	}
574 
575       /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
576 	 (DEF_STMT) as relevant/irrelevant and live/dead according to the
577 	 liveness and relevance properties of STMT.  */
578       stmt_vinfo = vinfo_for_stmt (stmt);
579       relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
580       live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
581 
582       /* Generally, the liveness and relevance properties of STMT are
583 	 propagated as is to the DEF_STMTs of its USEs:
584 	  live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
585 	  relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
586 
587 	 One exception is when STMT has been identified as defining a reduction
588 	 variable; in this case we set the liveness/relevance as follows:
589 	   live_p = false
590 	   relevant = vect_used_by_reduction
591 	 This is because we distinguish between two kinds of relevant stmts -
592 	 those that are used by a reduction computation, and those that are
593 	 (also) used by a regular computation.  This allows us later on to
594 	 identify stmts that are used solely by a reduction, and therefore the
595 	 order of the results that they produce does not have to be kept.  */
596 
597       def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
598       tmp_relevant = relevant;
599       switch (def_type)
600         {
601           case vect_reduction_def:
602 	    switch (tmp_relevant)
603 	      {
604 	        case vect_unused_in_scope:
605 	          relevant = vect_used_by_reduction;
606 	          break;
607 
608 	        case vect_used_by_reduction:
609 	          if (gimple_code (stmt) == GIMPLE_PHI)
610                     break;
611   	          /* fall through */
612 
613 	        default:
614 	          if (vect_print_dump_info (REPORT_DETAILS))
615 	            fprintf (vect_dump, "unsupported use of reduction.");
616 
617   	          VEC_free (gimple, heap, worklist);
618 	          return false;
619 	      }
620 
621 	    live_p = false;
622 	    break;
623 
624           case vect_nested_cycle:
625             if (tmp_relevant != vect_unused_in_scope
626                 && tmp_relevant != vect_used_in_outer_by_reduction
627                 && tmp_relevant != vect_used_in_outer)
628               {
629                 if (vect_print_dump_info (REPORT_DETAILS))
630                   fprintf (vect_dump, "unsupported use of nested cycle.");
631 
632                 VEC_free (gimple, heap, worklist);
633                 return false;
634               }
635 
636             live_p = false;
637             break;
638 
639           case vect_double_reduction_def:
640             if (tmp_relevant != vect_unused_in_scope
641                 && tmp_relevant != vect_used_by_reduction)
642               {
643                 if (vect_print_dump_info (REPORT_DETAILS))
644                   fprintf (vect_dump, "unsupported use of double reduction.");
645 
646                 VEC_free (gimple, heap, worklist);
647                 return false;
648               }
649 
650             live_p = false;
651             break;
652 
653           default:
654             break;
655         }
656 
657       if (is_pattern_stmt_p (stmt_vinfo))
658         {
659           /* Pattern statements are not inserted into the code, so
660              FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
661              have to scan the RHS or function arguments instead.  */
662           if (is_gimple_assign (stmt))
663             {
664 	      enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
665 	      tree op = gimple_assign_rhs1 (stmt);
666 
667 	      i = 1;
668 	      if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
669 		{
670 		  if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
671 				    live_p, relevant, &worklist, false)
672 		      || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
673 				       live_p, relevant, &worklist, false))
674 		    {
675 		      VEC_free (gimple, heap, worklist);
676 		      return false;
677 		    }
678 		  i = 2;
679 		}
680 	      for (; i < gimple_num_ops (stmt); i++)
681                 {
682 		  op = gimple_op (stmt, i);
683                   if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
684 				    &worklist, false))
685                     {
686                       VEC_free (gimple, heap, worklist);
687                       return false;
688                     }
689                  }
690             }
691           else if (is_gimple_call (stmt))
692             {
693               for (i = 0; i < gimple_call_num_args (stmt); i++)
694                 {
695                   tree arg = gimple_call_arg (stmt, i);
696                   if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
697 				    &worklist, false))
698                     {
699                       VEC_free (gimple, heap, worklist);
700                       return false;
701                     }
702                 }
703             }
704         }
705       else
706         FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
707           {
708             tree op = USE_FROM_PTR (use_p);
709             if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
710 			      &worklist, false))
711               {
712                 VEC_free (gimple, heap, worklist);
713                 return false;
714               }
715           }
716 
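      /* The offset of a gather is used only to compute the address, so the
         non-indexing-operand check in process_use would normally skip it;
         pass FORCE so that the offset def is still marked for vectorization.  */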
717       if (STMT_VINFO_GATHER_P (stmt_vinfo))
718 	{
719 	  tree off;
720 	  tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
721 	  gcc_assert (decl);
722 	  if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
723 			    &worklist, true))
724 	    {
725 	      VEC_free (gimple, heap, worklist);
726 	      return false;
727 	    }
728 	}
729     } /* while worklist */
730 
731   VEC_free (gimple, heap, worklist);
732   return true;
733 }
734 
735 
736 /* Get the cost of TYPE_OF_COST via the target vectorization cost hook.  */
737 
738 static inline
739 int vect_get_stmt_cost (enum vect_cost_for_stmt type_of_cost)
740 {
741   tree dummy_type = NULL;
742   int dummy = 0;
743 
744   return targetm.vectorize.builtin_vectorization_cost (type_of_cost,
745                                                        dummy_type, dummy);
746 }
747 
748 
749 /* Get cost for STMT.  */
750 
751 int
752 cost_for_stmt (gimple stmt)
753 {
754   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
755 
756   switch (STMT_VINFO_TYPE (stmt_info))
757   {
758   case load_vec_info_type:
759     return vect_get_stmt_cost (scalar_load);
760   case store_vec_info_type:
761     return vect_get_stmt_cost (scalar_store);
762   case op_vec_info_type:
763   case condition_vec_info_type:
764   case assignment_vec_info_type:
765   case reduc_vec_info_type:
766   case induc_vec_info_type:
767   case type_promotion_vec_info_type:
768   case type_demotion_vec_info_type:
769   case type_conversion_vec_info_type:
770   case call_vec_info_type:
771     return vect_get_stmt_cost (scalar_stmt);
772   case undef_vec_info_type:
773   default:
774     gcc_unreachable ();
775   }
776 }
777 
778 /* Function vect_model_simple_cost.
779 
780    Models cost for simple operations, i.e. those that only emit ncopies of a
781    single op.  Right now, this does not account for multiple insns that could
782    be generated for the single vector op.  We will handle that shortly.  */
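/* For example (an illustrative count, not tied to any particular target):
   with NCOPIES == 2 and one constant operand, this charges two vector
   statements inside the loop and one vector statement outside the loop for
   building the invariant vector.  */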
783 
784 void
785 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
786 			enum vect_def_type *dt, slp_tree slp_node)
787 {
788   int i;
789   int inside_cost = 0, outside_cost = 0;
790 
791   /* The SLP costs were already calculated during SLP tree build.  */
792   if (PURE_SLP_STMT (stmt_info))
793     return;
794 
795   inside_cost = ncopies * vect_get_stmt_cost (vector_stmt);
796 
797   /* FORNOW: Assuming maximum 2 args per stmts.  */
798   for (i = 0; i < 2; i++)
799     {
800       if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
801 	outside_cost += vect_get_stmt_cost (vector_stmt);
802     }
803 
804   if (vect_print_dump_info (REPORT_COST))
805     fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, "
806              "outside_cost = %d .", inside_cost, outside_cost);
807 
808   /* Set the costs either in STMT_INFO or SLP_NODE (if exists).  */
809   stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
810   stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
811 }
812 
813 
814 /* Model cost for type demotion and promotion operations.  PWR is normally
815    zero for single-step promotions and demotions.  It will be one if
816    two-step promotion/demotion is required, and so on.  Each additional
817    step doubles the number of instructions required.  */
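/* For example (illustrative): with PWR == 1, a two-step promotion charges
   2^1 + 2^2 == 6 promote/demote statements to the inside cost, while a
   two-step demotion charges 2^0 + 2^1 == 3.  */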
818 
819 static void
820 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
821 				    enum vect_def_type *dt, int pwr)
822 {
823   int i, tmp;
824   int inside_cost = 0, outside_cost = 0, single_stmt_cost;
825 
826   /* The SLP costs were already calculated during SLP tree build.  */
827   if (PURE_SLP_STMT (stmt_info))
828     return;
829 
830   single_stmt_cost = vect_get_stmt_cost (vec_promote_demote);
831   for (i = 0; i < pwr + 1; i++)
832     {
833       tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
834 	(i + 1) : i;
835       inside_cost += vect_pow2 (tmp) * single_stmt_cost;
836     }
837 
838   /* FORNOW: Assuming maximum 2 args per stmts.  */
839   for (i = 0; i < 2; i++)
840     {
841       if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
842         outside_cost += vect_get_stmt_cost (vector_stmt);
843     }
844 
845   if (vect_print_dump_info (REPORT_COST))
846     fprintf (vect_dump, "vect_model_promotion_demotion_cost: inside_cost = %d, "
847              "outside_cost = %d .", inside_cost, outside_cost);
848 
849   /* Set the costs in STMT_INFO.  */
850   stmt_vinfo_set_inside_of_loop_cost (stmt_info, NULL, inside_cost);
851   stmt_vinfo_set_outside_of_loop_cost (stmt_info, NULL, outside_cost);
852 }
853 
854 /* Function vect_cost_strided_group_size
855 
856    For strided load or store, return the group_size only if it is the first
857    load or store of a group, else return 1.  This ensures that group size is
858    only returned once per group.  */
859 
860 static int
861 vect_cost_strided_group_size (stmt_vec_info stmt_info)
862 {
863   gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
864 
865   if (first_stmt == STMT_VINFO_STMT (stmt_info))
866     return GROUP_SIZE (stmt_info);
867 
868   return 1;
869 }
870 
871 
872 /* Function vect_model_store_cost
873 
874    Models cost for stores.  In the case of strided accesses, one access
875    has the overhead of the strided access attributed to it.  */
876 
877 void
878 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
879 		       bool store_lanes_p, enum vect_def_type dt,
880 		       slp_tree slp_node)
881 {
882   int group_size;
883   unsigned int inside_cost = 0, outside_cost = 0;
884   struct data_reference *first_dr;
885   gimple first_stmt;
886 
887   /* The SLP costs were already calculated during SLP tree build.  */
888   if (PURE_SLP_STMT (stmt_info))
889     return;
890 
891   if (dt == vect_constant_def || dt == vect_external_def)
892     outside_cost = vect_get_stmt_cost (scalar_to_vec);
893 
894   /* Strided access?  */
895   if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
896     {
897       if (slp_node)
898         {
899           first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
900           group_size = 1;
901         }
902       else
903         {
904           first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
905           group_size = vect_cost_strided_group_size (stmt_info);
906         }
907 
908       first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
909     }
910   /* Not a strided access.  */
911   else
912     {
913       group_size = 1;
914       first_dr = STMT_VINFO_DATA_REF (stmt_info);
915     }
916 
917   /* We assume that the cost of a single store-lanes instruction is
918      equivalent to the cost of GROUP_SIZE separate stores.  If a strided
919      access is instead being provided by a permute-and-store operation,
920      include the cost of the permutes.  */
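  /* For example (illustrative): with GROUP_SIZE == 4 and NCOPIES == 1 this
     charges log2 (4) * 4 == 8 vec_perm operations.  */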
921   if (!store_lanes_p && group_size > 1)
922     {
923       /* Uses a high and low interleave operation for each needed permute.  */
924       inside_cost = ncopies * exact_log2(group_size) * group_size
925         * vect_get_stmt_cost (vec_perm);
926 
927       if (vect_print_dump_info (REPORT_COST))
928         fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
929                  group_size);
930     }
931 
932   /* Costs of the stores.  */
933   vect_get_store_cost (first_dr, ncopies, &inside_cost);
934 
935   if (vect_print_dump_info (REPORT_COST))
936     fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, "
937              "outside_cost = %d .", inside_cost, outside_cost);
938 
939   /* Set the costs either in STMT_INFO or SLP_NODE (if exists).  */
940   stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
941   stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
942 }
943 
944 
945 /* Calculate cost of DR's memory access.  */
946 void
947 vect_get_store_cost (struct data_reference *dr, int ncopies,
948                      unsigned int *inside_cost)
949 {
950   int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
951 
952   switch (alignment_support_scheme)
953     {
954     case dr_aligned:
955       {
956         *inside_cost += ncopies * vect_get_stmt_cost (vector_store);
957 
958         if (vect_print_dump_info (REPORT_COST))
959           fprintf (vect_dump, "vect_model_store_cost: aligned.");
960 
961         break;
962       }
963 
964     case dr_unaligned_supported:
965       {
966         gimple stmt = DR_STMT (dr);
967         stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
968         tree vectype = STMT_VINFO_VECTYPE (stmt_info);
969 
970         /* Here, we assign an additional cost for the unaligned store.  */
971         *inside_cost += ncopies
972           * targetm.vectorize.builtin_vectorization_cost (unaligned_store,
973                                  vectype, DR_MISALIGNMENT (dr));
974 
975         if (vect_print_dump_info (REPORT_COST))
976           fprintf (vect_dump, "vect_model_store_cost: unaligned supported by "
977                    "hardware.");
978 
979         break;
980       }
981 
982     default:
983       gcc_unreachable ();
984     }
985 }
986 
987 
988 /* Function vect_model_load_cost
989 
990    Models cost for loads.  In the case of strided accesses, the last access
991    has the overhead of the strided access attributed to it.  Since unaligned
992    accesses are supported for loads, we also account for the costs of the
993    access scheme chosen.  */
994 
995 void
996 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, bool load_lanes_p,
997 		      slp_tree slp_node)
998 {
999   int group_size;
1000   gimple first_stmt;
1001   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
1002   unsigned int inside_cost = 0, outside_cost = 0;
1003 
1004   /* The SLP costs were already calculated during SLP tree build.  */
1005   if (PURE_SLP_STMT (stmt_info))
1006     return;
1007 
1008   /* Strided accesses?  */
1009   first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1010   if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && first_stmt && !slp_node)
1011     {
1012       group_size = vect_cost_strided_group_size (stmt_info);
1013       first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1014     }
1015   /* Not a strided access.  */
1016   else
1017     {
1018       group_size = 1;
1019       first_dr = dr;
1020     }
1021 
1022   /* We assume that the cost of a single load-lanes instruction is
1023      equivalent to the cost of GROUP_SIZE separate loads.  If a strided
1024      access is instead being provided by a load-and-permute operation,
1025      include the cost of the permutes.  */
1026   if (!load_lanes_p && group_size > 1)
1027     {
1028       /* Uses even and odd extract operations for each needed permute.  */
1029       inside_cost = ncopies * exact_log2(group_size) * group_size
1030 	* vect_get_stmt_cost (vec_perm);
1031 
1032       if (vect_print_dump_info (REPORT_COST))
1033         fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
1034                  group_size);
1035     }
1036 
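  /* The third argument tells vect_get_load_cost whether to count the one-time
     realignment setup (see the dr_explicit_realign_optimized case there);
     within a strided group that setup is counted only once.  */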
1037   /* The loads themselves.  */
1038   vect_get_load_cost (first_dr, ncopies,
1039          ((!STMT_VINFO_STRIDED_ACCESS (stmt_info)) || group_size > 1
1040           || slp_node),
1041          &inside_cost, &outside_cost);
1042 
1043   if (vect_print_dump_info (REPORT_COST))
1044     fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
1045              "outside_cost = %d .", inside_cost, outside_cost);
1046 
1047   /* Set the costs either in STMT_INFO or SLP_NODE (if exists).  */
1048   stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
1049   stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
1050 }
1051 
1052 
1053 /* Calculate cost of DR's memory access.  */
1054 void
1055 vect_get_load_cost (struct data_reference *dr, int ncopies,
1056                     bool add_realign_cost, unsigned int *inside_cost,
1057                     unsigned int *outside_cost)
1058 {
1059   int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1060 
1061   switch (alignment_support_scheme)
1062     {
1063     case dr_aligned:
1064       {
1065         *inside_cost += ncopies * vect_get_stmt_cost (vector_load);
1066 
1067         if (vect_print_dump_info (REPORT_COST))
1068           fprintf (vect_dump, "vect_model_load_cost: aligned.");
1069 
1070         break;
1071       }
1072     case dr_unaligned_supported:
1073       {
1074         gimple stmt = DR_STMT (dr);
1075         stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1076         tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1077 
1078         /* Here, we assign an additional cost for the unaligned load.  */
1079         *inside_cost += ncopies
1080           * targetm.vectorize.builtin_vectorization_cost (unaligned_load,
1081                                            vectype, DR_MISALIGNMENT (dr));
1082         if (vect_print_dump_info (REPORT_COST))
1083           fprintf (vect_dump, "vect_model_load_cost: unaligned supported by "
1084                    "hardware.");
1085 
1086         break;
1087       }
1088     case dr_explicit_realign:
1089       {
1090         *inside_cost += ncopies * (2 * vect_get_stmt_cost (vector_load)
1091 				   + vect_get_stmt_cost (vec_perm));
1092 
1093         /* FIXME: If the misalignment remains fixed across the iterations of
1094            the containing loop, the following cost should be added to the
1095            outside costs.  */
1096         if (targetm.vectorize.builtin_mask_for_load)
1097           *inside_cost += vect_get_stmt_cost (vector_stmt);
1098 
1099         if (vect_print_dump_info (REPORT_COST))
1100           fprintf (vect_dump, "vect_model_load_cost: explicit realign");
1101 
1102         break;
1103       }
1104     case dr_explicit_realign_optimized:
1105       {
1106         if (vect_print_dump_info (REPORT_COST))
1107           fprintf (vect_dump, "vect_model_load_cost: unaligned software "
1108                    "pipelined.");
1109 
1110         /* Unaligned software pipeline has a load of an address, an initial
1111            load, and possibly a mask operation to "prime" the loop.  However,
1112            if this is an access in a group of loads, which provide strided
1113            access, then the above cost should only be considered for one
1114            access in the group.  Inside the loop, there is a load op
1115            and a realignment op.  */
1116 
1117         if (add_realign_cost)
1118           {
1119             *outside_cost = 2 * vect_get_stmt_cost (vector_stmt);
1120             if (targetm.vectorize.builtin_mask_for_load)
1121               *outside_cost += vect_get_stmt_cost (vector_stmt);
1122           }
1123 
1124         *inside_cost += ncopies * (vect_get_stmt_cost (vector_load)
1125 				   + vect_get_stmt_cost (vec_perm));
1126 
1127         if (vect_print_dump_info (REPORT_COST))
1128           fprintf (vect_dump,
1129 		   "vect_model_load_cost: explicit realign optimized");
1130 
1131         break;
1132       }
1133 
1134     default:
1135       gcc_unreachable ();
1136     }
1137 }
1138 
1139 
1140 /* Function vect_init_vector.
1141 
1142    Insert a new stmt (INIT_STMT) that initializes a new vector variable with
1143    the vector elements of VECTOR_VAR.  Place the initialization at GSI if it
1144    is not NULL.  Otherwise, place the initialization at the loop preheader.
1145    Return the DEF of INIT_STMT.
1146    It will be used in the vectorization of STMT.  */
1147 
1148 tree
1149 vect_init_vector (gimple stmt, tree vector_var, tree vector_type,
1150 		  gimple_stmt_iterator *gsi)
1151 {
1152   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1153   tree new_var;
1154   gimple init_stmt;
1155   tree vec_oprnd;
1156   edge pe;
1157   tree new_temp;
1158   basic_block new_bb;
1159 
1160   new_var = vect_get_new_vect_var (vector_type, vect_simple_var, "cst_");
1161   add_referenced_var (new_var);
1162   init_stmt = gimple_build_assign  (new_var, vector_var);
1163   new_temp = make_ssa_name (new_var, init_stmt);
1164   gimple_assign_set_lhs (init_stmt, new_temp);
1165 
1166   if (gsi)
1167     vect_finish_stmt_generation (stmt, init_stmt, gsi);
1168   else
1169     {
1170       loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1171 
1172       if (loop_vinfo)
1173         {
1174           struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1175 
1176           if (nested_in_vect_loop_p (loop, stmt))
1177             loop = loop->inner;
1178 
1179 	  pe = loop_preheader_edge (loop);
1180           new_bb = gsi_insert_on_edge_immediate (pe, init_stmt);
1181           gcc_assert (!new_bb);
1182 	}
1183       else
1184        {
1185           bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1186           basic_block bb;
1187           gimple_stmt_iterator gsi_bb_start;
1188 
1189           gcc_assert (bb_vinfo);
1190           bb = BB_VINFO_BB (bb_vinfo);
1191           gsi_bb_start = gsi_after_labels (bb);
1192           gsi_insert_before (&gsi_bb_start, init_stmt, GSI_SAME_STMT);
1193        }
1194     }
1195 
1196   if (vect_print_dump_info (REPORT_DETAILS))
1197     {
1198       fprintf (vect_dump, "created new init_stmt: ");
1199       print_gimple_stmt (vect_dump, init_stmt, 0, TDF_SLIM);
1200     }
1201 
1202   vec_oprnd = gimple_assign_lhs (init_stmt);
1203   return vec_oprnd;
1204 }
1205 
1206 
1207 /* Function vect_get_vec_def_for_operand.
1208 
1209    OP is an operand in STMT.  This function returns a (vector) def that will be
1210    used in the vectorized stmt for STMT.
1211 
1212    In the case that OP is an SSA_NAME which is defined in the loop, then
1213    STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1214 
1215    In case OP is an invariant or constant, a new stmt that creates a vector def
1216    needs to be introduced.  */
1217 
1218 tree
1219 vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
1220 {
1221   tree vec_oprnd;
1222   gimple vec_stmt;
1223   gimple def_stmt;
1224   stmt_vec_info def_stmt_info = NULL;
1225   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1226   unsigned int nunits;
1227   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1228   tree vec_inv;
1229   tree vec_cst;
1230   tree t = NULL_TREE;
1231   tree def;
1232   int i;
1233   enum vect_def_type dt;
1234   bool is_simple_use;
1235   tree vector_type;
1236 
1237   if (vect_print_dump_info (REPORT_DETAILS))
1238     {
1239       fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
1240       print_generic_expr (vect_dump, op, TDF_SLIM);
1241     }
1242 
1243   is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
1244 				      &def_stmt, &def, &dt);
1245   gcc_assert (is_simple_use);
1246   if (vect_print_dump_info (REPORT_DETAILS))
1247     {
1248       if (def)
1249         {
1250           fprintf (vect_dump, "def =  ");
1251           print_generic_expr (vect_dump, def, TDF_SLIM);
1252         }
1253       if (def_stmt)
1254         {
1255           fprintf (vect_dump, "  def_stmt =  ");
1256 	  print_gimple_stmt (vect_dump, def_stmt, 0, TDF_SLIM);
1257         }
1258     }
1259 
1260   switch (dt)
1261     {
1262     /* Case 1: operand is a constant.  */
1263     case vect_constant_def:
1264       {
1265 	vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1266 	gcc_assert (vector_type);
1267 	nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1268 
1269 	if (scalar_def)
1270 	  *scalar_def = op;
1271 
1272         /* Create 'vect_cst_ = {cst,cst,...,cst}'  */
1273         if (vect_print_dump_info (REPORT_DETAILS))
1274           fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);
1275 
1276         vec_cst = build_vector_from_val (vector_type,
1277 					 fold_convert (TREE_TYPE (vector_type),
1278 						       op));
1279         return vect_init_vector (stmt, vec_cst, vector_type, NULL);
1280       }
1281 
1282     /* Case 2: operand is defined outside the loop - loop invariant.  */
1283     case vect_external_def:
1284       {
1285 	vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
1286 	gcc_assert (vector_type);
1287 	nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1288 
1289 	if (scalar_def)
1290 	  *scalar_def = def;
1291 
1292         /* Create 'vec_inv = {inv,inv,..,inv}'  */
1293         if (vect_print_dump_info (REPORT_DETAILS))
1294           fprintf (vect_dump, "Create vector_inv.");
1295 
1296         for (i = nunits - 1; i >= 0; --i)
1297           {
1298             t = tree_cons (NULL_TREE, def, t);
1299           }
1300 
1301 	/* FIXME: use build_constructor directly.  */
1302         vec_inv = build_constructor_from_list (vector_type, t);
1303         return vect_init_vector (stmt, vec_inv, vector_type, NULL);
1304       }
1305 
1306     /* Case 3: operand is defined inside the loop.  */
1307     case vect_internal_def:
1308       {
1309 	if (scalar_def)
1310 	  *scalar_def = NULL/* FIXME tuples: def_stmt*/;
1311 
1312         /* Get the def from the vectorized stmt.  */
1313         def_stmt_info = vinfo_for_stmt (def_stmt);
1314 
1315         vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1316         /* Get vectorized pattern statement.  */
1317         if (!vec_stmt
1318             && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1319             && !STMT_VINFO_RELEVANT (def_stmt_info))
1320           vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1321                        STMT_VINFO_RELATED_STMT (def_stmt_info)));
1322         gcc_assert (vec_stmt);
1323 	if (gimple_code (vec_stmt) == GIMPLE_PHI)
1324 	  vec_oprnd = PHI_RESULT (vec_stmt);
1325 	else if (is_gimple_call (vec_stmt))
1326 	  vec_oprnd = gimple_call_lhs (vec_stmt);
1327 	else
1328 	  vec_oprnd = gimple_assign_lhs (vec_stmt);
1329         return vec_oprnd;
1330       }
1331 
1332     /* Case 4: operand is defined by a loop header phi - reduction  */
1333     case vect_reduction_def:
1334     case vect_double_reduction_def:
1335     case vect_nested_cycle:
1336       {
1337 	struct loop *loop;
1338 
1339 	gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1340 	loop = (gimple_bb (def_stmt))->loop_father;
1341 
1342         /* Get the def before the loop  */
1343         op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1344         return get_initial_def_for_reduction (stmt, op, scalar_def);
1345      }
1346 
1347     /* Case 5: operand is defined by loop-header phi - induction.  */
1348     case vect_induction_def:
1349       {
1350 	gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1351 
1352         /* Get the def from the vectorized stmt.  */
1353         def_stmt_info = vinfo_for_stmt (def_stmt);
1354         vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1355 	if (gimple_code (vec_stmt) == GIMPLE_PHI)
1356 	  vec_oprnd = PHI_RESULT (vec_stmt);
1357 	else
1358 	  vec_oprnd = gimple_get_lhs (vec_stmt);
1359         return vec_oprnd;
1360       }
1361 
1362     default:
1363       gcc_unreachable ();
1364     }
1365 }
1366 
1367 
1368 /* Function vect_get_vec_def_for_stmt_copy
1369 
1370    Return a vector-def for an operand.  This function is used when the
1371    vectorized stmt to be created (by the caller to this function) is a "copy"
1372    created in case the vectorized result cannot fit in one vector, and several
1373    copies of the vector-stmt are required.  In this case the vector-def is
1374    retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1375    of the stmt that defines VEC_OPRND.
1376    DT is the type of the vector def VEC_OPRND.
1377 
1378    Context:
1379         In case the vectorization factor (VF) is bigger than the number
1380    of elements that can fit in a vectype (nunits), we have to generate
1381    more than one vector stmt to vectorize the scalar stmt.  This situation
1382    arises when there are multiple data-types operated upon in the loop; the
1383    smallest data-type determines the VF, and as a result, when vectorizing
1384    stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1385    vector stmt (each computing a vector of 'nunits' results, and together
1386    computing 'VF' results in each iteration).  This function is called when
1387    vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1388    which VF=16 and nunits=4, so the number of copies required is 4):
1389 
1390    scalar stmt:         vectorized into:        STMT_VINFO_RELATED_STMT
1391 
1392    S1: x = load         VS1.0:  vx.0 = memref0      VS1.1
1393                         VS1.1:  vx.1 = memref1      VS1.2
1394                         VS1.2:  vx.2 = memref2      VS1.3
1395                         VS1.3:  vx.3 = memref3
1396 
1397    S2: z = x + ...      VSnew.0:  vz0 = vx.0 + ...  VSnew.1
1398                         VSnew.1:  vz1 = vx.1 + ...  VSnew.2
1399                         VSnew.2:  vz2 = vx.2 + ...  VSnew.3
1400                         VSnew.3:  vz3 = vx.3 + ...
1401 
1402    The vectorization of S1 is explained in vectorizable_load.
1403    The vectorization of S2:
1404         To create the first vector-stmt out of the 4 copies - VSnew.0 -
1405    the function 'vect_get_vec_def_for_operand' is called to
1406    get the relevant vector-def for each operand of S2.  For operand x it
1407    returns  the vector-def 'vx.0'.
1408 
1409         To create the remaining copies of the vector-stmt (VSnew.j), this
1410    function is called to get the relevant vector-def for each operand.  It is
1411    obtained from the respective VS1.j stmt, which is recorded in the
1412    STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1413 
1414         For example, to obtain the vector-def 'vx.1' in order to create the
1415    vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1416    Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1417    STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1418    and return its def ('vx.1').
1419    Overall, to create the above sequence this function will be called 3 times:
1420         vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1421         vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1422         vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */
1423 
1424 tree
1425 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1426 {
1427   gimple vec_stmt_for_operand;
1428   stmt_vec_info def_stmt_info;
1429 
1430   /* Do nothing; can reuse same def.  */
1431   if (dt == vect_external_def || dt == vect_constant_def )
1432     return vec_oprnd;
1433 
1434   vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1435   def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1436   gcc_assert (def_stmt_info);
1437   vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1438   gcc_assert (vec_stmt_for_operand);
1440   if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1441     vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1442   else
1443     vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1444   return vec_oprnd;
1445 }
1446 
1447 
1448 /* Get vectorized definitions for the operands to create a copy of an original
1449    stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */
1450 
1451 static void
1452 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1453 				 VEC(tree,heap) **vec_oprnds0,
1454 				 VEC(tree,heap) **vec_oprnds1)
1455 {
1456   tree vec_oprnd = VEC_pop (tree, *vec_oprnds0);
1457 
1458   vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1459   VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
1460 
1461   if (vec_oprnds1 && *vec_oprnds1)
1462     {
1463       vec_oprnd = VEC_pop (tree, *vec_oprnds1);
1464       vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1465       VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1466     }
1467 }
1468 
1469 
1470 /* Get vectorized definitions for OP0 and OP1.
1471    REDUC_INDEX is the index of the reduction operand in case of reduction,
1472    and -1 otherwise.  */
1473 
1474 void
1475 vect_get_vec_defs (tree op0, tree op1, gimple stmt,
1476 		   VEC (tree, heap) **vec_oprnds0,
1477 		   VEC (tree, heap) **vec_oprnds1,
1478 		   slp_tree slp_node, int reduc_index)
1479 {
1480   if (slp_node)
1481     {
1482       int nops = (op1 == NULL_TREE) ? 1 : 2;
1483       VEC (tree, heap) *ops = VEC_alloc (tree, heap, nops);
1484       VEC (slp_void_p, heap) *vec_defs = VEC_alloc (slp_void_p, heap, nops);
1485 
1486       VEC_quick_push (tree, ops, op0);
1487       if (op1)
1488         VEC_quick_push (tree, ops, op1);
1489 
1490       vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1491 
1492       *vec_oprnds0 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0);
1493       if (op1)
1494         *vec_oprnds1 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 1);
1495 
1496       VEC_free (tree, heap, ops);
1497       VEC_free (slp_void_p, heap, vec_defs);
1498     }
1499   else
1500     {
1501       tree vec_oprnd;
1502 
1503       *vec_oprnds0 = VEC_alloc (tree, heap, 1);
1504       vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
1505       VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
1506 
1507       if (op1)
1508 	{
1509 	  *vec_oprnds1 = VEC_alloc (tree, heap, 1);
1510 	  vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
1511 	  VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1512 	}
1513     }
1514 }
1515 
1516 
1517 /* Function vect_finish_stmt_generation.
1518 
1519    Insert a new stmt.  */
1520 
1521 void
1522 vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
1523 			     gimple_stmt_iterator *gsi)
1524 {
1525   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1526   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1527   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1528 
1529   gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1530 
1531   gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1532 
1533   set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
1534                                                    bb_vinfo));
1535 
1536   if (vect_print_dump_info (REPORT_DETAILS))
1537     {
1538       fprintf (vect_dump, "add new stmt: ");
1539       print_gimple_stmt (vect_dump, vec_stmt, 0, TDF_SLIM);
1540     }
1541 
1542   gimple_set_location (vec_stmt, gimple_location (stmt));
1543 }
1544 
1545 /* Checks if CALL can be vectorized with output type VECTYPE_OUT and
1546    input type VECTYPE_IN.  Returns a function declaration if the target
1547    has a vectorized version of the function, or NULL_TREE otherwise.  */
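/* For example, when the target provides vector variants of math builtins
   such as sqrt, the hook used below returns the declaration of the matching
   variant for the requested vector types; otherwise it returns NULL_TREE.  */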
1548 
1549 tree
1550 vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
1551 {
1552   tree fndecl = gimple_call_fndecl (call);
1553 
1554   /* We only handle functions that do not read or clobber memory -- i.e.
1555      const or novops ones.  */
1556   if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
1557     return NULL_TREE;
1558 
1559   if (!fndecl
1560       || TREE_CODE (fndecl) != FUNCTION_DECL
1561       || !DECL_BUILT_IN (fndecl))
1562     return NULL_TREE;
1563 
1564   return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
1565 						        vectype_in);
1566 }
1567 
1568 /* Function vectorizable_call.
1569 
1570    Check if STMT performs a function call that can be vectorized.
1571    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1573    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
1574 
1575 static bool
1576 vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
1577 		   slp_tree slp_node)
1578 {
1579   tree vec_dest;
1580   tree scalar_dest;
1581   tree op, type;
1582   tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
1583   stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
1584   tree vectype_out, vectype_in;
1585   int nunits_in;
1586   int nunits_out;
1587   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1588   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1589   tree fndecl, new_temp, def, rhs_type;
1590   gimple def_stmt;
1591   enum vect_def_type dt[3]
1592     = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
1593   gimple new_stmt = NULL;
1594   int ncopies, j;
1595   VEC(tree, heap) *vargs = NULL;
1596   enum { NARROW, NONE, WIDEN } modifier;
1597   size_t i, nargs;
1598   tree lhs;
1599 
1600   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
1601     return false;
1602 
1603   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1604     return false;
1605 
1606   /* Is STMT a vectorizable call?   */
1607   if (!is_gimple_call (stmt))
1608     return false;
1609 
1610   if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
1611     return false;
1612 
1613   if (stmt_can_throw_internal (stmt))
1614     return false;
1615 
1616   vectype_out = STMT_VINFO_VECTYPE (stmt_info);
1617 
1618   /* Process function arguments.  */
1619   rhs_type = NULL_TREE;
1620   vectype_in = NULL_TREE;
1621   nargs = gimple_call_num_args (stmt);
1622 
  /* Bail out if the function has more than three arguments; we do not have
     interesting builtin functions to vectorize with more than two arguments
     except for fma.  Calls with no arguments are not handled either.  */
1626   if (nargs == 0 || nargs > 3)
1627     return false;
1628 
1629   for (i = 0; i < nargs; i++)
1630     {
1631       tree opvectype;
1632 
1633       op = gimple_call_arg (stmt, i);
1634 
1635       /* We can only handle calls with arguments of the same type.  */
1636       if (rhs_type
1637 	  && !types_compatible_p (rhs_type, TREE_TYPE (op)))
1638 	{
1639 	  if (vect_print_dump_info (REPORT_DETAILS))
1640 	    fprintf (vect_dump, "argument types differ.");
1641 	  return false;
1642 	}
1643       if (!rhs_type)
1644 	rhs_type = TREE_TYPE (op);
1645 
1646       if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
1647 				 &def_stmt, &def, &dt[i], &opvectype))
1648 	{
1649 	  if (vect_print_dump_info (REPORT_DETAILS))
1650 	    fprintf (vect_dump, "use not simple.");
1651 	  return false;
1652 	}
1653 
1654       if (!vectype_in)
1655 	vectype_in = opvectype;
1656       else if (opvectype
1657 	       && opvectype != vectype_in)
1658 	{
1659 	  if (vect_print_dump_info (REPORT_DETAILS))
1660 	    fprintf (vect_dump, "argument vector types differ.");
1661 	  return false;
1662 	}
1663     }
  /* If all arguments are external or constant defs, use a vector type with
     the same size as the output vector type.  */
1666   if (!vectype_in)
1667     vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
1668   if (vec_stmt)
1669     gcc_assert (vectype_in);
1670   if (!vectype_in)
1671     {
1672       if (vect_print_dump_info (REPORT_DETAILS))
1673         {
1674           fprintf (vect_dump, "no vectype for scalar type ");
1675           print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
1676         }
1677 
1678       return false;
1679     }
1680 
1681   /* FORNOW */
1682   nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
1683   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
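  /* For example (a sketch; the actual modes are target-dependent):
     VECTYPE_IN = V2DF and VECTYPE_OUT = V4SI gives NARROW, in which case
     each vectorized call consumes two input vectors per output vector
     (hence the NARROW case below pushes two defs per argument); equal
     counts give NONE, and the opposite ratio gives WIDEN.  */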
1684   if (nunits_in == nunits_out / 2)
1685     modifier = NARROW;
1686   else if (nunits_out == nunits_in)
1687     modifier = NONE;
1688   else if (nunits_out == nunits_in / 2)
1689     modifier = WIDEN;
1690   else
1691     return false;
1692 
1693   /* For now, we only vectorize functions if a target specific builtin
1694      is available.  TODO -- in some cases, it might be profitable to
1695      insert the calls for pieces of the vector, in order to be able
1696      to vectorize other operations in the loop.  */
1697   fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
1698   if (fndecl == NULL_TREE)
1699     {
1700       if (vect_print_dump_info (REPORT_DETAILS))
1701 	fprintf (vect_dump, "function is not vectorizable.");
1702 
1703       return false;
1704     }
1705 
1706   gcc_assert (!gimple_vuse (stmt));
1707 
1708   if (slp_node || PURE_SLP_STMT (stmt_info))
1709     ncopies = 1;
1710   else if (modifier == NARROW)
1711     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
1712   else
1713     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1714 
1715   /* Sanity check: make sure that at least one copy of the vectorized stmt
1716      needs to be generated.  */
1717   gcc_assert (ncopies >= 1);
1718 
1719   if (!vec_stmt) /* transformation not required.  */
1720     {
1721       STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1722       if (vect_print_dump_info (REPORT_DETAILS))
1723         fprintf (vect_dump, "=== vectorizable_call ===");
1724       vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
1725       return true;
1726     }
1727 
1728   /** Transform.  **/
1729 
1730   if (vect_print_dump_info (REPORT_DETAILS))
1731     fprintf (vect_dump, "transform call.");
1732 
1733   /* Handle def.  */
1734   scalar_dest = gimple_call_lhs (stmt);
1735   vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
1736 
1737   prev_stmt_info = NULL;
1738   switch (modifier)
1739     {
1740     case NONE:
1741       for (j = 0; j < ncopies; ++j)
1742 	{
1743 	  /* Build argument list for the vectorized call.  */
1744 	  if (j == 0)
1745 	    vargs = VEC_alloc (tree, heap, nargs);
1746 	  else
1747 	    VEC_truncate (tree, vargs, 0);
1748 
1749 	  if (slp_node)
1750 	    {
1751 	      VEC (slp_void_p, heap) *vec_defs
1752 		= VEC_alloc (slp_void_p, heap, nargs);
1753 	      VEC (tree, heap) *vec_oprnds0;
1754 
1755 	      for (i = 0; i < nargs; i++)
1756 		VEC_quick_push (tree, vargs, gimple_call_arg (stmt, i));
1757 	      vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
1758 	      vec_oprnds0
1759 		= (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0);
1760 
1761 	      /* Arguments are ready.  Create the new vector stmt.  */
1762 	      FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vec_oprnd0)
1763 		{
1764 		  size_t k;
1765 		  for (k = 0; k < nargs; k++)
1766 		    {
1767 		      VEC (tree, heap) *vec_oprndsk
1768 			= (VEC (tree, heap) *)
1769 			  VEC_index (slp_void_p, vec_defs, k);
1770 		      VEC_replace (tree, vargs, k,
1771 				   VEC_index (tree, vec_oprndsk, i));
1772 		    }
1773 		  new_stmt = gimple_build_call_vec (fndecl, vargs);
1774 		  new_temp = make_ssa_name (vec_dest, new_stmt);
1775 		  gimple_call_set_lhs (new_stmt, new_temp);
1776 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
1777 		  mark_symbols_for_renaming (new_stmt);
1778 		  VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
1779 				  new_stmt);
1780 		}
1781 
1782 	      for (i = 0; i < nargs; i++)
1783 		{
1784 		  VEC (tree, heap) *vec_oprndsi
1785 		    = (VEC (tree, heap) *)
1786 		      VEC_index (slp_void_p, vec_defs, i);
1787 		  VEC_free (tree, heap, vec_oprndsi);
1788 		}
1789 	      VEC_free (slp_void_p, heap, vec_defs);
1790 	      continue;
1791 	    }
1792 
1793 	  for (i = 0; i < nargs; i++)
1794 	    {
1795 	      op = gimple_call_arg (stmt, i);
1796 	      if (j == 0)
1797 		vec_oprnd0
1798 		  = vect_get_vec_def_for_operand (op, stmt, NULL);
1799 	      else
1800 		{
1801 		  vec_oprnd0 = gimple_call_arg (new_stmt, i);
1802 		  vec_oprnd0
1803                     = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1804 		}
1805 
1806 	      VEC_quick_push (tree, vargs, vec_oprnd0);
1807 	    }
1808 
1809 	  new_stmt = gimple_build_call_vec (fndecl, vargs);
1810 	  new_temp = make_ssa_name (vec_dest, new_stmt);
1811 	  gimple_call_set_lhs (new_stmt, new_temp);
1812 
1813 	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
1814 	  mark_symbols_for_renaming (new_stmt);
1815 
1816 	  if (j == 0)
1817 	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
1818 	  else
1819 	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1820 
1821 	  prev_stmt_info = vinfo_for_stmt (new_stmt);
1822 	}
1823 
1824       break;
1825 
1826     case NARROW:
1827       for (j = 0; j < ncopies; ++j)
1828 	{
1829 	  /* Build argument list for the vectorized call.  */
1830 	  if (j == 0)
1831 	    vargs = VEC_alloc (tree, heap, nargs * 2);
1832 	  else
1833 	    VEC_truncate (tree, vargs, 0);
1834 
1835 	  if (slp_node)
1836 	    {
1837 	      VEC (slp_void_p, heap) *vec_defs
1838 		= VEC_alloc (slp_void_p, heap, nargs);
1839 	      VEC (tree, heap) *vec_oprnds0;
1840 
1841 	      for (i = 0; i < nargs; i++)
1842 		VEC_quick_push (tree, vargs, gimple_call_arg (stmt, i));
1843 	      vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
1844 	      vec_oprnds0
1845 		= (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0);
1846 
1847 	      /* Arguments are ready.  Create the new vector stmt.  */
1848 	      for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vec_oprnd0);
1849 		   i += 2)
1850 		{
1851 		  size_t k;
1852 		  VEC_truncate (tree, vargs, 0);
1853 		  for (k = 0; k < nargs; k++)
1854 		    {
1855 		      VEC (tree, heap) *vec_oprndsk
1856 			= (VEC (tree, heap) *)
1857 			  VEC_index (slp_void_p, vec_defs, k);
1858 		      VEC_quick_push (tree, vargs,
1859 				      VEC_index (tree, vec_oprndsk, i));
1860 		      VEC_quick_push (tree, vargs,
1861 				      VEC_index (tree, vec_oprndsk, i + 1));
1862 		    }
1863 		  new_stmt = gimple_build_call_vec (fndecl, vargs);
1864 		  new_temp = make_ssa_name (vec_dest, new_stmt);
1865 		  gimple_call_set_lhs (new_stmt, new_temp);
1866 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
1867 		  mark_symbols_for_renaming (new_stmt);
1868 		  VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
1869 				  new_stmt);
1870 		}
1871 
1872 	      for (i = 0; i < nargs; i++)
1873 		{
1874 		  VEC (tree, heap) *vec_oprndsi
1875 		    = (VEC (tree, heap) *)
1876 		      VEC_index (slp_void_p, vec_defs, i);
1877 		  VEC_free (tree, heap, vec_oprndsi);
1878 		}
1879 	      VEC_free (slp_void_p, heap, vec_defs);
1880 	      continue;
1881 	    }
1882 
1883 	  for (i = 0; i < nargs; i++)
1884 	    {
1885 	      op = gimple_call_arg (stmt, i);
1886 	      if (j == 0)
1887 		{
1888 		  vec_oprnd0
1889 		    = vect_get_vec_def_for_operand (op, stmt, NULL);
1890 		  vec_oprnd1
1891 		    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1892 		}
1893 	      else
1894 		{
1895 		  vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
1896 		  vec_oprnd0
1897 		    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
1898 		  vec_oprnd1
1899 		    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1900 		}
1901 
1902 	      VEC_quick_push (tree, vargs, vec_oprnd0);
1903 	      VEC_quick_push (tree, vargs, vec_oprnd1);
1904 	    }
1905 
1906 	  new_stmt = gimple_build_call_vec (fndecl, vargs);
1907 	  new_temp = make_ssa_name (vec_dest, new_stmt);
1908 	  gimple_call_set_lhs (new_stmt, new_temp);
1909 
1910 	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
1911 	  mark_symbols_for_renaming (new_stmt);
1912 
1913 	  if (j == 0)
1914 	    STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
1915 	  else
1916 	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1917 
1918 	  prev_stmt_info = vinfo_for_stmt (new_stmt);
1919 	}
1920 
1921       *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
1922 
1923       break;
1924 
1925     case WIDEN:
1926       /* No current target implements this case.  */
1927       return false;
1928     }
1929 
1930   VEC_free (tree, heap, vargs);
1931 
1932   /* Update the exception handling table with the vector stmt if necessary.  */
1933   if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
1934     gimple_purge_dead_eh_edges (gimple_bb (stmt));
1935 
  /* The call in STMT might prevent it from being removed in dce.
     However, we cannot remove it here, due to the way the ssa name
     it defines is mapped to the new definition.  So just replace the
     rhs of the statement with something harmless.  */
1940 
1941   if (slp_node)
1942     return true;
1943 
1944   type = TREE_TYPE (scalar_dest);
1945   if (is_pattern_stmt_p (stmt_info))
1946     lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
1947   else
1948     lhs = gimple_call_lhs (stmt);
1949   new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
1950   set_vinfo_for_stmt (new_stmt, stmt_info);
1951   set_vinfo_for_stmt (stmt, NULL);
1952   STMT_VINFO_STMT (stmt_info) = new_stmt;
1953   gsi_replace (gsi, new_stmt, false);
1954   SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
1955 
1956   return true;
1957 }
1958 
1959 
1960 /* Function vect_gen_widened_results_half
1961 
   Create a vector stmt whose code, number of arguments, and result
   variable are CODE, OP_TYPE, and VEC_DEST, and whose arguments are
   VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at GSI.
1965    In the case that CODE is a CALL_EXPR, this means that a call to DECL
1966    needs to be created (DECL is a function-decl of a target-builtin).
1967    STMT is the original scalar stmt that we are vectorizing.  */
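
/* An illustrative sketch: a widening multiply from V8HI to V4SI is emitted
   as two halves, e.g.

       vect_lo.1 = VEC_WIDEN_MULT_LO_EXPR <vect_a.2, vect_b.3>;
       vect_hi.4 = VEC_WIDEN_MULT_HI_EXPR <vect_a.2, vect_b.3>;

   and the caller invokes this helper once per half, passing either the
   tree code for that half or, when CODE is CALL_EXPR, the target builtin
   DECL.  */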
1968 
1969 static gimple
1970 vect_gen_widened_results_half (enum tree_code code,
1971 			       tree decl,
1972                                tree vec_oprnd0, tree vec_oprnd1, int op_type,
1973 			       tree vec_dest, gimple_stmt_iterator *gsi,
1974 			       gimple stmt)
1975 {
1976   gimple new_stmt;
1977   tree new_temp;
1978 
1979   /* Generate half of the widened result:  */
1980   if (code == CALL_EXPR)
1981     {
1982       /* Target specific support  */
1983       if (op_type == binary_op)
1984 	new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
1985       else
1986 	new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
1987       new_temp = make_ssa_name (vec_dest, new_stmt);
1988       gimple_call_set_lhs (new_stmt, new_temp);
1989     }
1990   else
1991     {
1992       /* Generic support */
1993       gcc_assert (op_type == TREE_CODE_LENGTH (code));
1994       if (op_type != binary_op)
1995 	vec_oprnd1 = NULL;
1996       new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
1997 					       vec_oprnd1);
1998       new_temp = make_ssa_name (vec_dest, new_stmt);
1999       gimple_assign_set_lhs (new_stmt, new_temp);
2000     }
2001   vect_finish_stmt_generation (stmt, new_stmt, gsi);
2002 
2003   return new_stmt;
2004 }
2005 
2006 
2007 /* Get vectorized definitions for loop-based vectorization.  For the first
2008    operand we call vect_get_vec_def_for_operand() (with OPRND containing
2009    scalar operand), and for the rest we get a copy with
2010    vect_get_vec_def_for_stmt_copy() using the previous vector definition
2011    (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
2012    The vectors are collected into VEC_OPRNDS.  */
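
/* For example (illustrative): the NARROW case of vectorizable_conversion
   calls this with vect_pow2 (multi_step_cvt) - 1, so a two-step narrowing
   (MULTI_STEP_CVT == 1 there) collects four vector defs here -- the four
   input vectors that will be packed pairwise, twice, into one result.  */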
2013 
2014 static void
2015 vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
2016 			  VEC (tree, heap) **vec_oprnds, int multi_step_cvt)
2017 {
2018   tree vec_oprnd;
2019 
  /* Get the first vector operand.  All the vector operands except the very
     first one (that is the scalar operand) are stmt copies.  */
2023   if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
2024     vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
2025   else
2026     vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
2027 
2028   VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2029 
2030   /* Get second vector operand.  */
2031   vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
2032   VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2033 
2034   *oprnd = vec_oprnd;
2035 
2036   /* For conversion in multiple steps, continue to get operands
2037      recursively.  */
2038   if (multi_step_cvt)
2039     vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds,  multi_step_cvt - 1);
2040 }
2041 
2042 
2043 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
2044    For multi-step conversions store the resulting vectors and call the function
2045    recursively.  */
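
/* An illustrative sketch: demoting int to short with V4SI operands pairs
   the entries of VEC_OPRNDS and emits, per pair, something like

       vect_s.1 = VEC_PACK_TRUNC_EXPR <vect_i.2, vect_i.3>;

   For a multi-step demotion (e.g. int to char) the intermediate results are
   stored back into VEC_OPRNDS and packed again by the recursive call.  */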
2046 
2047 static void
2048 vect_create_vectorized_demotion_stmts (VEC (tree, heap) **vec_oprnds,
2049 				       int multi_step_cvt, gimple stmt,
2050 				       VEC (tree, heap) *vec_dsts,
2051 				       gimple_stmt_iterator *gsi,
2052 				       slp_tree slp_node, enum tree_code code,
2053 				       stmt_vec_info *prev_stmt_info)
2054 {
2055   unsigned int i;
2056   tree vop0, vop1, new_tmp, vec_dest;
2057   gimple new_stmt;
2058   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2059 
2060   vec_dest = VEC_pop (tree, vec_dsts);
2061 
2062   for (i = 0; i < VEC_length (tree, *vec_oprnds); i += 2)
2063     {
2064       /* Create demotion operation.  */
2065       vop0 = VEC_index (tree, *vec_oprnds, i);
2066       vop1 = VEC_index (tree, *vec_oprnds, i + 1);
2067       new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
2068       new_tmp = make_ssa_name (vec_dest, new_stmt);
2069       gimple_assign_set_lhs (new_stmt, new_tmp);
2070       vect_finish_stmt_generation (stmt, new_stmt, gsi);
2071 
2072       if (multi_step_cvt)
2073 	/* Store the resulting vector for next recursive call.  */
2074 	VEC_replace (tree, *vec_oprnds, i/2, new_tmp);
2075       else
2076 	{
2077 	  /* This is the last step of the conversion sequence. Store the
2078 	     vectors in SLP_NODE or in vector info of the scalar statement
2079 	     (or in STMT_VINFO_RELATED_STMT chain).  */
2080 	  if (slp_node)
2081 	    VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2082 	  else
2083 	    {
2084 	      if (!*prev_stmt_info)
2085 		STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2086 	      else
2087 		STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
2088 
2089 	      *prev_stmt_info = vinfo_for_stmt (new_stmt);
2090 	    }
2091 	}
2092     }
2093 
  /* For multi-step demotion operations we first generate demotion operations
     from the source type to the intermediate types, and then combine the
     results (stored in VEC_OPRNDS) in a demotion operation to the destination
     type.  */
2098   if (multi_step_cvt)
2099     {
2100       /* At each level of recursion we have half of the operands we had at the
2101 	 previous level.  */
2102       VEC_truncate (tree, *vec_oprnds, (i+1)/2);
2103       vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
2104 					     stmt, vec_dsts, gsi, slp_node,
2105 					     VEC_PACK_TRUNC_EXPR,
2106 					     prev_stmt_info);
2107     }
2108 
2109   VEC_quick_push (tree, vec_dsts, vec_dest);
2110 }
2111 
2112 
2113 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
2114    and VEC_OPRNDS1 (for binary operations).  For multi-step conversions store
2115    the resulting vectors and call the function recursively.  */
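
/* An illustrative sketch: promoting short to int turns each V8HI entry of
   VEC_OPRNDS0 into two V4SI results, roughly

       vect_lo.1 = VEC_UNPACK_LO_EXPR <vect_s.2>;
       vect_hi.3 = VEC_UNPACK_HI_EXPR <vect_s.2>;

   and the doubled list replaces *VEC_OPRNDS0 for use by the next step.  */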
2116 
2117 static void
2118 vect_create_vectorized_promotion_stmts (VEC (tree, heap) **vec_oprnds0,
2119 					VEC (tree, heap) **vec_oprnds1,
2120 					gimple stmt, tree vec_dest,
2121 					gimple_stmt_iterator *gsi,
2122 					enum tree_code code1,
2123 					enum tree_code code2, tree decl1,
2124 					tree decl2, int op_type)
2125 {
2126   int i;
2127   tree vop0, vop1, new_tmp1, new_tmp2;
2128   gimple new_stmt1, new_stmt2;
2129   VEC (tree, heap) *vec_tmp = NULL;
2130 
2131   vec_tmp = VEC_alloc (tree, heap, VEC_length (tree, *vec_oprnds0) * 2);
2132   FOR_EACH_VEC_ELT (tree, *vec_oprnds0, i, vop0)
2133     {
2134       if (op_type == binary_op)
2135 	vop1 = VEC_index (tree, *vec_oprnds1, i);
2136       else
2137 	vop1 = NULL_TREE;
2138 
2139       /* Generate the two halves of promotion operation.  */
2140       new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
2141 						 op_type, vec_dest, gsi, stmt);
2142       new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
2143 						 op_type, vec_dest, gsi, stmt);
2144       if (is_gimple_call (new_stmt1))
2145 	{
2146 	  new_tmp1 = gimple_call_lhs (new_stmt1);
2147 	  new_tmp2 = gimple_call_lhs (new_stmt2);
2148 	}
2149       else
2150 	{
2151 	  new_tmp1 = gimple_assign_lhs (new_stmt1);
2152 	  new_tmp2 = gimple_assign_lhs (new_stmt2);
2153 	}
2154 
2155       /* Store the results for the next step.  */
2156       VEC_quick_push (tree, vec_tmp, new_tmp1);
2157       VEC_quick_push (tree, vec_tmp, new_tmp2);
2158     }
2159 
2160   VEC_free (tree, heap, *vec_oprnds0);
2161   *vec_oprnds0 = vec_tmp;
2162 }
2163 
2164 
/* Check if STMT performs a conversion operation that can be vectorized.
2166    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2167    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2168    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
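
/* An illustrative example (subject to target support): a conversion such as
   f_1 = (float) uc_2  from unsigned char usually cannot be done in a single
   vector step, so the WIDEN case below first widens the chars to an
   intermediate integer type and only then converts to float, recording the
   extra steps via CVT_TYPE, MULTI_STEP_CVT and INTERM_TYPES.  */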
2169 
2170 static bool
2171 vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
2172 			 gimple *vec_stmt, slp_tree slp_node)
2173 {
2174   tree vec_dest;
2175   tree scalar_dest;
2176   tree op0, op1 = NULL_TREE;
2177   tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2178   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2179   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2180   enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
2181   enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
2182   tree decl1 = NULL_TREE, decl2 = NULL_TREE;
2183   tree new_temp;
2184   tree def;
2185   gimple def_stmt;
2186   enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2187   gimple new_stmt = NULL;
2188   stmt_vec_info prev_stmt_info;
2189   int nunits_in;
2190   int nunits_out;
2191   tree vectype_out, vectype_in;
2192   int ncopies, i, j;
2193   tree lhs_type, rhs_type;
2194   enum { NARROW, NONE, WIDEN } modifier;
2195   VEC (tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
2196   tree vop0;
2197   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2198   int multi_step_cvt = 0;
2199   VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL;
2200   tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
2201   int op_type;
2202   enum machine_mode rhs_mode;
2203   unsigned short fltsz;
2204 
2205   /* Is STMT a vectorizable conversion?   */
2206 
2207   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2208     return false;
2209 
2210   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2211     return false;
2212 
2213   if (!is_gimple_assign (stmt))
2214     return false;
2215 
2216   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2217     return false;
2218 
2219   code = gimple_assign_rhs_code (stmt);
2220   if (!CONVERT_EXPR_CODE_P (code)
2221       && code != FIX_TRUNC_EXPR
2222       && code != FLOAT_EXPR
2223       && code != WIDEN_MULT_EXPR
2224       && code != WIDEN_LSHIFT_EXPR)
2225     return false;
2226 
2227   op_type = TREE_CODE_LENGTH (code);
2228 
2229   /* Check types of lhs and rhs.  */
2230   scalar_dest = gimple_assign_lhs (stmt);
2231   lhs_type = TREE_TYPE (scalar_dest);
2232   vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2233 
2234   op0 = gimple_assign_rhs1 (stmt);
2235   rhs_type = TREE_TYPE (op0);
2236 
2237   if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
2238       && !((INTEGRAL_TYPE_P (lhs_type)
2239 	    && INTEGRAL_TYPE_P (rhs_type))
2240 	   || (SCALAR_FLOAT_TYPE_P (lhs_type)
2241 	       && SCALAR_FLOAT_TYPE_P (rhs_type))))
2242     return false;
2243 
2244   if ((INTEGRAL_TYPE_P (lhs_type)
2245        && (TYPE_PRECISION (lhs_type)
2246 	   != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
2247       || (INTEGRAL_TYPE_P (rhs_type)
2248 	  && (TYPE_PRECISION (rhs_type)
2249 	      != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
2250     {
2251       if (vect_print_dump_info (REPORT_DETAILS))
2252 	fprintf (vect_dump,
2253 		 "type conversion to/from bit-precision unsupported.");
2254       return false;
2255     }
2256 
2257   /* Check the operands of the operation.  */
2258   if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
2259 			     &def_stmt, &def, &dt[0], &vectype_in))
2260     {
2261       if (vect_print_dump_info (REPORT_DETAILS))
2262 	fprintf (vect_dump, "use not simple.");
2263       return false;
2264     }
2265   if (op_type == binary_op)
2266     {
2267       bool ok;
2268 
2269       op1 = gimple_assign_rhs2 (stmt);
2270       gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
2271       /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
2272 	 OP1.  */
2273       if (CONSTANT_CLASS_P (op0))
2274 	ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, NULL,
2275 				   &def_stmt, &def, &dt[1], &vectype_in);
2276       else
2277 	ok = vect_is_simple_use (op1, stmt, loop_vinfo, NULL, &def_stmt,
2278 				 &def, &dt[1]);
2279 
2280       if (!ok)
2281 	{
2282 	  if (vect_print_dump_info (REPORT_DETAILS))
2283 	    fprintf (vect_dump, "use not simple.");
2284 	  return false;
2285 	}
2286     }
2287 
  /* If op0 is an external or constant def, use a vector type of
     the same size as the output vector type.  */
2290   if (!vectype_in)
2291     vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2292   if (vec_stmt)
2293     gcc_assert (vectype_in);
2294   if (!vectype_in)
2295     {
2296       if (vect_print_dump_info (REPORT_DETAILS))
2297 	{
2298 	  fprintf (vect_dump, "no vectype for scalar type ");
2299 	  print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
2300 	}
2301 
2302       return false;
2303     }
2304 
2305   nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2306   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2307   if (nunits_in < nunits_out)
2308     modifier = NARROW;
2309   else if (nunits_out == nunits_in)
2310     modifier = NONE;
2311   else
2312     modifier = WIDEN;
2313 
2314   /* Multiple types in SLP are handled by creating the appropriate number of
2315      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
2316      case of SLP.  */
2317   if (slp_node || PURE_SLP_STMT (stmt_info))
2318     ncopies = 1;
2319   else if (modifier == NARROW)
2320     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2321   else
2322     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2323 
2324   /* Sanity check: make sure that at least one copy of the vectorized stmt
2325      needs to be generated.  */
2326   gcc_assert (ncopies >= 1);
2327 
2328   /* Supportable by target?  */
2329   switch (modifier)
2330     {
2331     case NONE:
2332       if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
2333 	return false;
2334       if (supportable_convert_operation (code, vectype_out, vectype_in,
2335 					 &decl1, &code1))
2336 	break;
2337       /* FALLTHRU */
2338     unsupported:
2339       if (vect_print_dump_info (REPORT_DETAILS))
2340 	fprintf (vect_dump, "conversion not supported by target.");
2341       return false;
2342 
2343     case WIDEN:
2344       if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
2345 					  &decl1, &decl2, &code1, &code2,
2346 					  &multi_step_cvt, &interm_types))
2347 	{
	  /* Binary widening operations can only be supported directly by the
	     architecture.  */
2350 	  gcc_assert (!(multi_step_cvt && op_type == binary_op));
2351 	  break;
2352 	}
2353 
2354       if (code != FLOAT_EXPR
2355 	  || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
2356 	      <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
2357 	goto unsupported;
2358 
2359       rhs_mode = TYPE_MODE (rhs_type);
2360       fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
2361       for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
2362 	   rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
2363 	   rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
2364 	{
2365 	  cvt_type
2366 	    = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
2367 	  cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
2368 	  if (cvt_type == NULL_TREE)
2369 	    goto unsupported;
2370 
2371 	  if (GET_MODE_SIZE (rhs_mode) == fltsz)
2372 	    {
2373 	      if (!supportable_convert_operation (code, vectype_out,
2374 						  cvt_type, &decl1, &codecvt1))
2375 		goto unsupported;
2376 	    }
2377 	  else if (!supportable_widening_operation (code, stmt, vectype_out,
2378 						    cvt_type, &decl1, &decl2,
2379 						    &codecvt1, &codecvt2,
2380 						    &multi_step_cvt,
2381 						    &interm_types))
2382 	    continue;
2383 	  else
2384 	    gcc_assert (multi_step_cvt == 0);
2385 
2386 	  if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
2387 					      vectype_in, NULL, NULL, &code1,
2388 					      &code2, &multi_step_cvt,
2389 					      &interm_types))
2390 	    break;
2391 	}
2392 
2393       if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
2394 	goto unsupported;
2395 
2396       if (GET_MODE_SIZE (rhs_mode) == fltsz)
2397 	codecvt2 = ERROR_MARK;
2398       else
2399 	{
2400 	  multi_step_cvt++;
2401 	  VEC_safe_push (tree, heap, interm_types, cvt_type);
2402 	  cvt_type = NULL_TREE;
2403 	}
2404       break;
2405 
2406     case NARROW:
2407       gcc_assert (op_type == unary_op);
2408       if (supportable_narrowing_operation (code, vectype_out, vectype_in,
2409 					   &code1, &multi_step_cvt,
2410 					   &interm_types))
2411 	break;
2412 
2413       if (code != FIX_TRUNC_EXPR
2414 	  || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
2415 	      >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
2416 	goto unsupported;
2417 
2418       rhs_mode = TYPE_MODE (rhs_type);
2419       cvt_type
2420 	= build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
2421       cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
2422       if (cvt_type == NULL_TREE)
2423 	goto unsupported;
2424       if (!supportable_convert_operation (code, cvt_type, vectype_in,
2425 					  &decl1, &codecvt1))
2426 	goto unsupported;
2427       if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
2428 					   &code1, &multi_step_cvt,
2429 					   &interm_types))
2430 	break;
2431       goto unsupported;
2432 
2433     default:
2434       gcc_unreachable ();
2435     }
2436 
2437   if (!vec_stmt)		/* transformation not required.  */
2438     {
2439       if (vect_print_dump_info (REPORT_DETAILS))
2440 	fprintf (vect_dump, "=== vectorizable_conversion ===");
2441       if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
2442         {
2443 	  STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
2444 	  vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2445 	}
2446       else if (modifier == NARROW)
2447 	{
2448 	  STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
2449 	  vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
2450 	}
2451       else
2452 	{
2453 	  STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
2454 	  vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
2455 	}
2456       VEC_free (tree, heap, interm_types);
2457       return true;
2458     }
2459 
2460   /** Transform.  **/
2461   if (vect_print_dump_info (REPORT_DETAILS))
2462     fprintf (vect_dump, "transform conversion. ncopies = %d.", ncopies);
2463 
2464   if (op_type == binary_op)
2465     {
2466       if (CONSTANT_CLASS_P (op0))
2467 	op0 = fold_convert (TREE_TYPE (op1), op0);
2468       else if (CONSTANT_CLASS_P (op1))
2469 	op1 = fold_convert (TREE_TYPE (op0), op1);
2470     }
2471 
  /* In case of multi-step conversion, we first generate conversion operations
     to the intermediate types, and then from those types to the final one.
     We create vector destinations for the intermediate types (INTERM_TYPES)
     received from supportable_*_operation, and store them in the correct order
     for future use in vect_create_vectorized_*_stmts ().  */
2477   vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
2478   vec_dest = vect_create_destination_var (scalar_dest,
2479 					  (cvt_type && modifier == WIDEN)
2480 					  ? cvt_type : vectype_out);
2481   VEC_quick_push (tree, vec_dsts, vec_dest);
2482 
2483   if (multi_step_cvt)
2484     {
2485       for (i = VEC_length (tree, interm_types) - 1;
2486 	   VEC_iterate (tree, interm_types, i, intermediate_type); i--)
2487 	{
2488 	  vec_dest = vect_create_destination_var (scalar_dest,
2489 						  intermediate_type);
2490 	  VEC_quick_push (tree, vec_dsts, vec_dest);
2491 	}
2492     }
2493 
2494   if (cvt_type)
2495     vec_dest = vect_create_destination_var (scalar_dest,
2496 					    modifier == WIDEN
2497 					    ? vectype_out : cvt_type);
2498 
2499   if (!slp_node)
2500     {
2501       if (modifier == NONE)
2502 	vec_oprnds0 = VEC_alloc (tree, heap, 1);
2503       else if (modifier == WIDEN)
2504 	{
2505 	  vec_oprnds0 = VEC_alloc (tree, heap,
2506 				   (multi_step_cvt
2507 				    ? vect_pow2 (multi_step_cvt) : 1));
2508 	  if (op_type == binary_op)
2509 	    vec_oprnds1 = VEC_alloc (tree, heap, 1);
2510 	}
2511       else
2512 	vec_oprnds0 = VEC_alloc (tree, heap,
2513 				 2 * (multi_step_cvt
2514 				      ? vect_pow2 (multi_step_cvt) : 1));
2515     }
2516   else if (code == WIDEN_LSHIFT_EXPR)
2517     vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
2518 
2519   last_oprnd = op0;
2520   prev_stmt_info = NULL;
2521   switch (modifier)
2522     {
2523     case NONE:
2524       for (j = 0; j < ncopies; j++)
2525 	{
2526 	  if (j == 0)
2527 	    vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
2528 			       -1);
2529 	  else
2530 	    vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
2531 
2532 	  FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2533 	    {
2534 	      /* Arguments are ready, create the new vector stmt.  */
2535 	      if (code1 == CALL_EXPR)
2536 		{
2537 		  new_stmt = gimple_build_call (decl1, 1, vop0);
2538 		  new_temp = make_ssa_name (vec_dest, new_stmt);
2539 		  gimple_call_set_lhs (new_stmt, new_temp);
2540 		}
2541 	      else
2542 		{
2543 		  gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
2544 		  new_stmt = gimple_build_assign_with_ops (code1, vec_dest,
2545 							   vop0, NULL);
2546 		  new_temp = make_ssa_name (vec_dest, new_stmt);
2547 		  gimple_assign_set_lhs (new_stmt, new_temp);
2548 		}
2549 
2550 	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
2551 	      if (slp_node)
2552 		VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
2553 				new_stmt);
2554 	    }
2555 
2556 	  if (j == 0)
2557 	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2558 	  else
2559 	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2560 	  prev_stmt_info = vinfo_for_stmt (new_stmt);
2561 	}
2562       break;
2563 
2564     case WIDEN:
      /* In case the vectorization factor (VF) is bigger than the number
	 of elements that we can fit in a vectype (nunits), we have to
	 generate more than one vector stmt - i.e., we need to "unroll"
	 the vector stmt by a factor of VF/nunits.  */
2569       for (j = 0; j < ncopies; j++)
2570 	{
2571 	  /* Handle uses.  */
2572 	  if (j == 0)
2573 	    {
2574 	      if (slp_node)
2575 		{
2576 		  if (code == WIDEN_LSHIFT_EXPR)
2577 		    {
2578 		      unsigned int k;
2579 
2580 		      vec_oprnd1 = op1;
2581 		      /* Store vec_oprnd1 for every vector stmt to be created
2582 			 for SLP_NODE.  We check during the analysis that all
2583 			 the shift arguments are the same.  */
2584 		      for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
2585 			VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2586 
2587 		      vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2588 					 slp_node, -1);
2589 		    }
2590 		  else
2591 		    vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
2592 				       &vec_oprnds1, slp_node, -1);
2593 		}
2594 	      else
2595 		{
2596 		  vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2597 		  VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
2598 		  if (op_type == binary_op)
2599 		    {
2600 		      if (code == WIDEN_LSHIFT_EXPR)
2601 			vec_oprnd1 = op1;
2602 		      else
2603 			vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
2604 								   NULL);
2605 		      VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2606 		    }
2607 		}
2608 	    }
2609 	  else
2610 	    {
2611 	      vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
2612 	      VEC_truncate (tree, vec_oprnds0, 0);
2613 	      VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
2614 	      if (op_type == binary_op)
2615 		{
2616 		  if (code == WIDEN_LSHIFT_EXPR)
2617 		    vec_oprnd1 = op1;
2618 		  else
2619 		    vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
2620 								 vec_oprnd1);
2621 		  VEC_truncate (tree, vec_oprnds1, 0);
2622 		  VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2623 		}
2624 	    }
2625 
2626 	  /* Arguments are ready.  Create the new vector stmts.  */
2627 	  for (i = multi_step_cvt; i >= 0; i--)
2628 	    {
2629 	      tree this_dest = VEC_index (tree, vec_dsts, i);
2630 	      enum tree_code c1 = code1, c2 = code2;
2631 	      if (i == 0 && codecvt2 != ERROR_MARK)
2632 		{
2633 		  c1 = codecvt1;
2634 		  c2 = codecvt2;
2635 		}
2636 	      vect_create_vectorized_promotion_stmts (&vec_oprnds0,
2637 						      &vec_oprnds1,
2638 						      stmt, this_dest, gsi,
2639 						      c1, c2, decl1, decl2,
2640 						      op_type);
2641 	    }
2642 
2643 	  FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2644 	    {
2645 	      if (cvt_type)
2646 		{
2647 		  if (codecvt1 == CALL_EXPR)
2648 		    {
2649 		      new_stmt = gimple_build_call (decl1, 1, vop0);
2650 		      new_temp = make_ssa_name (vec_dest, new_stmt);
2651 		      gimple_call_set_lhs (new_stmt, new_temp);
2652 		    }
2653 		  else
2654 		    {
2655 		      gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
2656 		      new_temp = make_ssa_name (vec_dest, NULL);
2657 		      new_stmt = gimple_build_assign_with_ops (codecvt1,
2658 							       new_temp,
2659 							       vop0, NULL);
2660 		    }
2661 
2662 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
2663 		}
2664 	      else
2665 		new_stmt = SSA_NAME_DEF_STMT (vop0);
2666 
2667 	      if (slp_node)
2668 		VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
2669 				new_stmt);
2670 	      else
2671 		{
2672 		  if (!prev_stmt_info)
2673 		    STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2674 		  else
2675 		    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2676 		  prev_stmt_info = vinfo_for_stmt (new_stmt);
2677 		}
2678 	    }
2679 	}
2680 
2681       *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2682       break;
2683 
2684     case NARROW:
      /* In case the vectorization factor (VF) is bigger than the number
	 of elements that we can fit in a vectype (nunits), we have to
	 generate more than one vector stmt - i.e., we need to "unroll"
	 the vector stmt by a factor of VF/nunits.  */
2689       for (j = 0; j < ncopies; j++)
2690 	{
2691 	  /* Handle uses.  */
2692 	  if (slp_node)
2693 	    vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2694 			       slp_node, -1);
2695 	  else
2696 	    {
2697 	      VEC_truncate (tree, vec_oprnds0, 0);
2698 	      vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
2699 					vect_pow2 (multi_step_cvt) - 1);
2700 	    }
2701 
2702 	  /* Arguments are ready.  Create the new vector stmts.  */
2703 	  if (cvt_type)
2704 	    FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2705 	      {
2706 		if (codecvt1 == CALL_EXPR)
2707 		  {
2708 		    new_stmt = gimple_build_call (decl1, 1, vop0);
2709 		    new_temp = make_ssa_name (vec_dest, new_stmt);
2710 		    gimple_call_set_lhs (new_stmt, new_temp);
2711 		  }
2712 		else
2713 		  {
2714 		    gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
2715 		    new_temp = make_ssa_name (vec_dest, NULL);
2716 		    new_stmt = gimple_build_assign_with_ops (codecvt1, new_temp,
2717 							     vop0, NULL);
2718 		  }
2719 
2720 		vect_finish_stmt_generation (stmt, new_stmt, gsi);
2721 		VEC_replace (tree, vec_oprnds0, i, new_temp);
2722 	      }
2723 
2724 	  vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
2725 						 stmt, vec_dsts, gsi,
2726 						 slp_node, code1,
2727 						 &prev_stmt_info);
2728 	}
2729 
2730       *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2731       break;
2732     }
2733 
2734   VEC_free (tree, heap, vec_oprnds0);
2735   VEC_free (tree, heap, vec_oprnds1);
2736   VEC_free (tree, heap, vec_dsts);
2737   VEC_free (tree, heap, interm_types);
2738 
2739   return true;
2740 }
2741 
2742 
2743 /* Function vectorizable_assignment.
2744 
2745    Check if STMT performs an assignment (copy) that can be vectorized.
2746    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2748    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
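
/* An illustrative sketch: a plain copy  a_1 = b_2  or a same-width
   conversion such as  i_1 = (int) u_2  is vectorized as a simple vector
   copy; in the conversion case the operand is wrapped in a
   VIEW_CONVERT_EXPR so that only the vector type changes, not the bits.  */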
2749 
2750 static bool
2751 vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
2752 			 gimple *vec_stmt, slp_tree slp_node)
2753 {
2754   tree vec_dest;
2755   tree scalar_dest;
2756   tree op;
2757   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2758   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2759   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2760   tree new_temp;
2761   tree def;
2762   gimple def_stmt;
2763   enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2764   unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2765   int ncopies;
2766   int i, j;
2767   VEC(tree,heap) *vec_oprnds = NULL;
2768   tree vop;
2769   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2770   gimple new_stmt = NULL;
2771   stmt_vec_info prev_stmt_info = NULL;
2772   enum tree_code code;
2773   tree vectype_in;
2774 
2775   /* Multiple types in SLP are handled by creating the appropriate number of
2776      vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2777      case of SLP.  */
2778   if (slp_node || PURE_SLP_STMT (stmt_info))
2779     ncopies = 1;
2780   else
2781     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2782 
2783   gcc_assert (ncopies >= 1);
2784 
2785   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2786     return false;
2787 
2788   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2789     return false;
2790 
2791   /* Is vectorizable assignment?  */
2792   if (!is_gimple_assign (stmt))
2793     return false;
2794 
2795   scalar_dest = gimple_assign_lhs (stmt);
2796   if (TREE_CODE (scalar_dest) != SSA_NAME)
2797     return false;
2798 
2799   code = gimple_assign_rhs_code (stmt);
2800   if (gimple_assign_single_p (stmt)
2801       || code == PAREN_EXPR
2802       || CONVERT_EXPR_CODE_P (code))
2803     op = gimple_assign_rhs1 (stmt);
2804   else
2805     return false;
2806 
2807   if (code == VIEW_CONVERT_EXPR)
2808     op = TREE_OPERAND (op, 0);
2809 
2810   if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2811 			     &def_stmt, &def, &dt[0], &vectype_in))
2812     {
2813       if (vect_print_dump_info (REPORT_DETAILS))
2814         fprintf (vect_dump, "use not simple.");
2815       return false;
2816     }
2817 
2818   /* We can handle NOP_EXPR conversions that do not change the number
2819      of elements or the vector size.  */
2820   if ((CONVERT_EXPR_CODE_P (code)
2821        || code == VIEW_CONVERT_EXPR)
2822       && (!vectype_in
2823 	  || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
2824 	  || (GET_MODE_SIZE (TYPE_MODE (vectype))
2825 	      != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
2826     return false;
2827 
2828   /* We do not handle bit-precision changes.  */
2829   if ((CONVERT_EXPR_CODE_P (code)
2830        || code == VIEW_CONVERT_EXPR)
2831       && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2832       && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2833 	   != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
2834 	  || ((TYPE_PRECISION (TREE_TYPE (op))
2835 	       != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
2836       /* But a conversion that does not change the bit-pattern is ok.  */
2837       && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2838 	    > TYPE_PRECISION (TREE_TYPE (op)))
2839 	   && TYPE_UNSIGNED (TREE_TYPE (op))))
2840     {
2841       if (vect_print_dump_info (REPORT_DETAILS))
2842         fprintf (vect_dump, "type conversion to/from bit-precision "
2843 		 "unsupported.");
2844       return false;
2845     }
2846 
2847   if (!vec_stmt) /* transformation not required.  */
2848     {
2849       STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
2850       if (vect_print_dump_info (REPORT_DETAILS))
2851         fprintf (vect_dump, "=== vectorizable_assignment ===");
2852       vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2853       return true;
2854     }
2855 
2856   /** Transform.  **/
2857   if (vect_print_dump_info (REPORT_DETAILS))
2858     fprintf (vect_dump, "transform assignment.");
2859 
2860   /* Handle def.  */
2861   vec_dest = vect_create_destination_var (scalar_dest, vectype);
2862 
2863   /* Handle use.  */
2864   for (j = 0; j < ncopies; j++)
2865     {
2866       /* Handle uses.  */
2867       if (j == 0)
2868         vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
2869       else
2870         vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
2871 
      /* Arguments are ready.  Create the new vector stmt.  */
2873       FOR_EACH_VEC_ELT (tree, vec_oprnds, i, vop)
2874        {
2875 	 if (CONVERT_EXPR_CODE_P (code)
2876 	     || code == VIEW_CONVERT_EXPR)
2877 	   vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
2878          new_stmt = gimple_build_assign (vec_dest, vop);
2879          new_temp = make_ssa_name (vec_dest, new_stmt);
2880          gimple_assign_set_lhs (new_stmt, new_temp);
2881          vect_finish_stmt_generation (stmt, new_stmt, gsi);
2882          if (slp_node)
2883            VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2884        }
2885 
2886       if (slp_node)
2887         continue;
2888 
2889       if (j == 0)
2890         STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2891       else
2892         STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2893 
2894       prev_stmt_info = vinfo_for_stmt (new_stmt);
2895     }
2896 
2897   VEC_free (tree, heap, vec_oprnds);
2898   return true;
2899 }
2900 
2901 
/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
   either as a shift by a scalar or as a shift by a vector.  */
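
/* A usage sketch: callers (e.g. the pattern recognizer) use this to ask
   whether, say, RSHIFT_EXPR on short is supported at all, without
   committing to the scalar or the vector form of the shift count.  */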
2904 
2905 bool
2906 vect_supportable_shift (enum tree_code code, tree scalar_type)
2907 {
2909   enum machine_mode vec_mode;
2910   optab optab;
2911   int icode;
2912   tree vectype;
2913 
2914   vectype = get_vectype_for_scalar_type (scalar_type);
2915   if (!vectype)
2916     return false;
2917 
2918   optab = optab_for_tree_code (code, vectype, optab_scalar);
2919   if (!optab
2920       || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2921     {
2922       optab = optab_for_tree_code (code, vectype, optab_vector);
2923       if (!optab
2924           || (optab_handler (optab, TYPE_MODE (vectype))
2925                       == CODE_FOR_nothing))
2926         return false;
2927     }
2928 
2929   vec_mode = TYPE_MODE (vectype);
2930   icode = (int) optab_handler (optab, vec_mode);
2931   if (icode == CODE_FOR_nothing)
2932     return false;
2933 
2934   return true;
2935 }
2936 
2937 
2938 /* Function vectorizable_shift.
2939 
2940    Check if STMT performs a shift operation that can be vectorized.
2941    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2943    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
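
/* An illustrative sketch: for  x_1 = y_2 << 3  the shift amount is a loop
   invariant, so the vector/scalar shift optab can be used when the target
   provides it; for  x_1 = y_2 << z_3  with z_3 defined inside the loop,
   each element needs its own count and the vector/vector optab is
   required.  */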
2944 
2945 static bool
2946 vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
2947                     gimple *vec_stmt, slp_tree slp_node)
2948 {
2949   tree vec_dest;
2950   tree scalar_dest;
2951   tree op0, op1 = NULL;
2952   tree vec_oprnd1 = NULL_TREE;
2953   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2954   tree vectype;
2955   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2956   enum tree_code code;
2957   enum machine_mode vec_mode;
2958   tree new_temp;
2959   optab optab;
2960   int icode;
2961   enum machine_mode optab_op2_mode;
2962   tree def;
2963   gimple def_stmt;
2964   enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2965   gimple new_stmt = NULL;
2966   stmt_vec_info prev_stmt_info;
2967   int nunits_in;
2968   int nunits_out;
2969   tree vectype_out;
2970   tree op1_vectype;
2971   int ncopies;
2972   int j, i;
2973   VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
2974   tree vop0, vop1;
2975   unsigned int k;
2976   bool scalar_shift_arg = true;
2977   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2978   int vf;
2979 
2980   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2981     return false;
2982 
2983   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2984     return false;
2985 
  /* Is STMT a vectorizable shift operation?  */
2987   if (!is_gimple_assign (stmt))
2988     return false;
2989 
2990   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2991     return false;
2992 
2993   code = gimple_assign_rhs_code (stmt);
2994 
2995   if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
2996       || code == RROTATE_EXPR))
2997     return false;
2998 
2999   scalar_dest = gimple_assign_lhs (stmt);
3000   vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3001   if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
3002       != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
3003     {
3004       if (vect_print_dump_info (REPORT_DETAILS))
3005         fprintf (vect_dump, "bit-precision shifts not supported.");
3006       return false;
3007     }
3008 
3009   op0 = gimple_assign_rhs1 (stmt);
3010   if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
3011                              &def_stmt, &def, &dt[0], &vectype))
3012     {
3013       if (vect_print_dump_info (REPORT_DETAILS))
3014         fprintf (vect_dump, "use not simple.");
3015       return false;
3016     }
3017   /* If op0 is an external or constant def use a vector type with
3018      the same size as the output vector type.  */
3019   if (!vectype)
3020     vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
3021   if (vec_stmt)
3022     gcc_assert (vectype);
3023   if (!vectype)
3024     {
3025       if (vect_print_dump_info (REPORT_DETAILS))
3026         {
3027           fprintf (vect_dump, "no vectype for scalar type ");
3028           print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
3029         }
3030 
3031       return false;
3032     }
3033 
3034   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3035   nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
3036   if (nunits_out != nunits_in)
3037     return false;
3038 
3039   op1 = gimple_assign_rhs2 (stmt);
3040   if (!vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3041 			     &def, &dt[1], &op1_vectype))
3042     {
3043       if (vect_print_dump_info (REPORT_DETAILS))
3044         fprintf (vect_dump, "use not simple.");
3045       return false;
3046     }
3047 
3048   if (loop_vinfo)
3049     vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3050   else
3051     vf = 1;
3052 
3053   /* Multiple types in SLP are handled by creating the appropriate number of
3054      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
3055      case of SLP.  */
3056   if (slp_node || PURE_SLP_STMT (stmt_info))
3057     ncopies = 1;
3058   else
3059     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3060 
3061   gcc_assert (ncopies >= 1);
3062 
  /* Determine whether the shift amount is a vector or a scalar.  If the
     shift/rotate amount is a vector, use the vector/vector shift optabs.  */
3065 
3066   if (dt[1] == vect_internal_def && !slp_node)
3067     scalar_shift_arg = false;
3068   else if (dt[1] == vect_constant_def
3069 	   || dt[1] == vect_external_def
3070 	   || dt[1] == vect_internal_def)
3071     {
      /* In SLP, we need to check whether the shift count is the same in all
	 the stmts; in loops, if it is a constant or invariant, it is always
	 a scalar shift.  */
3075       if (slp_node)
3076 	{
3077 	  VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
3078 	  gimple slpstmt;
3079 
3080 	  FOR_EACH_VEC_ELT (gimple, stmts, k, slpstmt)
3081 	    if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
3082 	      scalar_shift_arg = false;
3083 	}
3084     }
3085   else
3086     {
3087       if (vect_print_dump_info (REPORT_DETAILS))
3088 	fprintf (vect_dump, "operand mode requires invariant argument.");
3089       return false;
3090     }
3091 
3092   /* Vector shifted by vector.  */
3093   if (!scalar_shift_arg)
3094     {
3095       optab = optab_for_tree_code (code, vectype, optab_vector);
3096       if (vect_print_dump_info (REPORT_DETAILS))
3097         fprintf (vect_dump, "vector/vector shift/rotate found.");
3098       if (!op1_vectype)
3099 	op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
3100       if (op1_vectype == NULL_TREE
3101 	  || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
3102 	{
3103 	  if (vect_print_dump_info (REPORT_DETAILS))
3104 	    fprintf (vect_dump, "unusable type for last operand in"
3105 				" vector/vector shift/rotate.");
3106 	  return false;
3107 	}
3108     }
3109   /* See if the machine has a vector shifted by scalar insn and if not
3110      then see if it has a vector shifted by vector insn.  */
3111   else
3112     {
3113       optab = optab_for_tree_code (code, vectype, optab_scalar);
3114       if (optab
3115           && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
3116         {
3117           if (vect_print_dump_info (REPORT_DETAILS))
3118             fprintf (vect_dump, "vector/scalar shift/rotate found.");
3119         }
3120       else
3121         {
3122           optab = optab_for_tree_code (code, vectype, optab_vector);
3123           if (optab
3124                && (optab_handler (optab, TYPE_MODE (vectype))
3125                       != CODE_FOR_nothing))
3126             {
3127 	      scalar_shift_arg = false;
3128 
3129               if (vect_print_dump_info (REPORT_DETAILS))
3130                 fprintf (vect_dump, "vector/vector shift/rotate found.");
3131 
3132               /* Unlike the other binary operators, shifts/rotates have
3133                  the rhs being int, instead of the same type as the lhs,
3134                  so make sure the scalar is the right type if we are
3135 		 dealing with vectors of long long/long/short/char.  */
3136               if (dt[1] == vect_constant_def)
3137                 op1 = fold_convert (TREE_TYPE (vectype), op1);
3138 	      else if (!useless_type_conversion_p (TREE_TYPE (vectype),
3139 						   TREE_TYPE (op1)))
3140 		{
3141 		  if (slp_node
3142 		      && TYPE_MODE (TREE_TYPE (vectype))
3143 			 != TYPE_MODE (TREE_TYPE (op1)))
3144 		    {
		      if (vect_print_dump_info (REPORT_DETAILS))
			fprintf (vect_dump, "unusable type for last operand in"
					    " vector/vector shift/rotate.");
		      return false;
3149 		    }
3150 		  if (vec_stmt && !slp_node)
3151 		    {
3152 		      op1 = fold_convert (TREE_TYPE (vectype), op1);
3153 		      op1 = vect_init_vector (stmt, op1,
3154 					      TREE_TYPE (vectype), NULL);
3155 		    }
3156 		}
3157             }
3158         }
3159     }
3160 
3161   /* Supportable by target?  */
3162   if (!optab)
3163     {
3164       if (vect_print_dump_info (REPORT_DETAILS))
3165         fprintf (vect_dump, "no optab.");
3166       return false;
3167     }
3168   vec_mode = TYPE_MODE (vectype);
3169   icode = (int) optab_handler (optab, vec_mode);
3170   if (icode == CODE_FOR_nothing)
3171     {
3172       if (vect_print_dump_info (REPORT_DETAILS))
3173         fprintf (vect_dump, "op not supported by target.");
3174       /* Check only during analysis.  */
3175       if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
3176           || (vf < vect_min_worthwhile_factor (code)
3177               && !vec_stmt))
3178         return false;
3179       if (vect_print_dump_info (REPORT_DETAILS))
3180         fprintf (vect_dump, "proceeding using word mode.");
3181     }
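
  /* Clarifying note (added): proceeding "using word mode" means the
     operation is expected to be carried out on a word-sized integer mode
     rather than by a native vector instruction, which is why this path is
     only taken when the whole vector fits in UNITS_PER_WORD bytes and,
     during analysis, when the vectorization factor makes that worthwhile.  */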
3182 
3183   /* Worthwhile without SIMD support?  Check only during analysis.  */
3184   if (!VECTOR_MODE_P (TYPE_MODE (vectype))
3185       && vf < vect_min_worthwhile_factor (code)
3186       && !vec_stmt)
3187     {
3188       if (vect_print_dump_info (REPORT_DETAILS))
3189         fprintf (vect_dump, "not worthwhile without SIMD support.");
3190       return false;
3191     }
3192 
3193   if (!vec_stmt) /* transformation not required.  */
3194     {
3195       STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
3196       if (vect_print_dump_info (REPORT_DETAILS))
3197         fprintf (vect_dump, "=== vectorizable_shift ===");
3198       vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
3199       return true;
3200     }
3201 
3202   /** Transform.  **/
3203 
3204   if (vect_print_dump_info (REPORT_DETAILS))
3205     fprintf (vect_dump, "transform shift/rotate operation.");
3206 
3207   /* Handle def.  */
3208   vec_dest = vect_create_destination_var (scalar_dest, vectype);
3209 
3210   /* Allocate VECs for vector operands.  In case of SLP, vector operands are
3211      created in the previous stages of the recursion, so no allocation is
3212      needed, except for the case of shift with scalar shift argument.  In that
3213      case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
3214      be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
3215      In case of loop-based vectorization we allocate VECs of size 1.  We
3216      allocate VEC_OPRNDS1 only in case of binary operation.  */
3217   if (!slp_node)
3218     {
3219       vec_oprnds0 = VEC_alloc (tree, heap, 1);
3220       vec_oprnds1 = VEC_alloc (tree, heap, 1);
3221     }
3222   else if (scalar_shift_arg)
3223     vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
3224 
3225   prev_stmt_info = NULL;
3226   for (j = 0; j < ncopies; j++)
3227     {
3228       /* Handle uses.  */
3229       if (j == 0)
3230         {
3231           if (scalar_shift_arg)
3232             {
3233               /* Vector shl and shr insn patterns can be defined with scalar
3234                  operand 2 (shift operand).  In this case, use constant or loop
3235                  invariant op1 directly, without extending it to vector mode
3236                  first.  */
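              /* Illustrative example (added note): for `x << 7' on a V8HI
                 vector, such a pattern lets the generated vector stmt keep
                 the scalar 7 as operand 2 instead of first building a
                 {7,7,...} constant vector.  */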
3237               optab_op2_mode = insn_data[icode].operand[2].mode;
3238               if (!VECTOR_MODE_P (optab_op2_mode))
3239                 {
3240                   if (vect_print_dump_info (REPORT_DETAILS))
3241                     fprintf (vect_dump, "operand 1 using scalar mode.");
3242                   vec_oprnd1 = op1;
3243                   VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
3244                   if (slp_node)
3245                     {
3246                       /* Store vec_oprnd1 for every vector stmt to be created
3247                          for SLP_NODE.  We check during the analysis that all
3248                          the shift arguments are the same.
3249                          TODO: Allow different constants for different vector
3250                          stmts generated for an SLP instance.  */
3251                       for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
3252                         VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
3253                     }
3254                 }
3255             }
3256 
3257           /* vec_oprnd1 is available if operand 1 should be of a scalar-type
3258              (a special case for certain kinds of vector shifts); otherwise,
3259              operand 1 should be of a vector type (the usual case).  */
3260           if (vec_oprnd1)
3261             vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3262                                slp_node, -1);
3263           else
3264             vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
3265                                slp_node, -1);
3266         }
3267       else
3268         vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
3269 
3270       /* Arguments are ready.  Create the new vector stmt.  */
3271       FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
3272         {
3273           vop1 = VEC_index (tree, vec_oprnds1, i);
3274           new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
3275           new_temp = make_ssa_name (vec_dest, new_stmt);
3276           gimple_assign_set_lhs (new_stmt, new_temp);
3277           vect_finish_stmt_generation (stmt, new_stmt, gsi);
3278           if (slp_node)
3279             VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
3280         }
3281 
3282       if (slp_node)
3283         continue;
3284 
3285       if (j == 0)
3286         STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3287       else
3288         STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3289       prev_stmt_info = vinfo_for_stmt (new_stmt);
3290     }
3291 
3292   VEC_free (tree, heap, vec_oprnds0);
3293   VEC_free (tree, heap, vec_oprnds1);
3294 
3295   return true;
3296 }
3297 
3298 
3299 /* Function vectorizable_operation.
3300 
3301    Check if STMT performs a binary, unary or ternary operation that can
3302    be vectorized.
3303    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3304    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3305    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
3306 
3307 static bool
3308 vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
3309 			gimple *vec_stmt, slp_tree slp_node)
3310 {
3311   tree vec_dest;
3312   tree scalar_dest;
3313   tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
3314   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3315   tree vectype;
3316   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3317   enum tree_code code;
3318   enum machine_mode vec_mode;
3319   tree new_temp;
3320   int op_type;
3321   optab optab;
3322   int icode;
3323   tree def;
3324   gimple def_stmt;
3325   enum vect_def_type dt[3]
3326     = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
3327   gimple new_stmt = NULL;
3328   stmt_vec_info prev_stmt_info;
3329   int nunits_in;
3330   int nunits_out;
3331   tree vectype_out;
3332   int ncopies;
3333   int j, i;
3334   VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL, *vec_oprnds2 = NULL;
3335   tree vop0, vop1, vop2;
3336   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3337   int vf;
3338 
3339   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3340     return false;
3341 
3342   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3343     return false;
3344 
3345   /* Is STMT a vectorizable binary/unary operation?   */
3346   if (!is_gimple_assign (stmt))
3347     return false;
3348 
3349   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3350     return false;
3351 
3352   code = gimple_assign_rhs_code (stmt);
3353 
3354   /* For pointer addition, we should use the normal plus for
3355      the vector addition.  */
3356   if (code == POINTER_PLUS_EXPR)
3357     code = PLUS_EXPR;
3358 
3359   /* Support only unary, binary and ternary operations.  */
3360   op_type = TREE_CODE_LENGTH (code);
3361   if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
3362     {
3363       if (vect_print_dump_info (REPORT_DETAILS))
3364 	fprintf (vect_dump, "num. args = %d (not unary/binary/ternary op).",
3365 		 op_type);
3366       return false;
3367     }
3368 
3369   scalar_dest = gimple_assign_lhs (stmt);
3370   vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3371 
3372   /* Most operations cannot handle bit-precision types without extra
3373      truncations.  */
3374   if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
3375        != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
3376       /* The exceptions are bitwise binary operations.  */
3377       && code != BIT_IOR_EXPR
3378       && code != BIT_XOR_EXPR
3379       && code != BIT_AND_EXPR)
3380     {
3381       if (vect_print_dump_info (REPORT_DETAILS))
3382         fprintf (vect_dump, "bit-precision arithmetic not supported.");
3383       return false;
3384     }
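
  /* Added clarification: e.g. a bit-field such as `unsigned f : 3' has
     TYPE_PRECISION 3 but is carried in QImode (precision 8), so arithmetic
     on it would need a truncation after every vector operation; the bitwise
     ops listed above are the only ones that are precision-safe as-is.  */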
3385 
3386   op0 = gimple_assign_rhs1 (stmt);
3387   if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
3388 			     &def_stmt, &def, &dt[0], &vectype))
3389     {
3390       if (vect_print_dump_info (REPORT_DETAILS))
3391         fprintf (vect_dump, "use not simple.");
3392       return false;
3393     }
3394   /* If op0 is an external or constant def use a vector type with
3395      the same size as the output vector type.  */
3396   if (!vectype)
3397     vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
3398   if (vec_stmt)
3399     gcc_assert (vectype);
3400   if (!vectype)
3401     {
3402       if (vect_print_dump_info (REPORT_DETAILS))
3403         {
3404           fprintf (vect_dump, "no vectype for scalar type ");
3405           print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
3406         }
3407 
3408       return false;
3409     }
3410 
3411   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3412   nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
3413   if (nunits_out != nunits_in)
3414     return false;
3415 
3416   if (op_type == binary_op || op_type == ternary_op)
3417     {
3418       op1 = gimple_assign_rhs2 (stmt);
3419       if (!vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3420 			       &def, &dt[1]))
3421 	{
3422 	  if (vect_print_dump_info (REPORT_DETAILS))
3423 	    fprintf (vect_dump, "use not simple.");
3424 	  return false;
3425 	}
3426     }
3427   if (op_type == ternary_op)
3428     {
3429       op2 = gimple_assign_rhs3 (stmt);
3430       if (!vect_is_simple_use (op2, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3431 			       &def, &dt[2]))
3432 	{
3433 	  if (vect_print_dump_info (REPORT_DETAILS))
3434 	    fprintf (vect_dump, "use not simple.");
3435 	  return false;
3436 	}
3437     }
3438 
3439   if (loop_vinfo)
3440     vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3441   else
3442     vf = 1;
3443 
3444   /* Multiple types in SLP are handled by creating the appropriate number of
3445      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
3446      case of SLP.  */
3447   if (slp_node || PURE_SLP_STMT (stmt_info))
3448     ncopies = 1;
3449   else
3450     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3451 
3452   gcc_assert (ncopies >= 1);
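
  /* Worked example (illustrative): with a vectorization factor of 8 and
     V4SI operands (nunits_in == 4), ncopies is 2, i.e. the scalar stmt is
     replaced by two vector stmts chained via STMT_VINFO_RELATED_STMT.  */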
3453 
3454   /* Shifts are handled in vectorizable_shift ().  */
3455   if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
3456       || code == RROTATE_EXPR)
3457    return false;
3458 
3459   optab = optab_for_tree_code (code, vectype, optab_default);
3460 
3461   /* Supportable by target?  */
3462   if (!optab)
3463     {
3464       if (vect_print_dump_info (REPORT_DETAILS))
3465 	fprintf (vect_dump, "no optab.");
3466       return false;
3467     }
3468   vec_mode = TYPE_MODE (vectype);
3469   icode = (int) optab_handler (optab, vec_mode);
3470   if (icode == CODE_FOR_nothing)
3471     {
3472       if (vect_print_dump_info (REPORT_DETAILS))
3473 	fprintf (vect_dump, "op not supported by target.");
3474       /* Check only during analysis.  */
3475       if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
3476 	  || (vf < vect_min_worthwhile_factor (code)
3477               && !vec_stmt))
3478         return false;
3479       if (vect_print_dump_info (REPORT_DETAILS))
3480 	fprintf (vect_dump, "proceeding using word mode.");
3481     }
3482 
3483   /* Worthwhile without SIMD support?  Check only during analysis.  */
3484   if (!VECTOR_MODE_P (TYPE_MODE (vectype))
3485       && vf < vect_min_worthwhile_factor (code)
3486       && !vec_stmt)
3487     {
3488       if (vect_print_dump_info (REPORT_DETAILS))
3489 	fprintf (vect_dump, "not worthwhile without SIMD support.");
3490       return false;
3491     }
3492 
3493   if (!vec_stmt) /* transformation not required.  */
3494     {
3495       STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
3496       if (vect_print_dump_info (REPORT_DETAILS))
3497         fprintf (vect_dump, "=== vectorizable_operation ===");
3498       vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
3499       return true;
3500     }
3501 
3502   /** Transform.  **/
3503 
3504   if (vect_print_dump_info (REPORT_DETAILS))
3505     fprintf (vect_dump, "transform binary/unary/ternary operation.");
3506 
3507   /* Handle def.  */
3508   vec_dest = vect_create_destination_var (scalar_dest, vectype);
3509 
3510   /* In case the vectorization factor (VF) is bigger than the number
3511      of elements that we can fit in a vectype (nunits), we have to generate
3512      more than one vector stmt, i.e., we need to "unroll" the
3513      vector stmt by a factor VF/nunits.  In doing so, we record a pointer
3514      from one copy of the vector stmt to the next, in the field
3515      STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
3516      stages to find the correct vector defs to be used when vectorizing
3517      stmts that use the defs of the current stmt.  The example below
3518      illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
3519      we need to create 4 vectorized stmts):
3520 
3521      before vectorization:
3522                                 RELATED_STMT    VEC_STMT
3523         S1:     x = memref      -               -
3524         S2:     z = x + 1       -               -
3525 
3526      step 1: vectorize stmt S1 (done in vectorizable_load. See more details
3527              there):
3528                                 RELATED_STMT    VEC_STMT
3529         VS1_0:  vx0 = memref0   VS1_1           -
3530         VS1_1:  vx1 = memref1   VS1_2           -
3531         VS1_2:  vx2 = memref2   VS1_3           -
3532         VS1_3:  vx3 = memref3   -               -
3533         S1:     x = load        -               VS1_0
3534         S2:     z = x + 1       -               -
3535 
3536      step 2: vectorize stmt S2 (done here):
3537         To vectorize stmt S2 we first need to find the relevant vector
3538         def for the first operand 'x'.  This is, as usual, obtained from
3539         the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
3540         that defines 'x' (S1).  This way we find the stmt VS1_0, and the
3541         relevant vector def 'vx0'.  Having found 'vx0' we can generate
3542         the vector stmt VS2_0, and as usual, record it in the
3543         STMT_VINFO_VEC_STMT of stmt S2.
3544         When creating the second copy (VS2_1), we obtain the relevant vector
3545         def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
3546         stmt VS1_0.  This way we find the stmt VS1_1 and the relevant
3547         vector def 'vx1'.  Using 'vx1' we create stmt VS2_1 and record a
3548         pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
3549         Similarly when creating stmts VS2_2 and VS2_3.  This is the resulting
3550         chain of stmts and pointers:
3551                                 RELATED_STMT    VEC_STMT
3552         VS1_0:  vx0 = memref0   VS1_1           -
3553         VS1_1:  vx1 = memref1   VS1_2           -
3554         VS1_2:  vx2 = memref2   VS1_3           -
3555         VS1_3:  vx3 = memref3   -               -
3556         S1:     x = load        -               VS1_0
3557         VS2_0:  vz0 = vx0 + v1  VS2_1           -
3558         VS2_1:  vz1 = vx1 + v1  VS2_2           -
3559         VS2_2:  vz2 = vx2 + v1  VS2_3           -
3560         VS2_3:  vz3 = vx3 + v1  -               -
3561         S2:     z = x + 1       -               VS2_0  */
3562 
3563   prev_stmt_info = NULL;
3564   for (j = 0; j < ncopies; j++)
3565     {
3566       /* Handle uses.  */
3567       if (j == 0)
3568 	{
3569 	  if (op_type == binary_op || op_type == ternary_op)
3570 	    vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
3571 			       slp_node, -1);
3572 	  else
3573 	    vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3574 			       slp_node, -1);
3575 	  if (op_type == ternary_op)
3576 	    {
3577 	      vec_oprnds2 = VEC_alloc (tree, heap, 1);
3578 	      VEC_quick_push (tree, vec_oprnds2,
3579 			      vect_get_vec_def_for_operand (op2, stmt, NULL));
3580 	    }
3581 	}
3582       else
3583 	{
3584 	  vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
3585 	  if (op_type == ternary_op)
3586 	    {
3587 	      tree vec_oprnd = VEC_pop (tree, vec_oprnds2);
3588 	      VEC_quick_push (tree, vec_oprnds2,
3589 			      vect_get_vec_def_for_stmt_copy (dt[2],
3590 							      vec_oprnd));
3591 	    }
3592 	}
3593 
3594       /* Arguments are ready.  Create the new vector stmt.  */
3595       FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
3596         {
3597 	  vop1 = ((op_type == binary_op || op_type == ternary_op)
3598 		  ? VEC_index (tree, vec_oprnds1, i) : NULL_TREE);
3599 	  vop2 = ((op_type == ternary_op)
3600 		  ? VEC_index (tree, vec_oprnds2, i) : NULL_TREE);
3601 	  new_stmt = gimple_build_assign_with_ops3 (code, vec_dest,
3602 						    vop0, vop1, vop2);
3603 	  new_temp = make_ssa_name (vec_dest, new_stmt);
3604 	  gimple_assign_set_lhs (new_stmt, new_temp);
3605 	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
3606           if (slp_node)
3607 	    VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
3608         }
3609 
3610       if (slp_node)
3611         continue;
3612 
3613       if (j == 0)
3614 	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3615       else
3616 	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3617       prev_stmt_info = vinfo_for_stmt (new_stmt);
3618     }
3619 
3620   VEC_free (tree, heap, vec_oprnds0);
3621   if (vec_oprnds1)
3622     VEC_free (tree, heap, vec_oprnds1);
3623   if (vec_oprnds2)
3624     VEC_free (tree, heap, vec_oprnds2);
3625 
3626   return true;
3627 }
3628 
3629 
3630 /* Function vectorizable_store.
3631 
3632    Check if STMT defines a non scalar data-ref (array/pointer/structure) that
3633    can be vectorized.
3634    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3635    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3636    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
3637 
3638 static bool
3639 vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
3640 		    slp_tree slp_node)
3641 {
3642   tree scalar_dest;
3643   tree data_ref;
3644   tree op;
3645   tree vec_oprnd = NULL_TREE;
3646   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3647   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
3648   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3649   tree elem_type;
3650   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3651   struct loop *loop = NULL;
3652   enum machine_mode vec_mode;
3653   tree dummy;
3654   enum dr_alignment_support alignment_support_scheme;
3655   tree def;
3656   gimple def_stmt;
3657   enum vect_def_type dt;
3658   stmt_vec_info prev_stmt_info = NULL;
3659   tree dataref_ptr = NULL_TREE;
3660   int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3661   int ncopies;
3662   int j;
3663   gimple next_stmt, first_stmt = NULL;
3664   bool strided_store = false;
3665   bool store_lanes_p = false;
3666   unsigned int group_size, i;
3667   VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL;
3668   bool inv_p;
3669   VEC(tree,heap) *vec_oprnds = NULL;
3670   bool slp = (slp_node != NULL);
3671   unsigned int vec_num;
3672   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3673   tree aggr_type;
3674 
3675   if (loop_vinfo)
3676     loop = LOOP_VINFO_LOOP (loop_vinfo);
3677 
3678   /* Multiple types in SLP are handled by creating the appropriate number of
3679      vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3680      case of SLP.  */
3681   if (slp || PURE_SLP_STMT (stmt_info))
3682     ncopies = 1;
3683   else
3684     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3685 
3686   gcc_assert (ncopies >= 1);
3687 
3688   /* FORNOW. This restriction should be relaxed.  */
3689   if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
3690     {
3691       if (vect_print_dump_info (REPORT_DETAILS))
3692         fprintf (vect_dump, "multiple types in nested loop.");
3693       return false;
3694     }
3695 
3696   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3697     return false;
3698 
3699   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3700     return false;
3701 
3702   /* Is vectorizable store? */
3703 
3704   if (!is_gimple_assign (stmt))
3705     return false;
3706 
3707   scalar_dest = gimple_assign_lhs (stmt);
3708   if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
3709       && is_pattern_stmt_p (stmt_info))
3710     scalar_dest = TREE_OPERAND (scalar_dest, 0);
3711   if (TREE_CODE (scalar_dest) != ARRAY_REF
3712       && TREE_CODE (scalar_dest) != INDIRECT_REF
3713       && TREE_CODE (scalar_dest) != COMPONENT_REF
3714       && TREE_CODE (scalar_dest) != IMAGPART_EXPR
3715       && TREE_CODE (scalar_dest) != REALPART_EXPR
3716       && TREE_CODE (scalar_dest) != MEM_REF)
3717     return false;
3718 
3719   gcc_assert (gimple_assign_single_p (stmt));
3720   op = gimple_assign_rhs1 (stmt);
3721   if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3722 			   &def, &dt))
3723     {
3724       if (vect_print_dump_info (REPORT_DETAILS))
3725         fprintf (vect_dump, "use not simple.");
3726       return false;
3727     }
3728 
3729   elem_type = TREE_TYPE (vectype);
3730   vec_mode = TYPE_MODE (vectype);
3731 
3732   /* FORNOW.  In some cases we can vectorize even if the data type is not
3733      supported (e.g. array initialization with 0).  */
3734   if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
3735     return false;
3736 
3737   if (!STMT_VINFO_DATA_REF (stmt_info))
3738     return false;
3739 
3740   if (tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
3741 			    ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
3742 			    size_zero_node) < 0)
3743     {
3744       if (vect_print_dump_info (REPORT_DETAILS))
3745         fprintf (vect_dump, "negative step for store.");
3746       return false;
3747     }
3748 
3749   if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
3750     {
3751       strided_store = true;
3752       first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
3753       if (!slp && !PURE_SLP_STMT (stmt_info))
3754 	{
3755 	  group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
3756 	  if (vect_store_lanes_supported (vectype, group_size))
3757 	    store_lanes_p = true;
3758 	  else if (!vect_strided_store_supported (vectype, group_size))
3759 	    return false;
3760 	}
3761 
3762       if (first_stmt == stmt)
3763 	{
3764           /* STMT is the leader of the group. Check the operands of all the
3765              stmts of the group.  */
3766           next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
3767           while (next_stmt)
3768             {
3769 	      gcc_assert (gimple_assign_single_p (next_stmt));
3770 	      op = gimple_assign_rhs1 (next_stmt);
3771               if (!vect_is_simple_use (op, next_stmt, loop_vinfo, bb_vinfo,
3772 				       &def_stmt, &def, &dt))
3773                 {
3774                   if (vect_print_dump_info (REPORT_DETAILS))
3775                     fprintf (vect_dump, "use not simple.");
3776                   return false;
3777                 }
3778               next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
3779             }
3780         }
3781     }
3782 
3783   if (!vec_stmt) /* transformation not required.  */
3784     {
3785       STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
3786       vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt, NULL);
3787       return true;
3788     }
3789 
3790   /** Transform.  **/
3791 
3792   if (strided_store)
3793     {
3794       first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3795       group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
3796 
3797       GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
3798 
3799       /* FORNOW */
3800       gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
3801 
3802       /* We vectorize all the stmts of the interleaving group when we
3803 	 reach the last stmt in the group.  */
3804       if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
3805 	  < GROUP_SIZE (vinfo_for_stmt (first_stmt))
3806 	  && !slp)
3807 	{
3808 	  *vec_stmt = NULL;
3809 	  return true;
3810 	}
3811 
3812       if (slp)
3813         {
3814           strided_store = false;
3815           /* VEC_NUM is the number of vect stmts to be created for this
3816              group.  */
3817           vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3818           first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
3819           first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3820 	  op = gimple_assign_rhs1 (first_stmt);
3821         }
3822       else
3823         /* VEC_NUM is the number of vect stmts to be created for this
3824            group.  */
3825 	vec_num = group_size;
3826     }
3827   else
3828     {
3829       first_stmt = stmt;
3830       first_dr = dr;
3831       group_size = vec_num = 1;
3832     }
3833 
3834   if (vect_print_dump_info (REPORT_DETAILS))
3835     fprintf (vect_dump, "transform store. ncopies = %d", ncopies);
3836 
3837   dr_chain = VEC_alloc (tree, heap, group_size);
3838   oprnds = VEC_alloc (tree, heap, group_size);
3839 
3840   alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
3841   gcc_assert (alignment_support_scheme);
3842   /* Targets with store-lane instructions must not require explicit
3843      realignment.  */
3844   gcc_assert (!store_lanes_p
3845 	      || alignment_support_scheme == dr_aligned
3846 	      || alignment_support_scheme == dr_unaligned_supported);
3847 
3848   if (store_lanes_p)
3849     aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
3850   else
3851     aggr_type = vectype;
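
  /* Illustrative example: for an interleaving group of three V4SI stores
     handled with store-lanes, AGGR_TYPE is int[12] and the whole group is
     written by one IFN_STORE_LANES call below (expected to map to a
     vst3-style instruction on targets such as ARM NEON - an assumption
     about the target, not something checked here).  */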
3852 
3853   /* In case the vectorization factor (VF) is bigger than the number
3854      of elements that we can fit in a vectype (nunits), we have to generate
3855      more than one vector stmt, i.e., we need to "unroll" the
3856      vector stmt by a factor VF/nunits.  For more details see documentation in
3857      vect_get_vec_def_for_copy_stmt.  */
3858 
3859   /* In case of interleaving (non-unit strided access):
3860 
3861         S1:  &base + 2 = x2
3862         S2:  &base = x0
3863         S3:  &base + 1 = x1
3864         S4:  &base + 3 = x3
3865 
3866      We create vectorized stores starting from the base address (the access of
3867      the first stmt in the chain, S2 in the above example), when the last store
3868      stmt of the chain (S4) is reached:
3869 
3870         VS1: &base = vx2
3871 	VS2: &base + vec_size*1 = vx0
3872 	VS3: &base + vec_size*2 = vx1
3873 	VS4: &base + vec_size*3 = vx3
3874 
3875      Then permutation statements are generated:
3876 
3877 	VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
3878 	VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
3879 	...
3880 
3881      And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
3882      (the order of the data-refs in the output of vect_permute_store_chain
3883      corresponds to the order of scalar stmts in the interleaving chain - see
3884      the documentation of vect_permute_store_chain()).
3885 
3886      In case of both multiple types and interleaving, above vector stores and
3887      permutation stmts are created for every copy.  The result vector stmts are
3888      put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
3889      STMT_VINFO_RELATED_STMT for the next copies.
3890   */
3891 
3892   prev_stmt_info = NULL;
3893   for (j = 0; j < ncopies; j++)
3894     {
3895       gimple new_stmt;
3896       gimple ptr_incr;
3897 
3898       if (j == 0)
3899 	{
3900           if (slp)
3901             {
3902 	      /* Get vectorized arguments for SLP_NODE.  */
3903               vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
3904                                  NULL, slp_node, -1);
3905 
3906               vec_oprnd = VEC_index (tree, vec_oprnds, 0);
3907             }
3908           else
3909             {
3910 	      /* For interleaved stores we collect vectorized defs for all the
3911 		 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
3912 		 used as an input to vect_permute_store_chain(), and OPRNDS as
3913 		 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
3914 
3915 		 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3916 		 OPRNDS are of size 1.  */
3917 	      next_stmt = first_stmt;
3918 	      for (i = 0; i < group_size; i++)
3919 		{
3920 		  /* Since gaps are not supported for interleaved stores,
3921 		     GROUP_SIZE is the exact number of stmts in the chain.
3922 		     Therefore, NEXT_STMT can't be NULL.  If there is no
3923 		     interleaving, GROUP_SIZE is 1, and only one iteration
3924 		     of the loop will be executed.  */
3925 		  gcc_assert (next_stmt
3926 			      && gimple_assign_single_p (next_stmt));
3927 		  op = gimple_assign_rhs1 (next_stmt);
3928 
3929 		  vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
3930 							    NULL);
3931 		  VEC_quick_push (tree, dr_chain, vec_oprnd);
3932 		  VEC_quick_push (tree, oprnds, vec_oprnd);
3933 		  next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
3934 		}
3935 	    }
3936 
3937 	  /* We should have caught mismatched types earlier.  */
3938 	  gcc_assert (useless_type_conversion_p (vectype,
3939 						 TREE_TYPE (vec_oprnd)));
3940 	  dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, NULL,
3941 						  NULL_TREE, &dummy, gsi,
3942 						  &ptr_incr, false, &inv_p);
3943 	  gcc_assert (bb_vinfo || !inv_p);
3944 	}
3945       else
3946 	{
3947 	  /* For interleaved stores we created vectorized defs for all the
3948 	     defs stored in OPRNDS in the previous iteration (previous copy).
3949 	     DR_CHAIN is then used as an input to vect_permute_store_chain(),
3950 	     and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
3951 	     next copy.
3952 	     If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3953 	     OPRNDS are of size 1.  */
3954 	  for (i = 0; i < group_size; i++)
3955 	    {
3956 	      op = VEC_index (tree, oprnds, i);
3957 	      vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo, &def_stmt,
3958 				  &def, &dt);
3959 	      vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
3960 	      VEC_replace (tree, dr_chain, i, vec_oprnd);
3961 	      VEC_replace (tree, oprnds, i, vec_oprnd);
3962 	    }
3963 	  dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
3964 					 TYPE_SIZE_UNIT (aggr_type));
3965 	}
3966 
3967       if (store_lanes_p)
3968 	{
3969 	  tree vec_array;
3970 
3971 	  /* Combine all the vectors into an array.  */
3972 	  vec_array = create_vector_array (vectype, vec_num);
3973 	  for (i = 0; i < vec_num; i++)
3974 	    {
3975 	      vec_oprnd = VEC_index (tree, dr_chain, i);
3976 	      write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
3977 	    }
3978 
3979 	  /* Emit:
3980 	       MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY).  */
3981 	  data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
3982 	  new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
3983 	  gimple_call_set_lhs (new_stmt, data_ref);
3984 	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
3985 	  mark_symbols_for_renaming (new_stmt);
3986 	}
3987       else
3988 	{
3989 	  new_stmt = NULL;
3990 	  if (strided_store)
3991 	    {
3992 	      result_chain = VEC_alloc (tree, heap, group_size);
3993 	      /* Permute.  */
3994 	      vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
3995 					&result_chain);
3996 	    }
3997 
3998 	  next_stmt = first_stmt;
3999 	  for (i = 0; i < vec_num; i++)
4000 	    {
4001 	      struct ptr_info_def *pi;
4002 
4003 	      if (i > 0)
4004 		/* Bump the vector pointer.  */
4005 		dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
4006 					       stmt, NULL_TREE);
4007 
4008 	      if (slp)
4009 		vec_oprnd = VEC_index (tree, vec_oprnds, i);
4010 	      else if (strided_store)
4011 		/* For strided stores vectorized defs are interleaved in
4012 		   vect_permute_store_chain().  */
4013 		vec_oprnd = VEC_index (tree, result_chain, i);
4014 
4015 	      data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
4016 				 build_int_cst (reference_alias_ptr_type
4017 						(DR_REF (first_dr)), 0));
4018 	      pi = get_ptr_info (dataref_ptr);
4019 	      pi->align = TYPE_ALIGN_UNIT (vectype);
4020 	      if (aligned_access_p (first_dr))
4021 		pi->misalign = 0;
4022 	      else if (DR_MISALIGNMENT (first_dr) == -1)
4023 		{
4024 		  TREE_TYPE (data_ref)
4025 		    = build_aligned_type (TREE_TYPE (data_ref),
4026 					  TYPE_ALIGN (elem_type));
4027 		  pi->align = TYPE_ALIGN_UNIT (elem_type);
4028 		  pi->misalign = 0;
4029 		}
4030 	      else
4031 		{
4032 		  TREE_TYPE (data_ref)
4033 		    = build_aligned_type (TREE_TYPE (data_ref),
4034 					  TYPE_ALIGN (elem_type));
4035 		  pi->misalign = DR_MISALIGNMENT (first_dr);
4036 		}
4037 
4038 	      /* Arguments are ready.  Create the new vector stmt.  */
4039 	      new_stmt = gimple_build_assign (data_ref, vec_oprnd);
4040 	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
4041 	      mark_symbols_for_renaming (new_stmt);
4042 
4043 	      if (slp)
4044 		continue;
4045 
4046 	      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
4047 	      if (!next_stmt)
4048 		break;
4049 	    }
4050 	}
4051       if (!slp)
4052 	{
4053 	  if (j == 0)
4054 	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4055 	  else
4056 	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4057 	  prev_stmt_info = vinfo_for_stmt (new_stmt);
4058 	}
4059     }
4060 
4061   VEC_free (tree, heap, dr_chain);
4062   VEC_free (tree, heap, oprnds);
4063   if (result_chain)
4064     VEC_free (tree, heap, result_chain);
4065   if (vec_oprnds)
4066     VEC_free (tree, heap, vec_oprnds);
4067 
4068   return true;
4069 }
4070 
4071 /* Given a vector type VECTYPE and a permutation SEL, return
4072    the VECTOR_CST mask that implements the permutation of the
4073    vector elements.  If that is impossible to do, return NULL.  */
4074 
4075 tree
4076 vect_gen_perm_mask (tree vectype, unsigned char *sel)
4077 {
4078   tree mask_elt_type, mask_type, mask_vec;
4079   int i, nunits;
4080 
4081   nunits = TYPE_VECTOR_SUBPARTS (vectype);
4082 
4083   if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
4084     return NULL;
4085 
4086   mask_elt_type
4087     = lang_hooks.types.type_for_size
4088     (TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (vectype))), 1);
4089   mask_type = get_vectype_for_scalar_type (mask_elt_type);
4090 
4091   mask_vec = NULL;
4092   for (i = nunits - 1; i >= 0; i--)
4093     mask_vec = tree_cons (NULL, build_int_cst (mask_elt_type, sel[i]),
4094 			  mask_vec);
4095   mask_vec = build_vector (mask_type, mask_vec);
4096 
4097   return mask_vec;
4098 }
4099 
4100 /* Given a vector type VECTYPE, return the VECTOR_CST mask that implements
4101    reversal of the vector elements.  If that is impossible to do,
4102    return NULL.  */
4103 
4104 static tree
4105 perm_mask_for_reverse (tree vectype)
4106 {
4107   int i, nunits;
4108   unsigned char *sel;
4109 
4110   nunits = TYPE_VECTOR_SUBPARTS (vectype);
4111   sel = XALLOCAVEC (unsigned char, nunits);
4112 
4113   for (i = 0; i < nunits; ++i)
4114     sel[i] = nunits - 1 - i;
4115 
4116   return vect_gen_perm_mask (vectype, sel);
4117 }
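
/* Usage sketch (added): for a V4SI vector type, perm_mask_for_reverse builds
   SEL = {3, 2, 1, 0} and vect_gen_perm_mask turns it into the VECTOR_CST
   {3, 2, 1, 0} with a 32-bit integer element type, suitable as the third
   operand of a VEC_PERM_EXPR; NULL is returned when can_vec_perm_p reports
   that the target cannot perform the permutation.  */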
4118 
4119 /* Given vector variables X and Y that were generated for the scalar
4120    STMT, generate instructions to permute the vector elements of X and Y
4121    using the permutation mask MASK_VEC, insert them at *GSI and return
4122    the permuted vector variable.  */
4123 
4124 static tree
4125 permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
4126 		      gimple_stmt_iterator *gsi)
4127 {
4128   tree vectype = TREE_TYPE (x);
4129   tree perm_dest, data_ref;
4130   gimple perm_stmt;
4131 
4132   perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
4133   data_ref = make_ssa_name (perm_dest, NULL);
4134 
4135   /* Generate the permute statement.  */
4136   perm_stmt = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, data_ref,
4137 					     x, y, mask_vec);
4138   vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4139 
4140   return data_ref;
4141 }
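
/* The statement emitted by permute_vec_elements has the form (illustrative):
     data_ref = VEC_PERM_EXPR <x, y, mask_vec>;
   and the returned SSA name is used by callers in place of X and Y.  */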
4142 
4143 /* Function vectorizable_load.
4144 
4145    Check if STMT reads a non scalar data-ref (array/pointer/structure) that
4146    can be vectorized.
4147    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4148    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4149    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
4150 
4151 static bool
4152 vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
4153 		   slp_tree slp_node, slp_instance slp_node_instance)
4154 {
4155   tree scalar_dest;
4156   tree vec_dest = NULL;
4157   tree data_ref = NULL;
4158   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4159   stmt_vec_info prev_stmt_info;
4160   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4161   struct loop *loop = NULL;
4162   struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
4163   bool nested_in_vect_loop = false;
4164   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
4165   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4166   tree elem_type;
4167   tree new_temp;
4168   enum machine_mode mode;
4169   gimple new_stmt = NULL;
4170   tree dummy;
4171   enum dr_alignment_support alignment_support_scheme;
4172   tree dataref_ptr = NULL_TREE;
4173   gimple ptr_incr;
4174   int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4175   int ncopies;
4176   int i, j, group_size;
4177   tree msq = NULL_TREE, lsq;
4178   tree offset = NULL_TREE;
4179   tree realignment_token = NULL_TREE;
4180   gimple phi = NULL;
4181   VEC(tree,heap) *dr_chain = NULL;
4182   bool strided_load = false;
4183   bool load_lanes_p = false;
4184   gimple first_stmt;
4185   bool inv_p;
4186   bool negative;
4187   bool compute_in_loop = false;
4188   struct loop *at_loop;
4189   int vec_num;
4190   bool slp = (slp_node != NULL);
4191   bool slp_perm = false;
4192   enum tree_code code;
4193   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4194   int vf;
4195   tree aggr_type;
4196   tree gather_base = NULL_TREE, gather_off = NULL_TREE;
4197   tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
4198   int gather_scale = 1;
4199   enum vect_def_type gather_dt = vect_unknown_def_type;
4200 
4201   if (loop_vinfo)
4202     {
4203       loop = LOOP_VINFO_LOOP (loop_vinfo);
4204       nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
4205       vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4206     }
4207   else
4208     vf = 1;
4209 
4210   /* Multiple types in SLP are handled by creating the appropriate number of
4211      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
4212      case of SLP.  */
4213   if (slp || PURE_SLP_STMT (stmt_info))
4214     ncopies = 1;
4215   else
4216     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4217 
4218   gcc_assert (ncopies >= 1);
4219 
4220   /* FORNOW. This restriction should be relaxed.  */
4221   if (nested_in_vect_loop && ncopies > 1)
4222     {
4223       if (vect_print_dump_info (REPORT_DETAILS))
4224         fprintf (vect_dump, "multiple types in nested loop.");
4225       return false;
4226     }
4227 
4228   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4229     return false;
4230 
4231   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4232     return false;
4233 
4234   /* Is vectorizable load? */
4235   if (!is_gimple_assign (stmt))
4236     return false;
4237 
4238   scalar_dest = gimple_assign_lhs (stmt);
4239   if (TREE_CODE (scalar_dest) != SSA_NAME)
4240     return false;
4241 
4242   code = gimple_assign_rhs_code (stmt);
4243   if (code != ARRAY_REF
4244       && code != INDIRECT_REF
4245       && code != COMPONENT_REF
4246       && code != IMAGPART_EXPR
4247       && code != REALPART_EXPR
4248       && code != MEM_REF
4249       && TREE_CODE_CLASS (code) != tcc_declaration)
4250     return false;
4251 
4252   if (!STMT_VINFO_DATA_REF (stmt_info))
4253     return false;
4254 
4255   negative = tree_int_cst_compare (nested_in_vect_loop
4256 				   ? STMT_VINFO_DR_STEP (stmt_info)
4257 				   : DR_STEP (dr),
4258 				   size_zero_node) < 0;
4259   if (negative && ncopies > 1)
4260     {
4261       if (vect_print_dump_info (REPORT_DETAILS))
4262         fprintf (vect_dump, "multiple types with negative step.");
4263       return false;
4264     }
4265 
4266   elem_type = TREE_TYPE (vectype);
4267   mode = TYPE_MODE (vectype);
4268 
4269   /* FORNOW.  In some cases we can vectorize even if the data type is not
4270      supported (e.g. data copies).  */
4271   if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
4272     {
4273       if (vect_print_dump_info (REPORT_DETAILS))
4274 	fprintf (vect_dump, "Aligned load, but unsupported type.");
4275       return false;
4276     }
4277 
4278   /* Check if the load is a part of an interleaving chain.  */
4279   if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
4280     {
4281       strided_load = true;
4282       /* FORNOW */
4283       gcc_assert (! nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));
4284 
4285       first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
4286       if (!slp && !PURE_SLP_STMT (stmt_info))
4287 	{
4288 	  group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4289 	  if (vect_load_lanes_supported (vectype, group_size))
4290 	    load_lanes_p = true;
4291 	  else if (!vect_strided_load_supported (vectype, group_size))
4292 	    return false;
4293 	}
4294     }
4295 
4296   if (negative)
4297     {
4298       gcc_assert (!strided_load && !STMT_VINFO_GATHER_P (stmt_info));
4299       alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
4300       if (alignment_support_scheme != dr_aligned
4301 	  && alignment_support_scheme != dr_unaligned_supported)
4302 	{
4303 	  if (vect_print_dump_info (REPORT_DETAILS))
4304 	    fprintf (vect_dump, "negative step but alignment required.");
4305 	  return false;
4306 	}
4307       if (!perm_mask_for_reverse (vectype))
4308 	{
4309 	  if (vect_print_dump_info (REPORT_DETAILS))
4310 	    fprintf (vect_dump, "negative step and reversing not supported.");
4311 	  return false;
4312 	}
4313     }
4314 
4315   if (STMT_VINFO_GATHER_P (stmt_info))
4316     {
4317       gimple def_stmt;
4318       tree def;
4319       gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
4320 				       &gather_off, &gather_scale);
4321       gcc_assert (gather_decl);
4322       if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, bb_vinfo,
4323 				 &def_stmt, &def, &gather_dt,
4324 				 &gather_off_vectype))
4325 	{
4326 	  if (vect_print_dump_info (REPORT_DETAILS))
4327 	    fprintf (vect_dump, "gather index use not simple.");
4328 	  return false;
4329 	}
4330     }
4331 
4332   if (!vec_stmt) /* transformation not required.  */
4333     {
4334       STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
4335       vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL);
4336       return true;
4337     }
4338 
4339   if (vect_print_dump_info (REPORT_DETAILS))
4340     fprintf (vect_dump, "transform load. ncopies = %d", ncopies);
4341 
4342   /** Transform.  **/
4343 
4344   if (STMT_VINFO_GATHER_P (stmt_info))
4345     {
4346       tree vec_oprnd0 = NULL_TREE, op;
4347       tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
4348       tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
4349       tree ptr, mask, var, scale, perm_mask = NULL_TREE, prev_res = NULL_TREE;
4350       edge pe = loop_preheader_edge (loop);
4351       gimple_seq seq;
4352       basic_block new_bb;
4353       enum { NARROW, NONE, WIDEN } modifier;
4354       int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
4355 
4356       if (nunits == gather_off_nunits)
4357 	modifier = NONE;
4358       else if (nunits == gather_off_nunits / 2)
4359 	{
4360 	  unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
4361 	  modifier = WIDEN;
4362 
4363 	  for (i = 0; i < gather_off_nunits; ++i)
4364 	    sel[i] = i | nunits;
4365 
4366 	  perm_mask = vect_gen_perm_mask (gather_off_vectype, sel);
4367 	  gcc_assert (perm_mask != NULL_TREE);
4368 	}
4369       else if (nunits == gather_off_nunits * 2)
4370 	{
4371 	  unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
4372 	  modifier = NARROW;
4373 
4374 	  for (i = 0; i < nunits; ++i)
4375 	    sel[i] = i < gather_off_nunits
4376 		     ? i : i + nunits - gather_off_nunits;
4377 
4378 	  perm_mask = vect_gen_perm_mask (vectype, sel);
4379 	  gcc_assert (perm_mask != NULL_TREE);
4380 	  ncopies *= 2;
4381 	}
4382       else
4383 	gcc_unreachable ();
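      /* Worked example (illustrative, x86-style modes): gathering V2DF data
	 with a V4SI offset vector gives modifier == WIDEN, and with
	 nunits == 2 the mask above is sel == {2, 3, 2, 3}, so odd copies
	 reuse the high half of the offset vector.  Gathering V4SF data with
	 a V2DI offset gives modifier == NARROW and sel == {0, 1, 4, 5},
	 which later combines the low halves of two gather results into a
	 single V4SF vector.  */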
4384 
4385       rettype = TREE_TYPE (TREE_TYPE (gather_decl));
4386       srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4387       ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4388       idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4389       masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4390       scaletype = TREE_VALUE (arglist);
4391       gcc_checking_assert (types_compatible_p (srctype, rettype)
4392 			   && types_compatible_p (srctype, masktype));
4393 
4394       vec_dest = vect_create_destination_var (scalar_dest, vectype);
4395 
4396       ptr = fold_convert (ptrtype, gather_base);
4397       if (!is_gimple_min_invariant (ptr))
4398 	{
4399 	  ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
4400 	  new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
4401 	  gcc_assert (!new_bb);
4402 	}
4403 
4404       /* Currently we support only unconditional gather loads,
4405 	 so mask should be all ones.  */
4406       if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
4407 	mask = build_int_cst (TREE_TYPE (masktype), -1);
4408       else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
4409 	{
4410 	  REAL_VALUE_TYPE r;
4411 	  long tmp[6];
4412 	  for (j = 0; j < 6; ++j)
4413 	    tmp[j] = -1;
4414 	  real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
4415 	  mask = build_real (TREE_TYPE (masktype), r);
4416 	}
4417       else
4418 	gcc_unreachable ();
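      /* Added note (assumption about the builtin's convention): the mask has
	 the same shape as the gather result, and an all-ones bit pattern in
	 every element - build_int_cst for integer masks, real_from_target
	 for floating-point masks - marks every lane as active, which is what
	 an unconditional gather load needs.  */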
4419       mask = build_vector_from_val (masktype, mask);
4420       mask = vect_init_vector (stmt, mask, masktype, NULL);
4421 
4422       scale = build_int_cst (scaletype, gather_scale);
4423 
4424       prev_stmt_info = NULL;
4425       for (j = 0; j < ncopies; ++j)
4426 	{
4427 	  if (modifier == WIDEN && (j & 1))
4428 	    op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
4429 				       perm_mask, stmt, gsi);
4430 	  else if (j == 0)
4431 	    op = vec_oprnd0
4432 	      = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
4433 	  else
4434 	    op = vec_oprnd0
4435 	      = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
4436 
4437 	  if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
4438 	    {
4439 	      gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
4440 			  == TYPE_VECTOR_SUBPARTS (idxtype));
4441 	      var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
4442 	      add_referenced_var (var);
4443 	      var = make_ssa_name (var, NULL);
4444 	      op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
4445 	      new_stmt
4446 		= gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
4447 						op, NULL_TREE);
4448 	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
4449 	      op = var;
4450 	    }
4451 
4452 	  new_stmt
4453 	    = gimple_build_call (gather_decl, 5, mask, ptr, op, mask, scale);
4454 
4455 	  if (!useless_type_conversion_p (vectype, rettype))
4456 	    {
4457 	      gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
4458 			  == TYPE_VECTOR_SUBPARTS (rettype));
4459 	      var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
4460 	      add_referenced_var (var);
4461 	      op = make_ssa_name (var, new_stmt);
4462 	      gimple_call_set_lhs (new_stmt, op);
4463 	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
4464 	      var = make_ssa_name (vec_dest, NULL);
4465 	      op = build1 (VIEW_CONVERT_EXPR, vectype, op);
4466 	      new_stmt
4467 		= gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op,
4468 						NULL_TREE);
4469 	    }
4470 	  else
4471 	    {
4472 	      var = make_ssa_name (vec_dest, new_stmt);
4473 	      gimple_call_set_lhs (new_stmt, var);
4474 	    }
4475 
4476 	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
4477 
4478 	  if (modifier == NARROW)
4479 	    {
4480 	      if ((j & 1) == 0)
4481 		{
4482 		  prev_res = var;
4483 		  continue;
4484 		}
4485 	      var = permute_vec_elements (prev_res, var,
4486 					  perm_mask, stmt, gsi);
4487 	      new_stmt = SSA_NAME_DEF_STMT (var);
4488 	    }
4489 
4490 	  if (prev_stmt_info == NULL)
4491 	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4492 	  else
4493 	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4494 	  prev_stmt_info = vinfo_for_stmt (new_stmt);
4495 	}
4496       return true;
4497     }
4498 
4499   if (strided_load)
4500     {
4501       first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
4502       if (slp
4503           && !SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance)
4504 	  && first_stmt != VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0))
4505         first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
4506 
4507       /* Check if the chain of loads is already vectorized.  */
4508       if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
4509 	{
4510 	  *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4511 	  return true;
4512 	}
4513       first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
4514       group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4515 
4516       /* VEC_NUM is the number of vect stmts to be created for this group.  */
4517       if (slp)
4518 	{
4519 	  strided_load = false;
4520 	  vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
4521           if (SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance))
4522             slp_perm = true;
4523 	}
4524       else
4525 	vec_num = group_size;
4526     }
4527   else
4528     {
4529       first_stmt = stmt;
4530       first_dr = dr;
4531       group_size = vec_num = 1;
4532     }
4533 
4534   alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
4535   gcc_assert (alignment_support_scheme);
4536   /* Targets with load-lane instructions must not require explicit
4537      realignment.  */
4538   gcc_assert (!load_lanes_p
4539 	      || alignment_support_scheme == dr_aligned
4540 	      || alignment_support_scheme == dr_unaligned_supported);
4541 
4542   /* In case the vectorization factor (VF) is bigger than the number
4543      of elements that we can fit in a vectype (nunits), we have to generate
4544      more than one vector stmt, i.e., we need to "unroll" the
4545      vector stmt by a factor VF/nunits.  In doing so, we record a pointer
4546      from one copy of the vector stmt to the next, in the field
4547      STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
4548      stages to find the correct vector defs to be used when vectorizing
4549      stmts that use the defs of the current stmt.  The example below
4550      illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
4551      need to create 4 vectorized stmts):
4552 
4553      before vectorization:
4554                                 RELATED_STMT    VEC_STMT
4555         S1:     x = memref      -               -
4556         S2:     z = x + 1       -               -
4557 
4558      step 1: vectorize stmt S1:
4559         We first create the vector stmt VS1_0, and, as usual, record a
4560         pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
4561         Next, we create the vector stmt VS1_1, and record a pointer to
4562         it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
4563         Similarly, for VS1_2 and VS1_3.  This is the resulting chain of
4564         stmts and pointers:
4565                                 RELATED_STMT    VEC_STMT
4566         VS1_0:  vx0 = memref0   VS1_1           -
4567         VS1_1:  vx1 = memref1   VS1_2           -
4568         VS1_2:  vx2 = memref2   VS1_3           -
4569         VS1_3:  vx3 = memref3   -               -
4570         S1:     x = load        -               VS1_0
4571         S2:     z = x + 1       -               -
4572 
4573      See in documentation in vect_get_vec_def_for_stmt_copy for how the
4574      information we recorded in RELATED_STMT field is used to vectorize
4575      stmt S2.  */
4576 
4577   /* In case of interleaving (non-unit strided access):
4578 
4579      S1:  x2 = &base + 2
4580      S2:  x0 = &base
4581      S3:  x1 = &base + 1
4582      S4:  x3 = &base + 3
4583 
4584      Vectorized loads are created in the order of memory accesses
4585      starting from the access of the first stmt of the chain:
4586 
4587      VS1: vx0 = &base
4588      VS2: vx1 = &base + vec_size*1
4589      VS3: vx3 = &base + vec_size*2
4590      VS4: vx4 = &base + vec_size*3
4591 
4592      Then permutation statements are generated:
4593 
4594      VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
4595      VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
4596        ...
4597 
4598      And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4599      (the order of the data-refs in the output of vect_permute_load_chain
4600      corresponds to the order of scalar stmts in the interleaving chain - see
4601      the documentation of vect_permute_load_chain()).
4602      The generation of permutation stmts and recording them in
4603      STMT_VINFO_VEC_STMT is done in vect_transform_strided_load().
4604 
4605      In case of both multiple types and interleaving, the vector loads and
4606      permutation stmts above are created for every copy.  The result vector
4607      stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
4608      corresponding STMT_VINFO_RELATED_STMT for the next copies.  */
4609 
4610   /* If the data reference is aligned (dr_aligned) or potentially unaligned
4611      on a target that supports unaligned accesses (dr_unaligned_supported)
4612      we generate the following code:
4613          p = initial_addr;
4614          indx = 0;
4615          loop {
4616 	   p = p + indx * vectype_size;
4617            vec_dest = *(p);
4618            indx = indx + 1;
4619          }
4620 
4621      Otherwise, the data reference is potentially unaligned on a target that
4622      does not support unaligned accesses (dr_explicit_realign_optimized) -
4623      then generate the following code, in which the data in each iteration is
4624      obtained by two vector loads, one from the previous iteration, and one
4625      from the current iteration:
4626          p1 = initial_addr;
4627          msq_init = *(floor(p1))
4628          p2 = initial_addr + VS - 1;
4629          realignment_token = call target_builtin;
4630          indx = 0;
4631          loop {
4632            p2 = p2 + indx * vectype_size
4633            lsq = *(floor(p2))
4634            vec_dest = realign_load (msq, lsq, realignment_token)
4635            indx = indx + 1;
4636            msq = lsq;
4637          }   */
4638 
4639   /* If the misalignment remains the same throughout the execution of the
4640      loop, we can create the init_addr and permutation mask at the loop
4641      preheader.  Otherwise, it needs to be created inside the loop.
4642      This can only occur when vectorizing memory accesses in the inner-loop
4643      nested within an outer-loop that is being vectorized.  */
4644 
4645   if (nested_in_vect_loop
4646       && (TREE_INT_CST_LOW (DR_STEP (dr))
4647 	  % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
4648     {
4649       gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
4650       compute_in_loop = true;
4651     }
4652 
4653   if ((alignment_support_scheme == dr_explicit_realign_optimized
4654        || alignment_support_scheme == dr_explicit_realign)
4655       && !compute_in_loop)
4656     {
4657       msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
4658 				    alignment_support_scheme, NULL_TREE,
4659 				    &at_loop);
4660       if (alignment_support_scheme == dr_explicit_realign_optimized)
4661 	{
4662 	  phi = SSA_NAME_DEF_STMT (msq);
4663 	  offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
4664 	}
4665     }
4666   else
4667     at_loop = loop;
4668 
4669   if (negative)
4670     offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
4671 
4672   if (load_lanes_p)
4673     aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
4674   else
4675     aggr_type = vectype;
4676 
4677   prev_stmt_info = NULL;
4678   for (j = 0; j < ncopies; j++)
4679     {
4680       /* 1. Create the vector or array pointer update chain.  */
4681       if (j == 0)
4682         dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
4683 						offset, &dummy, gsi,
4684 						&ptr_incr, false, &inv_p);
4685       else
4686         dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
4687 				       TYPE_SIZE_UNIT (aggr_type));
4688 
4689       if (strided_load || slp_perm)
4690 	dr_chain = VEC_alloc (tree, heap, vec_num);
4691 
4692       if (load_lanes_p)
4693 	{
4694 	  tree vec_array;
4695 
4696 	  vec_array = create_vector_array (vectype, vec_num);
4697 
4698 	  /* Emit:
4699 	       VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]).  */
4700 	  data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
4701 	  new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
4702 	  gimple_call_set_lhs (new_stmt, vec_array);
4703 	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
4704 	  mark_symbols_for_renaming (new_stmt);
4705 
4706 	  /* Extract each vector into an SSA_NAME.  */
4707 	  for (i = 0; i < vec_num; i++)
4708 	    {
4709 	      new_temp = read_vector_array (stmt, gsi, scalar_dest,
4710 					    vec_array, i);
4711 	      VEC_quick_push (tree, dr_chain, new_temp);
4712 	    }
4713 
4714 	  /* Record the mapping between SSA_NAMEs and statements.  */
4715 	  vect_record_strided_load_vectors (stmt, dr_chain);
4716 	}
4717       else
4718 	{
4719 	  for (i = 0; i < vec_num; i++)
4720 	    {
4721 	      if (i > 0)
4722 		dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
4723 					       stmt, NULL_TREE);
4724 
4725 	      /* 2. Create the vector-load in the loop.  */
4726 	      switch (alignment_support_scheme)
4727 		{
4728 		case dr_aligned:
4729 		case dr_unaligned_supported:
4730 		  {
4731 		    struct ptr_info_def *pi;
4732 		    data_ref
4733 		      = build2 (MEM_REF, vectype, dataref_ptr,
4734 				build_int_cst (reference_alias_ptr_type
4735 					       (DR_REF (first_dr)), 0));
4736 		    pi = get_ptr_info (dataref_ptr);
4737 		    pi->align = TYPE_ALIGN_UNIT (vectype);
4738 		    if (alignment_support_scheme == dr_aligned)
4739 		      {
4740 			gcc_assert (aligned_access_p (first_dr));
4741 			pi->misalign = 0;
4742 		      }
4743 		    else if (DR_MISALIGNMENT (first_dr) == -1)
4744 		      {
4745 			TREE_TYPE (data_ref)
4746 			  = build_aligned_type (TREE_TYPE (data_ref),
4747 						TYPE_ALIGN (elem_type));
4748 			pi->align = TYPE_ALIGN_UNIT (elem_type);
4749 			pi->misalign = 0;
4750 		      }
4751 		    else
4752 		      {
4753 			TREE_TYPE (data_ref)
4754 			  = build_aligned_type (TREE_TYPE (data_ref),
4755 						TYPE_ALIGN (elem_type));
4756 			pi->misalign = DR_MISALIGNMENT (first_dr);
4757 		      }
4758 		    break;
4759 		  }
4760 		case dr_explicit_realign:
4761 		  {
4762 		    tree ptr, bump;
4763 		    tree vs_minus_1;
4764 
4765 		    vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
4766 
4767 		    if (compute_in_loop)
4768 		      msq = vect_setup_realignment (first_stmt, gsi,
4769 						    &realignment_token,
4770 						    dr_explicit_realign,
4771 						    dataref_ptr, NULL);
4772 
4773 		    new_stmt = gimple_build_assign_with_ops
4774 				 (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
4775 				  build_int_cst
4776 				  (TREE_TYPE (dataref_ptr),
4777 				   -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4778 		    ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
4779 		    gimple_assign_set_lhs (new_stmt, ptr);
4780 		    vect_finish_stmt_generation (stmt, new_stmt, gsi);
4781 		    data_ref
4782 		      = build2 (MEM_REF, vectype, ptr,
4783 				build_int_cst (reference_alias_ptr_type
4784 						 (DR_REF (first_dr)), 0));
4785 		    vec_dest = vect_create_destination_var (scalar_dest,
4786 							    vectype);
4787 		    new_stmt = gimple_build_assign (vec_dest, data_ref);
4788 		    new_temp = make_ssa_name (vec_dest, new_stmt);
4789 		    gimple_assign_set_lhs (new_stmt, new_temp);
4790 		    gimple_set_vdef (new_stmt, gimple_vdef (stmt));
4791 		    gimple_set_vuse (new_stmt, gimple_vuse (stmt));
4792 		    vect_finish_stmt_generation (stmt, new_stmt, gsi);
4793 		    msq = new_temp;
4794 
4795 		    bump = size_binop (MULT_EXPR, vs_minus_1,
4796 				       TYPE_SIZE_UNIT (elem_type));
4797 		    ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
4798 		    new_stmt = gimple_build_assign_with_ops
4799 				 (BIT_AND_EXPR, NULL_TREE, ptr,
4800 				  build_int_cst
4801 				  (TREE_TYPE (ptr),
4802 				   -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4803 		    ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
4804 		    gimple_assign_set_lhs (new_stmt, ptr);
4805 		    vect_finish_stmt_generation (stmt, new_stmt, gsi);
4806 		    data_ref
4807 		      = build2 (MEM_REF, vectype, ptr,
4808 				build_int_cst (reference_alias_ptr_type
4809 						 (DR_REF (first_dr)), 0));
4810 		    break;
4811 		  }
4812 		case dr_explicit_realign_optimized:
4813 		  new_stmt = gimple_build_assign_with_ops
4814 			       (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
4815 				build_int_cst
4816 				  (TREE_TYPE (dataref_ptr),
4817 				   -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4818 		  new_temp = make_ssa_name (SSA_NAME_VAR (dataref_ptr),
4819 					    new_stmt);
4820 		  gimple_assign_set_lhs (new_stmt, new_temp);
4821 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
4822 		  data_ref
4823 		    = build2 (MEM_REF, vectype, new_temp,
4824 			      build_int_cst (reference_alias_ptr_type
4825 					       (DR_REF (first_dr)), 0));
4826 		  break;
4827 		default:
4828 		  gcc_unreachable ();
4829 		}
4830 	      vec_dest = vect_create_destination_var (scalar_dest, vectype);
4831 	      new_stmt = gimple_build_assign (vec_dest, data_ref);
4832 	      new_temp = make_ssa_name (vec_dest, new_stmt);
4833 	      gimple_assign_set_lhs (new_stmt, new_temp);
4834 	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
4835 	      mark_symbols_for_renaming (new_stmt);
4836 
4837 	      /* 3. Handle explicit realignment if necessary/supported.
4838 		 Create in loop:
4839 		   vec_dest = realign_load (msq, lsq, realignment_token)  */
4840 	      if (alignment_support_scheme == dr_explicit_realign_optimized
4841 		  || alignment_support_scheme == dr_explicit_realign)
4842 		{
4843 		  lsq = gimple_assign_lhs (new_stmt);
4844 		  if (!realignment_token)
4845 		    realignment_token = dataref_ptr;
4846 		  vec_dest = vect_create_destination_var (scalar_dest, vectype);
4847 		  new_stmt
4848 		    = gimple_build_assign_with_ops3 (REALIGN_LOAD_EXPR,
4849 						     vec_dest, msq, lsq,
4850 						     realignment_token);
4851 		  new_temp = make_ssa_name (vec_dest, new_stmt);
4852 		  gimple_assign_set_lhs (new_stmt, new_temp);
4853 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
4854 
4855 		  if (alignment_support_scheme == dr_explicit_realign_optimized)
4856 		    {
4857 		      gcc_assert (phi);
4858 		      if (i == vec_num - 1 && j == ncopies - 1)
4859 			add_phi_arg (phi, lsq,
4860 				     loop_latch_edge (containing_loop),
4861 				     UNKNOWN_LOCATION);
4862 		      msq = lsq;
4863 		    }
4864 		}
4865 
4866 	      /* 4. Handle invariant-load.  */
4867 	      if (inv_p && !bb_vinfo)
4868 		{
4869 		  tree tem, vec_inv;
4870 		  gimple_stmt_iterator gsi2 = *gsi;
4871 		  gcc_assert (!strided_load);
4872 		  gsi_next (&gsi2);
4873 		  tem = scalar_dest;
4874 		  if (!useless_type_conversion_p (TREE_TYPE (vectype),
4875 						  TREE_TYPE (tem)))
4876 		    {
4877 		      tem = fold_convert (TREE_TYPE (vectype), tem);
4878 		      tem = force_gimple_operand_gsi (&gsi2, tem, true,
4879 						      NULL_TREE, true,
4880 						      GSI_SAME_STMT);
4881 		    }
4882 		  vec_inv = build_vector_from_val (vectype, tem);
4883 		  new_temp = vect_init_vector (stmt, vec_inv,
4884 					       vectype, &gsi2);
4885 		  new_stmt = SSA_NAME_DEF_STMT (new_temp);
4886 		}
4887 
4888 	      if (negative)
4889 		{
4890 		  tree perm_mask = perm_mask_for_reverse (vectype);
4891 		  new_temp = permute_vec_elements (new_temp, new_temp,
4892 						   perm_mask, stmt, gsi);
4893 		  new_stmt = SSA_NAME_DEF_STMT (new_temp);
4894 		}
4895 
4896 	      /* Collect vector loads and later create their permutation in
4897 		 vect_transform_strided_load ().  */
4898 	      if (strided_load || slp_perm)
4899 		VEC_quick_push (tree, dr_chain, new_temp);
4900 
4901 	      /* Store vector loads in the corresponding SLP_NODE.  */
4902 	      if (slp && !slp_perm)
4903 		VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
4904 				new_stmt);
4905 	    }
4906 	}
4907 
4908       if (slp && !slp_perm)
4909 	continue;
4910 
4911       if (slp_perm)
4912         {
4913           if (!vect_transform_slp_perm_load (stmt, dr_chain, gsi, vf,
4914                                              slp_node_instance, false))
4915             {
4916               VEC_free (tree, heap, dr_chain);
4917               return false;
4918             }
4919         }
4920       else
4921         {
4922           if (strided_load)
4923   	    {
4924 	      if (!load_lanes_p)
4925 		vect_transform_strided_load (stmt, dr_chain, group_size, gsi);
4926 	      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4927 	    }
4928           else
4929 	    {
4930 	      if (j == 0)
4931 	        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4932 	      else
4933 	        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4934 	      prev_stmt_info = vinfo_for_stmt (new_stmt);
4935 	    }
4936         }
4937       if (dr_chain)
4938 	VEC_free (tree, heap, dr_chain);
4939     }
4940 
4941   return true;
4942 }
4943 
4944 /* Function vect_is_simple_cond.
4945 
4946    Input:
4947    LOOP_VINFO - the vect info of the loop that is being vectorized.
4948    COND - the condition that is checked for simple use.
4949 
4950    Output:
4951    *COMP_VECTYPE - the vector type for the comparison.
4952 
4953    Returns whether COND can be vectorized.  Checks whether the condition
4954    operands are supportable using vect_is_simple_use.  */
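
/* For example (illustrative only): for a condition such as "a_5 < b_7",
   where a_5 and b_7 are SSA names of type int defined inside the loop,
   *COMP_VECTYPE would be set to their vector type, e.g. "vector(4) int";
   a condition whose operands are neither SSA names nor numeric constants
   is rejected.  */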
4955 
4956 static bool
4957 vect_is_simple_cond (tree cond, gimple stmt, loop_vec_info loop_vinfo,
4958 		     bb_vec_info bb_vinfo, tree *comp_vectype)
4959 {
4960   tree lhs, rhs;
4961   tree def;
4962   enum vect_def_type dt;
4963   tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
4964 
4965   if (!COMPARISON_CLASS_P (cond))
4966     return false;
4967 
4968   lhs = TREE_OPERAND (cond, 0);
4969   rhs = TREE_OPERAND (cond, 1);
4970 
4971   if (TREE_CODE (lhs) == SSA_NAME)
4972     {
4973       gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
4974       if (!vect_is_simple_use_1 (lhs, stmt, loop_vinfo, bb_vinfo,
4975 				 &lhs_def_stmt, &def, &dt, &vectype1))
4976 	return false;
4977     }
4978   else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
4979 	   && TREE_CODE (lhs) != FIXED_CST)
4980     return false;
4981 
4982   if (TREE_CODE (rhs) == SSA_NAME)
4983     {
4984       gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
4985       if (!vect_is_simple_use_1 (rhs, stmt, loop_vinfo, bb_vinfo,
4986 				 &rhs_def_stmt, &def, &dt, &vectype2))
4987 	return false;
4988     }
4989   else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
4990 	   && TREE_CODE (rhs) != FIXED_CST)
4991     return false;
4992 
4993   *comp_vectype = vectype1 ? vectype1 : vectype2;
4994   return true;
4995 }
4996 
4997 /* vectorizable_condition.
4998 
4999    Check if STMT is a conditional modify expression that can be vectorized.
5000    If VEC_STMT is also passed, vectorize STMT: create a vectorized
5001    stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
5002    at GSI.
5003 
5004    When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
5005    to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
5006    the else clause if it is 2).
5007 
5008    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
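
/* Illustrative sketch (hedged example, names made up): a scalar statement

       S1: a = (x < y) ? b : c;

   would typically be vectorized, for a 4-element vector type, into
   something along the lines of

       VS1: va = VEC_COND_EXPR <vx < vy, vb, vc>;

   where vx, vy, vb and vc are the vector defs obtained for x, y, b and c.  */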
5009 
5010 bool
5011 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
5012 			gimple *vec_stmt, tree reduc_def, int reduc_index,
5013 			slp_tree slp_node)
5014 {
5015   tree scalar_dest = NULL_TREE;
5016   tree vec_dest = NULL_TREE;
5017   tree cond_expr, then_clause, else_clause;
5018   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5019   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5020   tree comp_vectype = NULL_TREE;
5021   tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
5022   tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
5023   tree vec_compare, vec_cond_expr;
5024   tree new_temp;
5025   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5026   tree def;
5027   enum vect_def_type dt, dts[4];
5028   int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5029   int ncopies;
5030   enum tree_code code;
5031   stmt_vec_info prev_stmt_info = NULL;
5032   int i, j;
5033   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5034   VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
5035   VEC (tree, heap) *vec_oprnds2 = NULL, *vec_oprnds3 = NULL;
5036 
5037   if (slp_node || PURE_SLP_STMT (stmt_info))
5038     ncopies = 1;
5039   else
5040     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5041 
5042   gcc_assert (ncopies >= 1);
5043   if (reduc_index && ncopies > 1)
5044     return false; /* FORNOW */
5045 
5046   if (reduc_index && STMT_SLP_TYPE (stmt_info))
5047     return false;
5048 
5049   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5050     return false;
5051 
5052   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5053       && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
5054            && reduc_def))
5055     return false;
5056 
5057   /* FORNOW: not yet supported.  */
5058   if (STMT_VINFO_LIVE_P (stmt_info))
5059     {
5060       if (vect_print_dump_info (REPORT_DETAILS))
5061         fprintf (vect_dump, "value used after loop.");
5062       return false;
5063     }
5064 
5065   /* Is vectorizable conditional operation?  */
5066   if (!is_gimple_assign (stmt))
5067     return false;
5068 
5069   code = gimple_assign_rhs_code (stmt);
5070 
5071   if (code != COND_EXPR)
5072     return false;
5073 
5074   cond_expr = gimple_assign_rhs1 (stmt);
5075   then_clause = gimple_assign_rhs2 (stmt);
5076   else_clause = gimple_assign_rhs3 (stmt);
5077 
5078   if (!vect_is_simple_cond (cond_expr, stmt, loop_vinfo, bb_vinfo,
5079 			    &comp_vectype)
5080       || !comp_vectype)
5081     return false;
5082 
5083   if (TREE_CODE (then_clause) == SSA_NAME)
5084     {
5085       gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
5086       if (!vect_is_simple_use (then_clause, stmt, loop_vinfo, bb_vinfo,
5087 			       &then_def_stmt, &def, &dt))
5088 	return false;
5089     }
5090   else if (TREE_CODE (then_clause) != INTEGER_CST
5091 	   && TREE_CODE (then_clause) != REAL_CST
5092 	   && TREE_CODE (then_clause) != FIXED_CST)
5093     return false;
5094 
5095   if (TREE_CODE (else_clause) == SSA_NAME)
5096     {
5097       gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
5098       if (!vect_is_simple_use (else_clause, stmt, loop_vinfo, bb_vinfo,
5099 			       &else_def_stmt, &def, &dt))
5100 	return false;
5101     }
5102   else if (TREE_CODE (else_clause) != INTEGER_CST
5103 	   && TREE_CODE (else_clause) != REAL_CST
5104 	   && TREE_CODE (else_clause) != FIXED_CST)
5105     return false;
5106 
5107   if (!vec_stmt)
5108     {
5109       STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
5110       return expand_vec_cond_expr_p (vectype, comp_vectype);
5111     }
5112 
5113   /* Transform.  */
5114 
5115   if (!slp_node)
5116     {
5117       vec_oprnds0 = VEC_alloc (tree, heap, 1);
5118       vec_oprnds1 = VEC_alloc (tree, heap, 1);
5119       vec_oprnds2 = VEC_alloc (tree, heap, 1);
5120       vec_oprnds3 = VEC_alloc (tree, heap, 1);
5121     }
5122 
5123   /* Handle def.  */
5124   scalar_dest = gimple_assign_lhs (stmt);
5125   vec_dest = vect_create_destination_var (scalar_dest, vectype);
5126 
5127   /* Handle cond expr.  */
5128   for (j = 0; j < ncopies; j++)
5129     {
5130       gimple new_stmt = NULL;
5131       if (j == 0)
5132 	{
5133           if (slp_node)
5134             {
5135               VEC (tree, heap) *ops = VEC_alloc (tree, heap, 4);
5136               VEC (slp_void_p, heap) *vec_defs;
5137 
5138 	      vec_defs = VEC_alloc (slp_void_p, heap, 4);
5139               VEC_safe_push (tree, heap, ops, TREE_OPERAND (cond_expr, 0));
5140               VEC_safe_push (tree, heap, ops, TREE_OPERAND (cond_expr, 1));
5141               VEC_safe_push (tree, heap, ops, then_clause);
5142               VEC_safe_push (tree, heap, ops, else_clause);
5143               vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
5144               vec_oprnds3 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
5145               vec_oprnds2 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
5146               vec_oprnds1 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
5147               vec_oprnds0 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
5148 
5149               VEC_free (tree, heap, ops);
5150               VEC_free (slp_void_p, heap, vec_defs);
5151             }
5152           else
5153             {
5154 	      gimple gtemp;
5155 	      vec_cond_lhs =
5156 	      vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
5157 					    stmt, NULL);
5158 	      vect_is_simple_use (TREE_OPERAND (cond_expr, 0), stmt,
5159 				  loop_vinfo, NULL, &gtemp, &def, &dts[0]);
5160 
5161 	      vec_cond_rhs =
5162 		vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
5163 						stmt, NULL);
5164 	      vect_is_simple_use (TREE_OPERAND (cond_expr, 1), stmt,
5165 				  loop_vinfo, NULL, &gtemp, &def, &dts[1]);
5166 	      if (reduc_index == 1)
5167 		vec_then_clause = reduc_def;
5168 	      else
5169 		{
5170 		  vec_then_clause = vect_get_vec_def_for_operand (then_clause,
5171 		 		  			      stmt, NULL);
5172 	          vect_is_simple_use (then_clause, stmt, loop_vinfo,
5173 					  NULL, &gtemp, &def, &dts[2]);
5174 		}
5175 	      if (reduc_index == 2)
5176 		vec_else_clause = reduc_def;
5177 	      else
5178 		{
5179 		  vec_else_clause = vect_get_vec_def_for_operand (else_clause,
5180 							      stmt, NULL);
5181 		  vect_is_simple_use (else_clause, stmt, loop_vinfo,
5182 				  NULL, &gtemp, &def, &dts[3]);
5183 		}
5184 	    }
5185 	}
5186       else
5187 	{
5188 	  vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
5189 						VEC_pop (tree, vec_oprnds0));
5190 	  vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
5191 						VEC_pop (tree, vec_oprnds1));
5192 	  vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
5193 						VEC_pop (tree, vec_oprnds2));
5194 	  vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
5195 						VEC_pop (tree, vec_oprnds3));
5196 	}
5197 
5198       if (!slp_node)
5199         {
5200 	  VEC_quick_push (tree, vec_oprnds0, vec_cond_lhs);
5201 	  VEC_quick_push (tree, vec_oprnds1, vec_cond_rhs);
5202 	  VEC_quick_push (tree, vec_oprnds2, vec_then_clause);
5203 	  VEC_quick_push (tree, vec_oprnds3, vec_else_clause);
5204 	}
5205 
5206       /* Arguments are ready.  Create the new vector stmt.  */
5207       FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vec_cond_lhs)
5208         {
5209           vec_cond_rhs = VEC_index (tree, vec_oprnds1, i);
5210           vec_then_clause = VEC_index (tree, vec_oprnds2, i);
5211           vec_else_clause = VEC_index (tree, vec_oprnds3, i);
5212 
5213           vec_compare = build2 (TREE_CODE (cond_expr), vectype,
5214   			       vec_cond_lhs, vec_cond_rhs);
5215           vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
5216  		         vec_compare, vec_then_clause, vec_else_clause);
5217 
5218           new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
5219           new_temp = make_ssa_name (vec_dest, new_stmt);
5220           gimple_assign_set_lhs (new_stmt, new_temp);
5221           vect_finish_stmt_generation (stmt, new_stmt, gsi);
5222           if (slp_node)
5223             VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
5224         }
5225 
5226         if (slp_node)
5227           continue;
5228 
5229         if (j == 0)
5230           STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5231         else
5232           STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5233 
5234         prev_stmt_info = vinfo_for_stmt (new_stmt);
5235     }
5236 
5237   VEC_free (tree, heap, vec_oprnds0);
5238   VEC_free (tree, heap, vec_oprnds1);
5239   VEC_free (tree, heap, vec_oprnds2);
5240   VEC_free (tree, heap, vec_oprnds3);
5241 
5242   return true;
5243 }
5244 
5245 
5246 /* Make sure the statement is vectorizable.  */
5247 
5248 bool
5249 vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
5250 {
5251   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5252   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5253   enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
5254   bool ok;
5255   tree scalar_type, vectype;
5256   gimple pattern_stmt;
5257   gimple_seq pattern_def_seq;
5258 
5259   if (vect_print_dump_info (REPORT_DETAILS))
5260     {
5261       fprintf (vect_dump, "==> examining statement: ");
5262       print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5263     }
5264 
5265   if (gimple_has_volatile_ops (stmt))
5266     {
5267       if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
5268         fprintf (vect_dump, "not vectorized: stmt has volatile operands");
5269 
5270       return false;
5271     }
5272 
5273   /* Skip stmts that do not need to be vectorized. In loops this is expected
5274      to include:
5275      - the COND_EXPR which is the loop exit condition
5276      - any LABEL_EXPRs in the loop
5277      - computations that are used only for array indexing or loop control.
5278      In basic blocks we only analyze statements that are a part of some SLP
5279      instance, therefore, all the statements are relevant.
5280 
5281      A pattern statement needs to be analyzed instead of the original statement
5282      if the original statement is not relevant.  Otherwise, we analyze both
5283      statements.  */
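
  /* For instance (an illustrative, hedged example): if the pattern
     recognizer replaced the computation

         S: prod = (int) a_t * (int) b_t;     (a_t, b_t of type short)

     by a WIDEN_MULT_EXPR pattern statement, and S itself is not relevant,
     then it is that pattern statement which is analyzed below in place
     of S.  */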
5284 
5285   pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
5286   if (!STMT_VINFO_RELEVANT_P (stmt_info)
5287       && !STMT_VINFO_LIVE_P (stmt_info))
5288     {
5289       if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5290           && pattern_stmt
5291           && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5292               || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5293         {
5294           /* Analyze PATTERN_STMT instead of the original stmt.  */
5295           stmt = pattern_stmt;
5296           stmt_info = vinfo_for_stmt (pattern_stmt);
5297           if (vect_print_dump_info (REPORT_DETAILS))
5298             {
5299               fprintf (vect_dump, "==> examining pattern statement: ");
5300               print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5301             }
5302         }
5303       else
5304         {
5305           if (vect_print_dump_info (REPORT_DETAILS))
5306             fprintf (vect_dump, "irrelevant.");
5307 
5308           return true;
5309         }
5310     }
5311   else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5312            && pattern_stmt
5313            && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5314                || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5315     {
5316       /* Analyze PATTERN_STMT too.  */
5317       if (vect_print_dump_info (REPORT_DETAILS))
5318         {
5319           fprintf (vect_dump, "==> examining pattern statement: ");
5320           print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5321         }
5322 
5323       if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
5324         return false;
5325    }
5326 
5327   if (is_pattern_stmt_p (stmt_info)
5328       && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
5329     {
5330       gimple_stmt_iterator si;
5331 
5332       for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
5333 	{
5334 	  gimple pattern_def_stmt = gsi_stmt (si);
5335 	  if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
5336 	      || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
5337 	    {
5338 	      /* Analyze def stmt of STMT if it's a pattern stmt.  */
5339 	      if (vect_print_dump_info (REPORT_DETAILS))
5340 		{
5341 		  fprintf (vect_dump, "==> examining pattern def statement: ");
5342 		  print_gimple_stmt (vect_dump, pattern_def_stmt, 0, TDF_SLIM);
5343 		}
5344 
5345 	      if (!vect_analyze_stmt (pattern_def_stmt,
5346 				      need_to_vectorize, node))
5347 		return false;
5348 	    }
5349 	}
5350     }
5351 
5352   switch (STMT_VINFO_DEF_TYPE (stmt_info))
5353     {
5354       case vect_internal_def:
5355         break;
5356 
5357       case vect_reduction_def:
5358       case vect_nested_cycle:
5359          gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
5360                      || relevance == vect_used_in_outer_by_reduction
5361                      || relevance == vect_unused_in_scope));
5362          break;
5363 
5364       case vect_induction_def:
5365       case vect_constant_def:
5366       case vect_external_def:
5367       case vect_unknown_def_type:
5368       default:
5369         gcc_unreachable ();
5370     }
5371 
5372   if (bb_vinfo)
5373     {
5374       gcc_assert (PURE_SLP_STMT (stmt_info));
5375 
5376       scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
5377       if (vect_print_dump_info (REPORT_DETAILS))
5378         {
5379           fprintf (vect_dump, "get vectype for scalar type:  ");
5380           print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
5381         }
5382 
5383       vectype = get_vectype_for_scalar_type (scalar_type);
5384       if (!vectype)
5385         {
5386           if (vect_print_dump_info (REPORT_DETAILS))
5387             {
5388                fprintf (vect_dump, "not SLPed: unsupported data-type ");
5389                print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
5390             }
5391           return false;
5392         }
5393 
5394       if (vect_print_dump_info (REPORT_DETAILS))
5395         {
5396           fprintf (vect_dump, "vectype:  ");
5397           print_generic_expr (vect_dump, vectype, TDF_SLIM);
5398         }
5399 
5400       STMT_VINFO_VECTYPE (stmt_info) = vectype;
5401    }
5402 
5403   if (STMT_VINFO_RELEVANT_P (stmt_info))
5404     {
5405       gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
5406       gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
5407       *need_to_vectorize = true;
5408     }
5409 
5410    ok = true;
5411    if (!bb_vinfo
5412        && (STMT_VINFO_RELEVANT_P (stmt_info)
5413            || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
5414       ok = (vectorizable_conversion (stmt, NULL, NULL, NULL)
5415             || vectorizable_shift (stmt, NULL, NULL, NULL)
5416             || vectorizable_operation (stmt, NULL, NULL, NULL)
5417             || vectorizable_assignment (stmt, NULL, NULL, NULL)
5418             || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
5419 	    || vectorizable_call (stmt, NULL, NULL, NULL)
5420             || vectorizable_store (stmt, NULL, NULL, NULL)
5421             || vectorizable_reduction (stmt, NULL, NULL, NULL)
5422             || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL));
5423     else
5424       {
5425         if (bb_vinfo)
5426 	  ok = (vectorizable_conversion (stmt, NULL, NULL, node)
5427 		|| vectorizable_shift (stmt, NULL, NULL, node)
5428                 || vectorizable_operation (stmt, NULL, NULL, node)
5429                 || vectorizable_assignment (stmt, NULL, NULL, node)
5430                 || vectorizable_load (stmt, NULL, NULL, node, NULL)
5431 		|| vectorizable_call (stmt, NULL, NULL, node)
5432                 || vectorizable_store (stmt, NULL, NULL, node)
5433                 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
5434       }
5435 
5436   if (!ok)
5437     {
5438       if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
5439         {
5440           fprintf (vect_dump, "not vectorized: relevant stmt not ");
5441           fprintf (vect_dump, "supported: ");
5442           print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5443         }
5444 
5445       return false;
5446     }
5447 
5448   if (bb_vinfo)
5449     return true;
5450 
5451   /* Stmts that are (also) "live" (i.e., that are used outside the loop)
5452      need extra handling, except for vectorizable reductions.  */
5453   if (STMT_VINFO_LIVE_P (stmt_info)
5454       && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5455     ok = vectorizable_live_operation (stmt, NULL, NULL);
5456 
5457   if (!ok)
5458     {
5459       if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
5460         {
5461           fprintf (vect_dump, "not vectorized: live stmt not ");
5462           fprintf (vect_dump, "supported: ");
5463           print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5464         }
5465 
5466        return false;
5467     }
5468 
5469   return true;
5470 }
5471 
5472 
5473 /* Function vect_transform_stmt.
5474 
5475    Create a vectorized stmt to replace STMT, and insert it at BSI.  */
5476 
5477 bool
5478 vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
5479 		     bool *strided_store, slp_tree slp_node,
5480                      slp_instance slp_node_instance)
5481 {
5482   bool is_store = false;
5483   gimple vec_stmt = NULL;
5484   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5485   bool done;
5486 
5487   switch (STMT_VINFO_TYPE (stmt_info))
5488     {
5489     case type_demotion_vec_info_type:
5490     case type_promotion_vec_info_type:
5491     case type_conversion_vec_info_type:
5492       done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
5493       gcc_assert (done);
5494       break;
5495 
5496     case induc_vec_info_type:
5497       gcc_assert (!slp_node);
5498       done = vectorizable_induction (stmt, gsi, &vec_stmt);
5499       gcc_assert (done);
5500       break;
5501 
5502     case shift_vec_info_type:
5503       done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
5504       gcc_assert (done);
5505       break;
5506 
5507     case op_vec_info_type:
5508       done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
5509       gcc_assert (done);
5510       break;
5511 
5512     case assignment_vec_info_type:
5513       done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
5514       gcc_assert (done);
5515       break;
5516 
5517     case load_vec_info_type:
5518       done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
5519                                 slp_node_instance);
5520       gcc_assert (done);
5521       break;
5522 
5523     case store_vec_info_type:
5524       done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
5525       gcc_assert (done);
5526       if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && !slp_node)
5527 	{
5528 	  /* In case of interleaving, the whole chain is vectorized when the
5529 	     last store in the chain is reached.  Store stmts before the last
5530 	     one are skipped, and their stmt_vec_info shouldn't be freed
5531 	     meanwhile.  */
5532 	  *strided_store = true;
5533 	  if (STMT_VINFO_VEC_STMT (stmt_info))
5534 	    is_store = true;
5535 	  }
5536       else
5537 	is_store = true;
5538       break;
5539 
5540     case condition_vec_info_type:
5541       done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
5542       gcc_assert (done);
5543       break;
5544 
5545     case call_vec_info_type:
5546       done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
5547       stmt = gsi_stmt (*gsi);
5548       break;
5549 
5550     case reduc_vec_info_type:
5551       done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
5552       gcc_assert (done);
5553       break;
5554 
5555     default:
5556       if (!STMT_VINFO_LIVE_P (stmt_info))
5557 	{
5558 	  if (vect_print_dump_info (REPORT_DETAILS))
5559 	    fprintf (vect_dump, "stmt not supported.");
5560 	  gcc_unreachable ();
5561 	}
5562     }
5563 
5564   /* Handle inner-loop stmts whose DEF is used in the loop-nest that
5565      is being vectorized, but outside the immediately enclosing loop.  */
5566   if (vec_stmt
5567       && STMT_VINFO_LOOP_VINFO (stmt_info)
5568       && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
5569                                 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
5570       && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
5571       && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
5572           || STMT_VINFO_RELEVANT (stmt_info) ==
5573                                            vect_used_in_outer_by_reduction))
5574     {
5575       struct loop *innerloop = LOOP_VINFO_LOOP (
5576                                 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
5577       imm_use_iterator imm_iter;
5578       use_operand_p use_p;
5579       tree scalar_dest;
5580       gimple exit_phi;
5581 
5582       if (vect_print_dump_info (REPORT_DETAILS))
5583         fprintf (vect_dump, "Record the vdef for outer-loop vectorization.");
5584 
5585       /* Find the relevant loop-exit phi-node, and record the vec_stmt there
5586         (to be used when vectorizing outer-loop stmts that use the DEF of
5587         STMT).  */
5588       if (gimple_code (stmt) == GIMPLE_PHI)
5589         scalar_dest = PHI_RESULT (stmt);
5590       else
5591         scalar_dest = gimple_assign_lhs (stmt);
5592 
5593       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
5594        {
5595          if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
5596            {
5597              exit_phi = USE_STMT (use_p);
5598              STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
5599            }
5600        }
5601     }
5602 
5603   /* Handle stmts whose DEF is used outside the loop-nest that is
5604      being vectorized.  */
5605   if (STMT_VINFO_LIVE_P (stmt_info)
5606       && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5607     {
5608       done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
5609       gcc_assert (done);
5610     }
5611 
5612   if (vec_stmt)
5613     STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
5614 
5615   return is_store;
5616 }
5617 
5618 
5619 /* Remove a group of stores (for SLP or interleaving), free their
5620    stmt_vec_info.  */
5621 
5622 void
5623 vect_remove_stores (gimple first_stmt)
5624 {
5625   gimple next = first_stmt;
5626   gimple tmp;
5627   gimple_stmt_iterator next_si;
5628 
5629   while (next)
5630     {
5631       stmt_vec_info stmt_info = vinfo_for_stmt (next);
5632 
5633       tmp = GROUP_NEXT_ELEMENT (stmt_info);
5634       if (is_pattern_stmt_p (stmt_info))
5635 	next = STMT_VINFO_RELATED_STMT (stmt_info);
5636       /* Free the attached stmt_vec_info and remove the stmt.  */
5637       next_si = gsi_for_stmt (next);
5638       gsi_remove (&next_si, true);
5639       free_stmt_vec_info (next);
5640       next = tmp;
5641     }
5642 }
5643 
5644 
5645 /* Function new_stmt_vec_info.
5646 
5647    Create and initialize a new stmt_vec_info struct for STMT.  */
5648 
5649 stmt_vec_info
5650 new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
5651                    bb_vec_info bb_vinfo)
5652 {
5653   stmt_vec_info res;
5654   res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
5655 
5656   STMT_VINFO_TYPE (res) = undef_vec_info_type;
5657   STMT_VINFO_STMT (res) = stmt;
5658   STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
5659   STMT_VINFO_BB_VINFO (res) = bb_vinfo;
5660   STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
5661   STMT_VINFO_LIVE_P (res) = false;
5662   STMT_VINFO_VECTYPE (res) = NULL;
5663   STMT_VINFO_VEC_STMT (res) = NULL;
5664   STMT_VINFO_VECTORIZABLE (res) = true;
5665   STMT_VINFO_IN_PATTERN_P (res) = false;
5666   STMT_VINFO_RELATED_STMT (res) = NULL;
5667   STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
5668   STMT_VINFO_DATA_REF (res) = NULL;
5669 
5670   STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
5671   STMT_VINFO_DR_OFFSET (res) = NULL;
5672   STMT_VINFO_DR_INIT (res) = NULL;
5673   STMT_VINFO_DR_STEP (res) = NULL;
5674   STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
5675 
5676   if (gimple_code (stmt) == GIMPLE_PHI
5677       && is_loop_header_bb_p (gimple_bb (stmt)))
5678     STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
5679   else
5680     STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
5681 
5682   STMT_VINFO_SAME_ALIGN_REFS (res) = NULL;
5683   STMT_VINFO_INSIDE_OF_LOOP_COST (res) = 0;
5684   STMT_VINFO_OUTSIDE_OF_LOOP_COST (res) = 0;
5685   STMT_SLP_TYPE (res) = loop_vect;
5686   GROUP_FIRST_ELEMENT (res) = NULL;
5687   GROUP_NEXT_ELEMENT (res) = NULL;
5688   GROUP_SIZE (res) = 0;
5689   GROUP_STORE_COUNT (res) = 0;
5690   GROUP_GAP (res) = 0;
5691   GROUP_SAME_DR_STMT (res) = NULL;
5692   GROUP_READ_WRITE_DEPENDENCE (res) = false;
5693 
5694   return res;
5695 }
5696 
5697 
5698 /* Create the vector holding stmt_vec_info structs (indexed by stmt UID). */
5699 
5700 void
5701 init_stmt_vec_info_vec (void)
5702 {
5703   gcc_assert (!stmt_vec_info_vec);
5704   stmt_vec_info_vec = VEC_alloc (vec_void_p, heap, 50);
5705 }
5706 
5707 
5708 /* Free the vector holding stmt_vec_info structs. */
5709 
5710 void
5711 free_stmt_vec_info_vec (void)
5712 {
5713   gcc_assert (stmt_vec_info_vec);
5714   VEC_free (vec_void_p, heap, stmt_vec_info_vec);
5715 }
5716 
5717 
5718 /* Free stmt vectorization related info.  */
5719 
5720 void
5721 free_stmt_vec_info (gimple stmt)
5722 {
5723   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5724 
5725   if (!stmt_info)
5726     return;
5727 
5728   /* Check if this statement has a related "pattern stmt"
5729      (introduced by the vectorizer during the pattern recognition
5730      pass).  Free pattern's stmt_vec_info and def stmt's stmt_vec_info
5731      too.  */
5732   if (STMT_VINFO_IN_PATTERN_P (stmt_info))
5733     {
5734       stmt_vec_info patt_info
5735 	= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
5736       if (patt_info)
5737 	{
5738 	  gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
5739 	  if (seq)
5740 	    {
5741 	      gimple_stmt_iterator si;
5742 	      for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
5743 		free_stmt_vec_info (gsi_stmt (si));
5744 	    }
5745 	  free_stmt_vec_info (STMT_VINFO_RELATED_STMT (stmt_info));
5746 	}
5747     }
5748 
5749   VEC_free (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
5750   set_vinfo_for_stmt (stmt, NULL);
5751   free (stmt_info);
5752 }
5753 
5754 
5755 /* Function get_vectype_for_scalar_type_and_size.
5756 
5757    Returns the vector type corresponding to SCALAR_TYPE  and SIZE as supported
5758    by the target.  */
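
/* As a rough illustration (hedged, target-dependent): with
   SCALAR_TYPE == int (4 bytes) and SIZE == 16 this would be expected to
   return a 4-unit vector type such as "vector(4) int" (V4SImode on many
   targets), while SIZE == 0 lets the target's preferred_simd_mode hook
   choose the vector width.  */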
5759 
5760 static tree
5761 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
5762 {
5763   enum machine_mode inner_mode = TYPE_MODE (scalar_type);
5764   enum machine_mode simd_mode;
5765   unsigned int nbytes = GET_MODE_SIZE (inner_mode);
5766   int nunits;
5767   tree vectype;
5768 
5769   if (nbytes == 0)
5770     return NULL_TREE;
5771 
5772   if (GET_MODE_CLASS (inner_mode) != MODE_INT
5773       && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
5774     return NULL_TREE;
5775 
5776   /* For vector types of elements whose mode precision doesn't
5777      match their type's precision we use an element type of mode
5778      precision.  The vectorization routines will have to make sure
5779      they support the proper result truncation/extension.
5780      We also make sure to build vector types with INTEGER_TYPE
5781      component type only.  */
5782   if (INTEGRAL_TYPE_P (scalar_type)
5783       && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
5784 	  || TREE_CODE (scalar_type) != INTEGER_TYPE))
5785     scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
5786 						  TYPE_UNSIGNED (scalar_type));
5787 
5788   /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
5789      When the component mode passes the above test simply use a type
5790      corresponding to that mode.  The theory is that any use that
5791      would cause problems with this will disable vectorization anyway.  */
5792   else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
5793 	   && !INTEGRAL_TYPE_P (scalar_type)
5794 	   && !POINTER_TYPE_P (scalar_type))
5795     scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
5796 
5797   /* We can't build a vector type of elements with alignment bigger than
5798      their size.  */
5799   else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
5800     scalar_type = lang_hooks.types.type_for_mode (inner_mode,
5801 						  TYPE_UNSIGNED (scalar_type));
5802 
5803   /* If we fell back to using the mode, fail if there was
5804      no scalar type for it.  */
5805   if (scalar_type == NULL_TREE)
5806     return NULL_TREE;
5807 
5808   /* If no size was supplied use the mode the target prefers.  Otherwise
5809      look up a vector mode of the specified size.  */
5810   if (size == 0)
5811     simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
5812   else
5813     simd_mode = mode_for_vector (inner_mode, size / nbytes);
5814   nunits = GET_MODE_SIZE (simd_mode) / nbytes;
5815   if (nunits <= 1)
5816     return NULL_TREE;
5817 
5818   vectype = build_vector_type (scalar_type, nunits);
5819   if (vect_print_dump_info (REPORT_DETAILS))
5820     {
5821       fprintf (vect_dump, "get vectype with %d units of type ", nunits);
5822       print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
5823     }
5824 
5825   if (!vectype)
5826     return NULL_TREE;
5827 
5828   if (vect_print_dump_info (REPORT_DETAILS))
5829     {
5830       fprintf (vect_dump, "vectype: ");
5831       print_generic_expr (vect_dump, vectype, TDF_SLIM);
5832     }
5833 
5834   if (!VECTOR_MODE_P (TYPE_MODE (vectype))
5835       && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
5836     {
5837       if (vect_print_dump_info (REPORT_DETAILS))
5838         fprintf (vect_dump, "mode not supported by target.");
5839       return NULL_TREE;
5840     }
5841 
5842   return vectype;
5843 }
5844 
5845 unsigned int current_vector_size;
5846 
5847 /* Function get_vectype_for_scalar_type.
5848 
5849    Returns the vector type corresponding to SCALAR_TYPE as supported
5850    by the target.  */
5851 
5852 tree
5853 get_vectype_for_scalar_type (tree scalar_type)
5854 {
5855   tree vectype;
5856   vectype = get_vectype_for_scalar_type_and_size (scalar_type,
5857 						  current_vector_size);
5858   if (vectype
5859       && current_vector_size == 0)
5860     current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
5861   return vectype;
5862 }
5863 
5864 /* Function get_same_sized_vectype
5865 
5866    Returns a vector type corresponding to SCALAR_TYPE of size
5867    VECTOR_TYPE if supported by the target.  */
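
/* For example (illustrative only): with SCALAR_TYPE == short and a
   16-byte VECTOR_TYPE such as "vector(4) int", this is expected to
   return "vector(8) short", i.e. a vector of the same size in bytes but
   with twice as many, half-width, elements.  */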
5868 
5869 tree
5870 get_same_sized_vectype (tree scalar_type, tree vector_type)
5871 {
5872   return get_vectype_for_scalar_type_and_size
5873 	   (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
5874 }
5875 
5876 /* Function vect_is_simple_use.
5877 
5878    Input:
5879    LOOP_VINFO - the vect info of the loop that is being vectorized.
5880    BB_VINFO - the vect info of the basic block that is being vectorized.
5881    OPERAND - operand of STMT in the loop or bb.
5882    DEF_STMT, DEF - set on return to the defining stmt and the definition,
5883    in case OPERAND is an SSA_NAME.
5883 
5884    Returns whether a stmt with OPERAND can be vectorized.
5885    For loops, supportable operands are constants, loop invariants, and operands
5886    that are defined by the current iteration of the loop.  Unsupportable
5887    operands are those that are defined by a previous iteration of the loop (as
5888    is the case in reduction/induction computations).
5889    For basic blocks, supportable operands are constants and bb invariants.
5890    For now, operands defined outside the basic block are not supported.  */
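
/* Illustrative example (names made up, not used by the code below): for

       b_3 = a_1 + x_4

   inside a vectorized loop, x_4 defined before the loop is classified as
   vect_external_def, a constant operand as vect_constant_def, and a_1
   defined by a statement of the current iteration as vect_internal_def;
   all three are supportable uses.  */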
5891 
5892 bool
5893 vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
5894                     bb_vec_info bb_vinfo, gimple *def_stmt,
5895 		    tree *def, enum vect_def_type *dt)
5896 {
5897   basic_block bb;
5898   stmt_vec_info stmt_vinfo;
5899   struct loop *loop = NULL;
5900 
5901   if (loop_vinfo)
5902     loop = LOOP_VINFO_LOOP (loop_vinfo);
5903 
5904   *def_stmt = NULL;
5905   *def = NULL_TREE;
5906 
5907   if (vect_print_dump_info (REPORT_DETAILS))
5908     {
5909       fprintf (vect_dump, "vect_is_simple_use: operand ");
5910       print_generic_expr (vect_dump, operand, TDF_SLIM);
5911     }
5912 
5913   if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
5914     {
5915       *dt = vect_constant_def;
5916       return true;
5917     }
5918 
5919   if (is_gimple_min_invariant (operand))
5920     {
5921       *def = operand;
5922       *dt = vect_external_def;
5923       return true;
5924     }
5925 
5926   if (TREE_CODE (operand) == PAREN_EXPR)
5927     {
5928       if (vect_print_dump_info (REPORT_DETAILS))
5929         fprintf (vect_dump, "non-associatable copy.");
5930       operand = TREE_OPERAND (operand, 0);
5931     }
5932 
5933   if (TREE_CODE (operand) != SSA_NAME)
5934     {
5935       if (vect_print_dump_info (REPORT_DETAILS))
5936         fprintf (vect_dump, "not ssa-name.");
5937       return false;
5938     }
5939 
5940   *def_stmt = SSA_NAME_DEF_STMT (operand);
5941   if (*def_stmt == NULL)
5942     {
5943       if (vect_print_dump_info (REPORT_DETAILS))
5944         fprintf (vect_dump, "no def_stmt.");
5945       return false;
5946     }
5947 
5948   if (vect_print_dump_info (REPORT_DETAILS))
5949     {
5950       fprintf (vect_dump, "def_stmt: ");
5951       print_gimple_stmt (vect_dump, *def_stmt, 0, TDF_SLIM);
5952     }
5953 
5954   /* An empty stmt is expected only in the case of a function argument
5955      (otherwise we expect a phi_node or a GIMPLE_ASSIGN).  */
5956   if (gimple_nop_p (*def_stmt))
5957     {
5958       *def = operand;
5959       *dt = vect_external_def;
5960       return true;
5961     }
5962 
5963   bb = gimple_bb (*def_stmt);
5964 
5965   if ((loop && !flow_bb_inside_loop_p (loop, bb))
5966       || (!loop && bb != BB_VINFO_BB (bb_vinfo))
5967       || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
5968     *dt = vect_external_def;
5969   else
5970     {
5971       stmt_vinfo = vinfo_for_stmt (*def_stmt);
5972       *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
5973     }
5974 
5975   if (*dt == vect_unknown_def_type
5976       || (stmt
5977 	  && *dt == vect_double_reduction_def
5978 	  && gimple_code (stmt) != GIMPLE_PHI))
5979     {
5980       if (vect_print_dump_info (REPORT_DETAILS))
5981         fprintf (vect_dump, "Unsupported pattern.");
5982       return false;
5983     }
5984 
5985   if (vect_print_dump_info (REPORT_DETAILS))
5986     fprintf (vect_dump, "type of def: %d.",*dt);
5987 
5988   switch (gimple_code (*def_stmt))
5989     {
5990     case GIMPLE_PHI:
5991       *def = gimple_phi_result (*def_stmt);
5992       break;
5993 
5994     case GIMPLE_ASSIGN:
5995       *def = gimple_assign_lhs (*def_stmt);
5996       break;
5997 
5998     case GIMPLE_CALL:
5999       *def = gimple_call_lhs (*def_stmt);
6000       if (*def != NULL)
6001 	break;
6002       /* FALLTHRU */
6003     default:
6004       if (vect_print_dump_info (REPORT_DETAILS))
6005         fprintf (vect_dump, "unsupported defining stmt: ");
6006       return false;
6007     }
6008 
6009   return true;
6010 }
6011 
6012 /* Function vect_is_simple_use_1.
6013 
6014    Same as vect_is_simple_use but also determines the vector operand
6015    type of OPERAND and stores it to *VECTYPE.  If the definition of
6016    OPERAND is vect_uninitialized_def, vect_constant_def or
6017    vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
6018    is responsible for computing the best suited vector type for the
6019    scalar operand.  */
6020 
6021 bool
6022 vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
6023 		      bb_vec_info bb_vinfo, gimple *def_stmt,
6024 		      tree *def, enum vect_def_type *dt, tree *vectype)
6025 {
6026   if (!vect_is_simple_use (operand, stmt, loop_vinfo, bb_vinfo, def_stmt,
6027 			   def, dt))
6028     return false;
6029 
6030   /* Now get a vector type if the def is internal, otherwise supply
6031      NULL_TREE and leave it up to the caller to figure out a proper
6032      type for the use stmt.  */
6033   if (*dt == vect_internal_def
6034       || *dt == vect_induction_def
6035       || *dt == vect_reduction_def
6036       || *dt == vect_double_reduction_def
6037       || *dt == vect_nested_cycle)
6038     {
6039       stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
6040 
6041       if (STMT_VINFO_IN_PATTERN_P (stmt_info)
6042           && !STMT_VINFO_RELEVANT (stmt_info)
6043           && !STMT_VINFO_LIVE_P (stmt_info))
6044 	stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
6045 
6046       *vectype = STMT_VINFO_VECTYPE (stmt_info);
6047       gcc_assert (*vectype != NULL_TREE);
6048     }
6049   else if (*dt == vect_uninitialized_def
6050 	   || *dt == vect_constant_def
6051 	   || *dt == vect_external_def)
6052     *vectype = NULL_TREE;
6053   else
6054     gcc_unreachable ();
6055 
6056   return true;
6057 }
6058 
6059 
6060 /* Function supportable_widening_operation
6061 
6062    Check whether an operation represented by the code CODE is a
6063    widening operation that is supported by the target platform in
6064    vector form (i.e., when operating on arguments of type VECTYPE_IN
6065    producing a result of type VECTYPE_OUT).
6066 
6067    Widening operations we currently support are NOP (CONVERT), FLOAT,
6068    WIDEN_MULT and WIDEN_LSHIFT.  This function checks if these operations
6069    are supported by the target platform either directly (via vector
6070    tree-codes), or via target builtins.
6071 
6072    Output:
6073    - CODE1 and CODE2 are codes of vector operations to be used when
6074    vectorizing the operation, if available.
6075    - DECL1 and DECL2 are decls of target builtin functions to be used
6076    when vectorizing the operation, if available.  In this case,
6077    CODE1 and CODE2 are CALL_EXPR.
6078    - MULTI_STEP_CVT determines the number of required intermediate steps in
6079    case of multi-step conversion (like char->short->int - in that case
6080    MULTI_STEP_CVT will be 1).
6081    - INTERM_TYPES contains the intermediate type required to perform the
6082    widening operation (short in the above example).  */
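
/* Sketch of expected behavior (illustrative, target-dependent): for a
   char -> int conversion with 16-byte vectors, VECTYPE_IN is
   "vector(16) char" and VECTYPE_OUT "vector(4) int"; on a target
   providing the vec_unpack optabs this is a two-step promotion, so
   *CODE1/*CODE2 are set to the VEC_UNPACK_{LO,HI}_EXPR pair,
   *MULTI_STEP_CVT to 1, and *INTERM_TYPES contains "vector(8) short".  */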
6083 
6084 bool
6085 supportable_widening_operation (enum tree_code code, gimple stmt,
6086 				tree vectype_out, tree vectype_in,
6087                                 tree *decl1, tree *decl2,
6088                                 enum tree_code *code1, enum tree_code *code2,
6089                                 int *multi_step_cvt,
6090                                 VEC (tree, heap) **interm_types)
6091 {
6092   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6093   loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
6094   struct loop *vect_loop = NULL;
6095   bool ordered_p;
6096   enum machine_mode vec_mode;
6097   enum insn_code icode1, icode2;
6098   optab optab1, optab2;
6099   tree vectype = vectype_in;
6100   tree wide_vectype = vectype_out;
6101   enum tree_code c1, c2;
6102   int i;
6103   tree prev_type, intermediate_type;
6104   enum machine_mode intermediate_mode, prev_mode;
6105   optab optab3, optab4;
6106 
6107   *multi_step_cvt = 0;
6108   if (loop_info)
6109     vect_loop = LOOP_VINFO_LOOP (loop_info);
6110 
6111   /* The result of a vectorized widening operation usually requires two vectors
6112      (because the widened results do not fit into one vector). The generated
6113      vector results would normally be expected to be generated in the same
6114      order as in the original scalar computation, i.e. if 8 results are
6115      generated in each vector iteration, they are to be organized as follows:
6116         vect1: [res1,res2,res3,res4], vect2: [res5,res6,res7,res8].
6117 
6118      However, in the special case that the result of the widening operation is
6119      used in a reduction computation only, the order doesn't matter (because
6120      when vectorizing a reduction we change the order of the computation).
6121      Some targets can take advantage of this and generate more efficient code.
6122      For example, targets like Altivec, that support widen_mult using a sequence
6123      of {mult_even,mult_odd} generate the following vectors:
6124         vect1: [res1,res3,res5,res7], vect2: [res2,res4,res6,res8].
6125 
6126      When vectorizing outer-loops, we execute the inner-loop sequentially
6127      (each vectorized inner-loop iteration contributes to VF outer-loop
6128      iterations in parallel).  We therefore don't allow changing the order
6129      of the computation in the inner-loop during outer-loop vectorization.  */
6130 
6131    if (vect_loop
6132        && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
6133        && !nested_in_vect_loop_p (vect_loop, stmt))
6134      ordered_p = false;
6135    else
6136      ordered_p = true;
6137 
6138   if (!ordered_p
6139       && code == WIDEN_MULT_EXPR
6140       && targetm.vectorize.builtin_mul_widen_even
6141       && targetm.vectorize.builtin_mul_widen_even (vectype)
6142       && targetm.vectorize.builtin_mul_widen_odd
6143       && targetm.vectorize.builtin_mul_widen_odd (vectype))
6144     {
6145       if (vect_print_dump_info (REPORT_DETAILS))
6146         fprintf (vect_dump, "Unordered widening operation detected.");
6147 
6148       *code1 = *code2 = CALL_EXPR;
6149       *decl1 = targetm.vectorize.builtin_mul_widen_even (vectype);
6150       *decl2 = targetm.vectorize.builtin_mul_widen_odd (vectype);
6151       return true;
6152     }
6153 
6154   switch (code)
6155     {
6156     case WIDEN_MULT_EXPR:
6157       c1 = VEC_WIDEN_MULT_LO_EXPR;
6158       c2 = VEC_WIDEN_MULT_HI_EXPR;
6159       break;
6160 
6161     case WIDEN_LSHIFT_EXPR:
6162       c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
6163       c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
6164       break;
6165 
6166     CASE_CONVERT:
6167       c1 = VEC_UNPACK_LO_EXPR;
6168       c2 = VEC_UNPACK_HI_EXPR;
6169       break;
6170 
6171     case FLOAT_EXPR:
6172       c1 = VEC_UNPACK_FLOAT_LO_EXPR;
6173       c2 = VEC_UNPACK_FLOAT_HI_EXPR;
6174       break;
6175 
6176     case FIX_TRUNC_EXPR:
6177       /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
6178 	 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
6179 	 computing the operation.  */
6180       return false;
6181 
6182     default:
6183       gcc_unreachable ();
6184     }
6185 
6186   if (BYTES_BIG_ENDIAN)
6187     {
6188       enum tree_code ctmp = c1;
6189       c1 = c2;
6190       c2 = ctmp;
6191     }
6192 
6193   if (code == FIX_TRUNC_EXPR)
6194     {
6195       /* The signedness is determined from output operand.  */
6196       optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
6197       optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
6198     }
6199   else
6200     {
6201       optab1 = optab_for_tree_code (c1, vectype, optab_default);
6202       optab2 = optab_for_tree_code (c2, vectype, optab_default);
6203     }
6204 
6205   if (!optab1 || !optab2)
6206     return false;
6207 
6208   vec_mode = TYPE_MODE (vectype);
6209   if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
6210        || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
6211     return false;
6212 
6213   *code1 = c1;
6214   *code2 = c2;
6215 
6216   if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
6217       && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
6218     return true;
6219 
6220   /* Check if it's a multi-step conversion that can be done using intermediate
6221      types.  */
6222 
6223   prev_type = vectype;
6224   prev_mode = vec_mode;
6225 
6226   if (!CONVERT_EXPR_CODE_P (code))
6227     return false;
6228 
  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
     not.  */
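  /* For instance (assuming the target provides the necessary vec_unpack
     patterns), promoting a vector of chars to a vector of ints cannot be
     done in a single step:

       V16QI --vec_unpack_{lo,hi}--> 2 x V8HI --vec_unpack_{lo,hi}--> 4 x V4SI

     In that case the loop below records the short vector type as the only
     intermediate type, i.e. *multi_step_cvt == 1.  */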
6233   *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
6234   for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
6235     {
6236       intermediate_mode = insn_data[icode1].operand[0].mode;
6237       intermediate_type
6238 	= lang_hooks.types.type_for_mode (intermediate_mode,
6239 					  TYPE_UNSIGNED (prev_type));
6240       optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
6241       optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
6242 
6243       if (!optab3 || !optab4
6244           || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
6245 	  || insn_data[icode1].operand[0].mode != intermediate_mode
6246 	  || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
6247 	  || insn_data[icode2].operand[0].mode != intermediate_mode
6248 	  || ((icode1 = optab_handler (optab3, intermediate_mode))
6249 	      == CODE_FOR_nothing)
6250 	  || ((icode2 = optab_handler (optab4, intermediate_mode))
6251 	      == CODE_FOR_nothing))
6252 	break;
6253 
6254       VEC_quick_push (tree, *interm_types, intermediate_type);
6255       (*multi_step_cvt)++;
6256 
6257       if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
6258 	  && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
6259 	return true;
6260 
6261       prev_type = intermediate_type;
6262       prev_mode = intermediate_mode;
6263     }
6264 
6265   VEC_free (tree, heap, *interm_types);
6266   return false;
6267 }
6268 
6269 
6270 /* Function supportable_narrowing_operation
6271 
6272    Check whether an operation represented by the code CODE is a
6273    narrowing operation that is supported by the target platform in
6274    vector form (i.e., when operating on arguments of type VECTYPE_IN
6275    and producing a result of type VECTYPE_OUT).
6276 
6277    Narrowing operations we currently support are NOP (CONVERT) and
6278    FIX_TRUNC.  This function checks if these operations are supported by
6279    the target platform directly via vector tree-codes.
6280 
6281    Output:
6282    - CODE1 is the code of a vector operation to be used when
6283    vectorizing the operation, if available.
6284    - MULTI_STEP_CVT determines the number of required intermediate steps in
6285    case of multi-step conversion (like int->short->char - in that case
6286    MULTI_STEP_CVT will be 1).
6287    - INTERM_TYPES contains the intermediate type required to perform the
6288    narrowing operation (short in the above example).   */
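/* As a schematic illustration only (char_vectype and int_vectype are
   placeholders for the corresponding vector types), a caller asking
   whether an int -> char conversion is vectorizable could do:

     enum tree_code code1;
     int multi_step_cvt;
     VEC (tree, heap) *interm_types = NULL;

     if (supportable_narrowing_operation (NOP_EXPR, char_vectype, int_vectype,
					  &code1, &multi_step_cvt,
					  &interm_types))
       ...

   On success CODE1 is VEC_PACK_TRUNC_EXPR; if an intermediate step is
   needed, MULTI_STEP_CVT is 1 and INTERM_TYPES holds the short vector
   type.  */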
6289 
6290 bool
6291 supportable_narrowing_operation (enum tree_code code,
6292 				 tree vectype_out, tree vectype_in,
6293 				 enum tree_code *code1, int *multi_step_cvt,
6294                                  VEC (tree, heap) **interm_types)
6295 {
6296   enum machine_mode vec_mode;
6297   enum insn_code icode1;
6298   optab optab1, interm_optab;
6299   tree vectype = vectype_in;
6300   tree narrow_vectype = vectype_out;
6301   enum tree_code c1;
6302   tree intermediate_type;
6303   enum machine_mode intermediate_mode, prev_mode;
6304   int i;
6305   bool uns;
6306 
6307   *multi_step_cvt = 0;
6308   switch (code)
6309     {
6310     CASE_CONVERT:
6311       c1 = VEC_PACK_TRUNC_EXPR;
6312       break;
6313 
6314     case FIX_TRUNC_EXPR:
6315       c1 = VEC_PACK_FIX_TRUNC_EXPR;
6316       break;
6317 
6318     case FLOAT_EXPR:
6319       /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
6320 	 tree code and optabs used for computing the operation.  */
6321       return false;
6322 
6323     default:
6324       gcc_unreachable ();
6325     }
6326 
6327   if (code == FIX_TRUNC_EXPR)
    /* The signedness is determined from the output operand.  */
6329     optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
6330   else
6331     optab1 = optab_for_tree_code (c1, vectype, optab_default);
6332 
6333   if (!optab1)
6334     return false;
6335 
6336   vec_mode = TYPE_MODE (vectype);
6337   if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
6338     return false;
6339 
6340   *code1 = c1;
6341 
6342   if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
6343     return true;
6344 
6345   /* Check if it's a multi-step conversion that can be done using intermediate
6346      types.  */
6347   prev_mode = vec_mode;
6348   if (code == FIX_TRUNC_EXPR)
6349     uns = TYPE_UNSIGNED (vectype_out);
6350   else
6351     uns = TYPE_UNSIGNED (vectype);
6352 
6353   /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
6354      conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
6355      costly than signed.  */
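  /* For example, when narrowing double to unsigned short every value that
     is in range for the final type also fits in the signed intermediate,
     so the cheaper signed pack pattern (if the target provides one) can be
     used for the initial float-to-integer step.  */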
6356   if (code == FIX_TRUNC_EXPR && uns)
6357     {
6358       enum insn_code icode2;
6359 
6360       intermediate_type
6361 	= lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
6362       interm_optab
6363 	= optab_for_tree_code (c1, intermediate_type, optab_default);
6364       if (interm_optab != NULL
          && (icode2 = optab_handler (interm_optab, vec_mode)) != CODE_FOR_nothing
6366 	  && insn_data[icode1].operand[0].mode
6367 	     == insn_data[icode2].operand[0].mode)
6368 	{
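	  /* Commit to the signed conversion for the first step.  */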
6369 	  uns = false;
6370 	  optab1 = interm_optab;
6371 	  icode1 = icode2;
6372 	}
6373     }
6374 
  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the narrowing sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
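  /* For instance (assuming the target provides the necessary vec_pack_trunc
     patterns), narrowing a vector of ints to a vector of chars goes through
     a short vector intermediate:

       2 x V4SI --vec_pack_trunc--> V8HI,  2 x V8HI --vec_pack_trunc--> V16QI

     so the loop below records *multi_step_cvt == 1 with the short vector
     type in *interm_types.  */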
6378   *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
6379   for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
6380     {
6381       intermediate_mode = insn_data[icode1].operand[0].mode;
6382       intermediate_type
6383 	= lang_hooks.types.type_for_mode (intermediate_mode, uns);
6384       interm_optab
6385 	= optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
6386 			       optab_default);
6387       if (!interm_optab
6388 	  || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
6389 	  || insn_data[icode1].operand[0].mode != intermediate_mode
6390 	  || ((icode1 = optab_handler (interm_optab, intermediate_mode))
6391 	      == CODE_FOR_nothing))
6392 	break;
6393 
6394       VEC_quick_push (tree, *interm_types, intermediate_type);
6395       (*multi_step_cvt)++;
6396 
6397       if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
6398 	return true;
6399 
6400       prev_mode = intermediate_mode;
6401       optab1 = interm_optab;
6402     }
6403 
6404   VEC_free (tree, heap, *interm_types);
6405   return false;
6406 }
6407