1 /* Statement Analysis and Transformation for Vectorization
2    Copyright (C) 2003-2018 Free Software Foundation, Inc.
3    Contributed by Dorit Naishlos <dorit@il.ibm.com>
4    and Ira Rosen <irar@il.ibm.com>
5 
6 This file is part of GCC.
7 
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12 
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
16 for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3.  If not see
20 <http://www.gnu.org/licenses/>.  */
21 
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h"		/* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "tree-ssa-loop.h"
47 #include "tree-scalar-evolution.h"
48 #include "tree-vectorizer.h"
49 #include "builtins.h"
50 #include "internal-fn.h"
51 #include "tree-vector-builder.h"
52 #include "vec-perm-indices.h"
53 #include "tree-ssa-loop-niter.h"
54 #include "gimple-fold.h"
55 
56 /* For lang_hooks.types.type_for_mode.  */
57 #include "langhooks.h"
58 
59 /* Return the vectorized type for the given statement.  */
60 
61 tree
62 stmt_vectype (struct _stmt_vec_info *stmt_info)
63 {
64   return STMT_VINFO_VECTYPE (stmt_info);
65 }
66 
67 /* Return TRUE iff the given statement is in an inner loop relative to
68    the loop being vectorized.  */
69 bool
70 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
71 {
72   gimple *stmt = STMT_VINFO_STMT (stmt_info);
73   basic_block bb = gimple_bb (stmt);
74   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
75   struct loop* loop;
76 
77   if (!loop_vinfo)
78     return false;
79 
80   loop = LOOP_VINFO_LOOP (loop_vinfo);
81 
82   return (bb->loop_father == loop->inner);
83 }
84 
85 /* Record the cost of a statement, either by directly informing the
86    target model or by saving it in a vector for later processing.
87    Return a preliminary estimate of the statement's cost.  */
88 
89 unsigned
90 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
91 		  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
92 		  int misalign, enum vect_cost_model_location where)
93 {
94   if ((kind == vector_load || kind == unaligned_load)
95       && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
96     kind = vector_gather_load;
97   if ((kind == vector_store || kind == unaligned_store)
98       && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
99     kind = vector_scatter_store;
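  /* If a cost vector was supplied, just record the cost there for later
     processing and return a preliminary per-statement estimate; otherwise
     hand the cost straight to the target's cost model.  */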
100   if (body_cost_vec)
101     {
102       tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
103       stmt_info_for_cost si = { count, kind,
104 			        stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
105 				misalign };
106       body_cost_vec->safe_push (si);
107       return (unsigned)
108 	(builtin_vectorization_cost (kind, vectype, misalign) * count);
109     }
110   else
111     return add_stmt_cost (stmt_info->vinfo->target_cost_data,
112 			  count, kind, stmt_info, misalign, where);
113 }
114 
115 /* Return a variable of type ELEM_TYPE[NELEMS].  */
116 
117 static tree
118 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
119 {
120   return create_tmp_var (build_array_type_nelts (elem_type, nelems),
121 			 "vect_array");
122 }
123 
124 /* ARRAY is an array of vectors created by create_vector_array.
125    Return an SSA_NAME for the vector in index N.  The reference
126    is part of the vectorization of STMT and the vector is associated
127    with scalar destination SCALAR_DEST.  */
128 
129 static tree
130 read_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
131 		   tree array, unsigned HOST_WIDE_INT n)
132 {
133   tree vect_type, vect, vect_name, array_ref;
134   gimple *new_stmt;
135 
136   gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
137   vect_type = TREE_TYPE (TREE_TYPE (array));
138   vect = vect_create_destination_var (scalar_dest, vect_type);
139   array_ref = build4 (ARRAY_REF, vect_type, array,
140 		      build_int_cst (size_type_node, n),
141 		      NULL_TREE, NULL_TREE);
142 
143   new_stmt = gimple_build_assign (vect, array_ref);
144   vect_name = make_ssa_name (vect, new_stmt);
145   gimple_assign_set_lhs (new_stmt, vect_name);
146   vect_finish_stmt_generation (stmt, new_stmt, gsi);
147 
148   return vect_name;
149 }
150 
151 /* ARRAY is an array of vectors created by create_vector_array.
152    Emit code to store SSA_NAME VECT in index N of the array.
153    The store is part of the vectorization of STMT.  */
154 
155 static void
156 write_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree vect,
157 		    tree array, unsigned HOST_WIDE_INT n)
158 {
159   tree array_ref;
160   gimple *new_stmt;
161 
162   array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
163 		      build_int_cst (size_type_node, n),
164 		      NULL_TREE, NULL_TREE);
165 
166   new_stmt = gimple_build_assign (array_ref, vect);
167   vect_finish_stmt_generation (stmt, new_stmt, gsi);
168 }
169 
170 /* PTR is a pointer to an array of type TYPE, to be accessed with alias
171    pointer type ALIAS_PTR_TYPE.  Return a representation of *PTR.  The
172    memory reference replaces those in FIRST_DR (and its group).  */
173 
174 static tree
175 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
176 {
177   tree mem_ref;
178 
179   mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
180   /* Arrays have the same alignment as their type.  */
181   set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
182   return mem_ref;
183 }
184 
185 /* Utility functions used by vect_mark_stmts_to_be_vectorized.  */
186 
187 /* Function vect_mark_relevant.
188 
189    Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */
190 
191 static void
192 vect_mark_relevant (vec<gimple *> *worklist, gimple *stmt,
193 		    enum vect_relevant relevant, bool live_p)
194 {
195   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
196   enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
197   bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
198   gimple *pattern_stmt;
199 
200   if (dump_enabled_p ())
201     {
202       dump_printf_loc (MSG_NOTE, vect_location,
203 		       "mark relevant %d, live %d: ", relevant, live_p);
204       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
205     }
206 
207   /* If this stmt is an original stmt in a pattern, we might need to mark its
208      related pattern stmt instead of the original stmt.  However, such stmts
209      may have their own uses that are not in any pattern; in such cases the
210      stmt itself should be marked.  */
211   if (STMT_VINFO_IN_PATTERN_P (stmt_info))
212     {
213       /* This is the last stmt in a sequence that was detected as a
214 	 pattern that can potentially be vectorized.  Don't mark the stmt
215 	 as relevant/live because it's not going to be vectorized.
216 	 Instead mark the pattern-stmt that replaces it.  */
217 
218       pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
219 
220       if (dump_enabled_p ())
221 	dump_printf_loc (MSG_NOTE, vect_location,
222 			 "last stmt in pattern. don't mark"
223 			 " relevant/live.\n");
224       stmt_info = vinfo_for_stmt (pattern_stmt);
225       gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
226       save_relevant = STMT_VINFO_RELEVANT (stmt_info);
227       save_live_p = STMT_VINFO_LIVE_P (stmt_info);
228       stmt = pattern_stmt;
229     }
230 
231   STMT_VINFO_LIVE_P (stmt_info) |= live_p;
232   if (relevant > STMT_VINFO_RELEVANT (stmt_info))
233     STMT_VINFO_RELEVANT (stmt_info) = relevant;
234 
235   if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
236       && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
237     {
238       if (dump_enabled_p ())
239         dump_printf_loc (MSG_NOTE, vect_location,
240                          "already marked relevant/live.\n");
241       return;
242     }
243 
244   worklist->safe_push (stmt);
245 }
246 
247 
248 /* Function is_simple_and_all_uses_invariant
249 
250    Return true if STMT is simple and all uses of it are invariant.  */
251 
252 bool
253 is_simple_and_all_uses_invariant (gimple *stmt, loop_vec_info loop_vinfo)
254 {
255   tree op;
256   gimple *def_stmt;
257   ssa_op_iter iter;
258 
259   if (!is_gimple_assign (stmt))
260     return false;
261 
262   FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
263     {
264       enum vect_def_type dt = vect_uninitialized_def;
265 
266       if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt))
267 	{
268 	  if (dump_enabled_p ())
269 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
270 			     "use not simple.\n");
271 	  return false;
272 	}
273 
274       if (dt != vect_external_def && dt != vect_constant_def)
275 	return false;
276     }
277   return true;
278 }
279 
280 /* Function vect_stmt_relevant_p.
281 
282    Return true if STMT, in the loop represented by LOOP_VINFO, is
283    "relevant for vectorization".
284 
285    A stmt is considered "relevant for vectorization" if:
286    - it has uses outside the loop.
287    - it has vdefs (it alters memory).
288    - it is a control stmt in the loop (except for the exit condition).
289 
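   For example (illustrative only), in:

     for (i = 0; i < n; i++)
       {
         a[i] = b[i] + 1;   <-- relevant: it alters memory
         last = b[i];       <-- live: 'last' is used after the loop
       }

   the store is marked relevant, while the stmt defining 'last' is marked
   live because its value is needed outside the loop.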
290    CHECKME: what other side effects would the vectorizer allow?  */
291 
292 static bool
293 vect_stmt_relevant_p (gimple *stmt, loop_vec_info loop_vinfo,
294 		      enum vect_relevant *relevant, bool *live_p)
295 {
296   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
297   ssa_op_iter op_iter;
298   imm_use_iterator imm_iter;
299   use_operand_p use_p;
300   def_operand_p def_p;
301 
302   *relevant = vect_unused_in_scope;
303   *live_p = false;
304 
305   /* cond stmt other than loop exit cond.  */
306   if (is_ctrl_stmt (stmt)
307       && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
308          != loop_exit_ctrl_vec_info_type)
309     *relevant = vect_used_in_scope;
310 
311   /* changing memory.  */
312   if (gimple_code (stmt) != GIMPLE_PHI)
313     if (gimple_vdef (stmt)
314 	&& !gimple_clobber_p (stmt))
315       {
316 	if (dump_enabled_p ())
317 	  dump_printf_loc (MSG_NOTE, vect_location,
318                            "vec_stmt_relevant_p: stmt has vdefs.\n");
319 	*relevant = vect_used_in_scope;
320       }
321 
322   /* uses outside the loop.  */
323   FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
324     {
325       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
326 	{
327 	  basic_block bb = gimple_bb (USE_STMT (use_p));
328 	  if (!flow_bb_inside_loop_p (loop, bb))
329 	    {
330 	      if (dump_enabled_p ())
331 		dump_printf_loc (MSG_NOTE, vect_location,
332                                  "vec_stmt_relevant_p: used out of loop.\n");
333 
334 	      if (is_gimple_debug (USE_STMT (use_p)))
335 		continue;
336 
337 	      /* We expect all such uses to be in the loop exit phis
338                  (because of loop-closed SSA form).  */
339 	      gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
340 	      gcc_assert (bb == single_exit (loop)->dest);
341 
342               *live_p = true;
343 	    }
344 	}
345     }
346 
347   if (*live_p && *relevant == vect_unused_in_scope
348       && !is_simple_and_all_uses_invariant (stmt, loop_vinfo))
349     {
350       if (dump_enabled_p ())
351 	dump_printf_loc (MSG_NOTE, vect_location,
352 			 "vec_stmt_relevant_p: stmt live but not relevant.\n");
353       *relevant = vect_used_only_live;
354     }
355 
356   return (*live_p || *relevant);
357 }
358 
359 
360 /* Function exist_non_indexing_operands_for_use_p
361 
362    USE is one of the uses attached to STMT.  Check if USE is
363    used in STMT for anything other than indexing an array.  */
364 
365 static bool
366 exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
367 {
368   tree operand;
369   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
370 
371   /* USE corresponds to some operand in STMT.  If there is no data
372      reference in STMT, then any operand that corresponds to USE
373      is not indexing an array.  */
374   if (!STMT_VINFO_DATA_REF (stmt_info))
375     return true;
376 
377   /* STMT has a data_ref. FORNOW this means that it is of one of
378      the following forms:
379      -1- ARRAY_REF = var
380      -2- var = ARRAY_REF
381      (This should have been verified in analyze_data_refs).
382 
383      'var' in the second case corresponds to a def, not a use,
384      so USE cannot correspond to any operands that are not used
385      for array indexing.
386 
387      Therefore, all we need to check is if STMT falls into the
388      first case, and whether var corresponds to USE.  */
389 
390   if (!gimple_assign_copy_p (stmt))
391     {
392       if (is_gimple_call (stmt)
393 	  && gimple_call_internal_p (stmt))
394 	{
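          /* For internal calls such as masked loads/stores and
             gathers/scatters, the mask operand, the stored value and the
             gather/scatter offset all need to be vectorized themselves, so
             treat them as uses beyond mere array indexing.  */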
395 	  internal_fn ifn = gimple_call_internal_fn (stmt);
396 	  int mask_index = internal_fn_mask_index (ifn);
397 	  if (mask_index >= 0
398 	      && use == gimple_call_arg (stmt, mask_index))
399 	    return true;
400 	  int stored_value_index = internal_fn_stored_value_index (ifn);
401 	  if (stored_value_index >= 0
402 	      && use == gimple_call_arg (stmt, stored_value_index))
403 	    return true;
404 	  if (internal_gather_scatter_fn_p (ifn)
405 	      && use == gimple_call_arg (stmt, 1))
406 	    return true;
407 	}
408       return false;
409     }
410 
411   if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
412     return false;
413   operand = gimple_assign_rhs1 (stmt);
414   if (TREE_CODE (operand) != SSA_NAME)
415     return false;
416 
417   if (operand == use)
418     return true;
419 
420   return false;
421 }
422 
423 
424 /*
425    Function process_use.
426 
427    Inputs:
428    - a USE in STMT in a loop represented by LOOP_VINFO
429    - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
430      that defined USE.  This is done by calling mark_relevant and passing it
431      the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
432    - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
433      be performed.
434 
435    Outputs:
436    Generally, LIVE_P and RELEVANT are used to define the liveness and
437    relevance info of the DEF_STMT of this USE:
438        STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
439        STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
440    Exceptions:
441    - case 1: If USE is used only for address computations (e.g. array indexing),
442    which does not need to be directly vectorized, then the liveness/relevance
443    of the respective DEF_STMT is left unchanged.
444    - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
445    skip DEF_STMT because it has already been processed.
446    - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
447    be modified accordingly.
448 
449    Return true if everything is as expected. Return false otherwise.  */
450 
451 static bool
452 process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo,
453 	     enum vect_relevant relevant, vec<gimple *> *worklist,
454 	     bool force)
455 {
456   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
457   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
458   stmt_vec_info dstmt_vinfo;
459   basic_block bb, def_bb;
460   gimple *def_stmt;
461   enum vect_def_type dt;
462 
463   /* case 1: we are only interested in uses that need to be vectorized.  Uses
464      that are used for address computation are not considered relevant.  */
465   if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
466      return true;
467 
468   if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &dt))
469     {
470       if (dump_enabled_p ())
471         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
472                          "not vectorized: unsupported use in stmt.\n");
473       return false;
474     }
475 
476   if (!def_stmt || gimple_nop_p (def_stmt))
477     return true;
478 
479   def_bb = gimple_bb (def_stmt);
480   if (!flow_bb_inside_loop_p (loop, def_bb))
481     {
482       if (dump_enabled_p ())
483 	dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
484       return true;
485     }
486 
487   /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
488      DEF_STMT must have already been processed, because this should be the
489      only way that STMT, which is a reduction-phi, was put in the worklist,
490      as there should be no other uses for DEF_STMT in the loop.  So we just
491      check that everything is as expected, and we are done.  */
492   dstmt_vinfo = vinfo_for_stmt (def_stmt);
493   bb = gimple_bb (stmt);
494   if (gimple_code (stmt) == GIMPLE_PHI
495       && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
496       && gimple_code (def_stmt) != GIMPLE_PHI
497       && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
498       && bb->loop_father == def_bb->loop_father)
499     {
500       if (dump_enabled_p ())
501 	dump_printf_loc (MSG_NOTE, vect_location,
502                          "reduc-stmt defining reduc-phi in the same nest.\n");
503       if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
504 	dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
505       gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
506       gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
507 		  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
508       return true;
509     }
510 
511   /* case 3a: outer-loop stmt defining an inner-loop stmt:
512 	outer-loop-header-bb:
513 		d = def_stmt
514 	inner-loop:
515 		stmt # use (d)
516 	outer-loop-tail-bb:
517 		...		  */
518   if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
519     {
520       if (dump_enabled_p ())
521 	dump_printf_loc (MSG_NOTE, vect_location,
522                          "outer-loop def-stmt defining inner-loop stmt.\n");
523 
524       switch (relevant)
525 	{
526 	case vect_unused_in_scope:
527 	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
528 		      vect_used_in_scope : vect_unused_in_scope;
529 	  break;
530 
531 	case vect_used_in_outer_by_reduction:
532           gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
533 	  relevant = vect_used_by_reduction;
534 	  break;
535 
536 	case vect_used_in_outer:
537           gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
538 	  relevant = vect_used_in_scope;
539 	  break;
540 
541 	case vect_used_in_scope:
542 	  break;
543 
544 	default:
545 	  gcc_unreachable ();
546 	}
547     }
548 
549   /* case 3b: inner-loop stmt defining an outer-loop stmt:
550 	outer-loop-header-bb:
551 		...
552 	inner-loop:
553 		d = def_stmt
554 	outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
555 		stmt # use (d)		*/
556   else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
557     {
558       if (dump_enabled_p ())
559 	dump_printf_loc (MSG_NOTE, vect_location,
560                          "inner-loop def-stmt defining outer-loop stmt.\n");
561 
562       switch (relevant)
563         {
564         case vect_unused_in_scope:
565           relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
566             || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
567                       vect_used_in_outer_by_reduction : vect_unused_in_scope;
568           break;
569 
570         case vect_used_by_reduction:
571 	case vect_used_only_live:
572           relevant = vect_used_in_outer_by_reduction;
573           break;
574 
575         case vect_used_in_scope:
576           relevant = vect_used_in_outer;
577           break;
578 
579         default:
580           gcc_unreachable ();
581         }
582     }
583   /* We are also not interested in uses on loop PHI backedges that are
584      inductions.  Otherwise we'll needlessly vectorize the IV increment
585      and cause hybrid SLP for SLP inductions.  Unless the PHI is live
586      of course.  */
587   else if (gimple_code (stmt) == GIMPLE_PHI
588 	   && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
589 	   && ! STMT_VINFO_LIVE_P (stmt_vinfo)
590 	   && (PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (bb->loop_father))
591 	       == use))
592     {
593       if (dump_enabled_p ())
594 	dump_printf_loc (MSG_NOTE, vect_location,
595                          "induction value on backedge.\n");
596       return true;
597     }
598 
599 
600   vect_mark_relevant (worklist, def_stmt, relevant, false);
601   return true;
602 }
603 
604 
605 /* Function vect_mark_stmts_to_be_vectorized.
606 
607    Not all stmts in the loop need to be vectorized. For example:
608 
609      for i...
610        for j...
611    1.    T0 = i + j
612    2.	 T1 = a[T0]
613 
614    3.    j = j + 1
615 
616    Stmts 1 and 3 do not need to be vectorized, because loop control and
617    addressing of vectorized data-refs are handled differently.
618 
619    This pass detects such stmts.  */
620 
621 bool
622 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
623 {
624   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
625   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
626   unsigned int nbbs = loop->num_nodes;
627   gimple_stmt_iterator si;
628   gimple *stmt;
629   unsigned int i;
630   stmt_vec_info stmt_vinfo;
631   basic_block bb;
632   gimple *phi;
633   bool live_p;
634   enum vect_relevant relevant;
635 
636   if (dump_enabled_p ())
637     dump_printf_loc (MSG_NOTE, vect_location,
638                      "=== vect_mark_stmts_to_be_vectorized ===\n");
639 
640   auto_vec<gimple *, 64> worklist;
641 
642   /* 1. Init worklist.  */
643   for (i = 0; i < nbbs; i++)
644     {
645       bb = bbs[i];
646       for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
647 	{
648 	  phi = gsi_stmt (si);
649 	  if (dump_enabled_p ())
650 	    {
651 	      dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
652 	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
653 	    }
654 
655 	  if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
656 	    vect_mark_relevant (&worklist, phi, relevant, live_p);
657 	}
658       for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
659 	{
660 	  stmt = gsi_stmt (si);
661 	  if (dump_enabled_p ())
662 	    {
663 	      dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
664 	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
665 	    }
666 
667 	  if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
668 	    vect_mark_relevant (&worklist, stmt, relevant, live_p);
669 	}
670     }
671 
672   /* 2. Process_worklist */
673   while (worklist.length () > 0)
674     {
675       use_operand_p use_p;
676       ssa_op_iter iter;
677 
678       stmt = worklist.pop ();
679       if (dump_enabled_p ())
680 	{
681           dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
682           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
683 	}
684 
685       /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
686 	 (DEF_STMT) as relevant/irrelevant according to the relevance property
687 	 of STMT.  */
688       stmt_vinfo = vinfo_for_stmt (stmt);
689       relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
690 
691       /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
692 	 propagated as is to the DEF_STMTs of its USEs.
693 
694 	 One exception is when STMT has been identified as defining a reduction
695 	 variable; in this case we set the relevance to vect_used_by_reduction.
696 	 This is because we distinguish between two kinds of relevant stmts -
697 	 those that are used by a reduction computation, and those that are
698 	 (also) used by a regular computation.  This allows us later on to
699 	 identify stmts that are used solely by a reduction, and therefore the
700 	 order of the results that they produce does not have to be kept.  */
701 
702       switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
703         {
704           case vect_reduction_def:
705 	    gcc_assert (relevant != vect_unused_in_scope);
706 	    if (relevant != vect_unused_in_scope
707 		&& relevant != vect_used_in_scope
708 		&& relevant != vect_used_by_reduction
709 		&& relevant != vect_used_only_live)
710 	      {
711 		if (dump_enabled_p ())
712 		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
713 				   "unsupported use of reduction.\n");
714 		return false;
715 	      }
716 	    break;
717 
718           case vect_nested_cycle:
719 	    if (relevant != vect_unused_in_scope
720 		&& relevant != vect_used_in_outer_by_reduction
721 		&& relevant != vect_used_in_outer)
722               {
723                 if (dump_enabled_p ())
724                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
725                                    "unsupported use of nested cycle.\n");
726 
727                 return false;
728               }
729             break;
730 
731           case vect_double_reduction_def:
732 	    if (relevant != vect_unused_in_scope
733 		&& relevant != vect_used_by_reduction
734 		&& relevant != vect_used_only_live)
735               {
736                 if (dump_enabled_p ())
737                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
738                                    "unsupported use of double reduction.\n");
739 
740                 return false;
741               }
742             break;
743 
744           default:
745             break;
746         }
747 
748       if (is_pattern_stmt_p (stmt_vinfo))
749         {
750           /* Pattern statements are not inserted into the code, so
751              FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
752              have to scan the RHS or function arguments instead.  */
753           if (is_gimple_assign (stmt))
754             {
755 	      enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
756 	      tree op = gimple_assign_rhs1 (stmt);
757 
758 	      i = 1;
759 	      if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
760 		{
761 		  if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
762 				    relevant, &worklist, false)
763 		      || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
764 				       relevant, &worklist, false))
765 		    return false;
766 		  i = 2;
767 		}
768 	      for (; i < gimple_num_ops (stmt); i++)
769                 {
770 		  op = gimple_op (stmt, i);
771                   if (TREE_CODE (op) == SSA_NAME
772 		      && !process_use (stmt, op, loop_vinfo, relevant,
773 				       &worklist, false))
774                     return false;
775                 }
776             }
777           else if (is_gimple_call (stmt))
778             {
779               for (i = 0; i < gimple_call_num_args (stmt); i++)
780                 {
781                   tree arg = gimple_call_arg (stmt, i);
782 		  if (!process_use (stmt, arg, loop_vinfo, relevant,
783 				    &worklist, false))
784                     return false;
785                 }
786             }
787         }
788       else
789         FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
790           {
791             tree op = USE_FROM_PTR (use_p);
792 	    if (!process_use (stmt, op, loop_vinfo, relevant,
793 			      &worklist, false))
794               return false;
795           }
796 
797       if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
798 	{
799 	  gather_scatter_info gs_info;
800 	  if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info))
801 	    gcc_unreachable ();
802 	  if (!process_use (stmt, gs_info.offset, loop_vinfo, relevant,
803 			    &worklist, true))
804 	    return false;
805 	}
806     } /* while worklist */
807 
808   return true;
809 }
810 
811 
812 /* Function vect_model_simple_cost.
813 
814    Models cost for simple operations, i.e. those that only emit ncopies of a
815    single op.  Right now, this does not account for multiple insns that could
816    be generated for the single vector op.  We will handle that shortly.  */
817 
818 void
819 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
820 			enum vect_def_type *dt,
821 			int ndts,
822 			stmt_vector_for_cost *prologue_cost_vec,
823 			stmt_vector_for_cost *body_cost_vec)
824 {
825   int i;
826   int inside_cost = 0, prologue_cost = 0;
827 
828   /* The SLP costs were already calculated during SLP tree build.  */
829   gcc_assert (!PURE_SLP_STMT (stmt_info));
830 
831   /* Cost the "broadcast" of a scalar operand into a vector operand.
832      Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
833      cost model.  */
834   for (i = 0; i < ndts; i++)
835     if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
836       prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
837 					 stmt_info, 0, vect_prologue);
838 
839   /* Pass the inside-of-loop statements to the target-specific cost model.  */
840   inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
841 				  stmt_info, 0, vect_body);
842 
843   if (dump_enabled_p ())
844     dump_printf_loc (MSG_NOTE, vect_location,
845                      "vect_model_simple_cost: inside_cost = %d, "
846                      "prologue_cost = %d .\n", inside_cost, prologue_cost);
847 }
848 
849 
850 /* Model cost for type demotion and promotion operations.  PWR is normally
851    zero for single-step promotions and demotions.  It will be one if
852    two-step promotion/demotion is required, and so on.  Each additional
853    step doubles the number of instructions required.  */
854 
855 static void
856 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
857 				    enum vect_def_type *dt, int pwr)
858 {
859   int i, tmp;
860   int inside_cost = 0, prologue_cost = 0;
861   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
862   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
863   void *target_cost_data;
864 
865   /* The SLP costs were already calculated during SLP tree build.  */
866   gcc_assert (!PURE_SLP_STMT (stmt_info));
867 
868   if (loop_vinfo)
869     target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
870   else
871     target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
872 
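  /* E.g. (illustrative) a two-step promotion (PWR == 1) adds
     vect_pow2 (1) + vect_pow2 (2) == 6 vec_promote_demote stmts in the
     loop below, while a two-step demotion adds
     vect_pow2 (0) + vect_pow2 (1) == 3.  */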
873   for (i = 0; i < pwr + 1; i++)
874     {
875       tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
876 	(i + 1) : i;
877       inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
878 				    vec_promote_demote, stmt_info, 0,
879 				    vect_body);
880     }
881 
882   /* FORNOW: Assuming maximum 2 args per stmt.  */
883   for (i = 0; i < 2; i++)
884     if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
885       prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
886 				      stmt_info, 0, vect_prologue);
887 
888   if (dump_enabled_p ())
889     dump_printf_loc (MSG_NOTE, vect_location,
890                      "vect_model_promotion_demotion_cost: inside_cost = %d, "
891                      "prologue_cost = %d .\n", inside_cost, prologue_cost);
892 }
893 
894 /* Function vect_model_store_cost
895 
896    Models cost for stores.  In the case of grouped accesses, one access
897    has the overhead of the grouped access attributed to it.  */
898 
899 void
900 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
901 		       vect_memory_access_type memory_access_type,
902 		       vec_load_store_type vls_type, slp_tree slp_node,
903 		       stmt_vector_for_cost *prologue_cost_vec,
904 		       stmt_vector_for_cost *body_cost_vec)
905 {
906   unsigned int inside_cost = 0, prologue_cost = 0;
907   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
908   gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
909   bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
910 
911   if (vls_type == VLS_STORE_INVARIANT)
912     prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
913 				       stmt_info, 0, vect_prologue);
914 
915   /* Grouped stores update all elements in the group at once,
916      so we want the DR for the first statement.  */
917   if (!slp_node && grouped_access_p)
918     {
919       first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
920       dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
921     }
922 
923   /* True if we should include any once-per-group costs as well as
924      the cost of the statement itself.  For SLP we only get called
925      once per group anyhow.  */
926   bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
927 
928   /* We assume that the cost of a single store-lanes instruction is
929      equivalent to the cost of GROUP_SIZE separate stores.  If a grouped
930      access is instead being provided by a permute-and-store operation,
931      include the cost of the permutes.  */
932   if (first_stmt_p
933       && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
934     {
935       /* Uses high and low interleave or shuffle operations for each
936 	 needed permute.  */
937       int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
938       int nstmts = ncopies * ceil_log2 (group_size) * group_size;
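      /* E.g. (illustrative) for GROUP_SIZE == 4 and NCOPIES == 1 this is
         1 * ceil_log2 (4) * 4 == 8 vec_perm stmts.  */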
939       inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
940 				      stmt_info, 0, vect_body);
941 
942       if (dump_enabled_p ())
943         dump_printf_loc (MSG_NOTE, vect_location,
944                          "vect_model_store_cost: strided group_size = %d .\n",
945                          group_size);
946     }
947 
948   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
949   /* Costs of the stores.  */
950   if (memory_access_type == VMAT_ELEMENTWISE
951       || memory_access_type == VMAT_GATHER_SCATTER)
952     {
953       /* N scalar stores plus extracting the elements.  */
954       unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
955       inside_cost += record_stmt_cost (body_cost_vec,
956 				       ncopies * assumed_nunits,
957 				       scalar_store, stmt_info, 0, vect_body);
958     }
959   else
960     vect_get_store_cost (dr, ncopies, &inside_cost, body_cost_vec);
961 
962   if (memory_access_type == VMAT_ELEMENTWISE
963       || memory_access_type == VMAT_STRIDED_SLP)
964     {
965       /* Plus extracting each element to be stored from its vector.  */
966       unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
967       inside_cost += record_stmt_cost (body_cost_vec,
968 				       ncopies * assumed_nunits,
969 				       vec_to_scalar, stmt_info, 0, vect_body);
970     }
971 
972   if (dump_enabled_p ())
973     dump_printf_loc (MSG_NOTE, vect_location,
974                      "vect_model_store_cost: inside_cost = %d, "
975                      "prologue_cost = %d .\n", inside_cost, prologue_cost);
976 }
977 
978 
979 /* Calculate cost of DR's memory access.  */
980 void
981 vect_get_store_cost (struct data_reference *dr, int ncopies,
982 		     unsigned int *inside_cost,
983 		     stmt_vector_for_cost *body_cost_vec)
984 {
985   int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
986   gimple *stmt = DR_STMT (dr);
987   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
988 
989   switch (alignment_support_scheme)
990     {
991     case dr_aligned:
992       {
993 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
994 					  vector_store, stmt_info, 0,
995 					  vect_body);
996 
997         if (dump_enabled_p ())
998           dump_printf_loc (MSG_NOTE, vect_location,
999                            "vect_model_store_cost: aligned.\n");
1000         break;
1001       }
1002 
1003     case dr_unaligned_supported:
1004       {
1005         /* Here, we assign an additional cost for the unaligned store.  */
1006 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1007 					  unaligned_store, stmt_info,
1008 					  DR_MISALIGNMENT (dr), vect_body);
1009         if (dump_enabled_p ())
1010           dump_printf_loc (MSG_NOTE, vect_location,
1011                            "vect_model_store_cost: unaligned supported by "
1012                            "hardware.\n");
1013         break;
1014       }
1015 
1016     case dr_unaligned_unsupported:
1017       {
1018         *inside_cost = VECT_MAX_COST;
1019 
1020         if (dump_enabled_p ())
1021           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1022                            "vect_model_store_cost: unsupported access.\n");
1023         break;
1024       }
1025 
1026     default:
1027       gcc_unreachable ();
1028     }
1029 }
1030 
1031 
1032 /* Function vect_model_load_cost
1033 
1034    Models cost for loads.  In the case of grouped accesses, one access has
1035    the overhead of the grouped access attributed to it.  Since unaligned
1036    accesses are supported for loads, we also account for the costs of the
1037    access scheme chosen.  */
1038 
1039 void
1040 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1041 		      vect_memory_access_type memory_access_type,
1042 		      slp_tree slp_node,
1043 		      stmt_vector_for_cost *prologue_cost_vec,
1044 		      stmt_vector_for_cost *body_cost_vec)
1045 {
1046   gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
1047   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1048   unsigned int inside_cost = 0, prologue_cost = 0;
1049   bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1050 
1051   /* Grouped loads read all elements in the group at once,
1052      so we want the DR for the first statement.  */
1053   if (!slp_node && grouped_access_p)
1054     {
1055       first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1056       dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1057     }
1058 
1059   /* True if we should include any once-per-group costs as well as
1060      the cost of the statement itself.  For SLP we only get called
1061      once per group anyhow.  */
1062   bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
1063 
1064   /* We assume that the cost of a single load-lanes instruction is
1065      equivalent to the cost of GROUP_SIZE separate loads.  If a grouped
1066      access is instead being provided by a load-and-permute operation,
1067      include the cost of the permutes.  */
1068   if (first_stmt_p
1069       && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1070     {
1071       /* Uses even and odd extract operations or shuffle operations
1072 	 for each needed permute.  */
1073       int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
1074       int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1075       inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1076 				      stmt_info, 0, vect_body);
1077 
1078       if (dump_enabled_p ())
1079         dump_printf_loc (MSG_NOTE, vect_location,
1080                          "vect_model_load_cost: strided group_size = %d .\n",
1081                          group_size);
1082     }
1083 
1084   /* The loads themselves.  */
1085   if (memory_access_type == VMAT_ELEMENTWISE
1086       || memory_access_type == VMAT_GATHER_SCATTER)
1087     {
1088       /* N scalar loads plus gathering them into a vector.  */
1089       tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1090       unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1091       inside_cost += record_stmt_cost (body_cost_vec,
1092 				       ncopies * assumed_nunits,
1093 				       scalar_load, stmt_info, 0, vect_body);
1094     }
1095   else
1096     vect_get_load_cost (dr, ncopies, first_stmt_p,
1097 			&inside_cost, &prologue_cost,
1098 			prologue_cost_vec, body_cost_vec, true);
1099   if (memory_access_type == VMAT_ELEMENTWISE
1100       || memory_access_type == VMAT_STRIDED_SLP)
1101     inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1102 				     stmt_info, 0, vect_body);
1103 
1104   if (dump_enabled_p ())
1105     dump_printf_loc (MSG_NOTE, vect_location,
1106                      "vect_model_load_cost: inside_cost = %d, "
1107                      "prologue_cost = %d .\n", inside_cost, prologue_cost);
1108 }
1109 
1110 
1111 /* Calculate cost of DR's memory access.  */
1112 void
1113 vect_get_load_cost (struct data_reference *dr, int ncopies,
1114 		    bool add_realign_cost, unsigned int *inside_cost,
1115 		    unsigned int *prologue_cost,
1116 		    stmt_vector_for_cost *prologue_cost_vec,
1117 		    stmt_vector_for_cost *body_cost_vec,
1118 		    bool record_prologue_costs)
1119 {
1120   int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1121   gimple *stmt = DR_STMT (dr);
1122   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1123 
1124   switch (alignment_support_scheme)
1125     {
1126     case dr_aligned:
1127       {
1128 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1129 					  stmt_info, 0, vect_body);
1130 
1131         if (dump_enabled_p ())
1132           dump_printf_loc (MSG_NOTE, vect_location,
1133                            "vect_model_load_cost: aligned.\n");
1134 
1135         break;
1136       }
1137     case dr_unaligned_supported:
1138       {
1139         /* Here, we assign an additional cost for the unaligned load.  */
1140 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1141 					  unaligned_load, stmt_info,
1142 					  DR_MISALIGNMENT (dr), vect_body);
1143 
1144         if (dump_enabled_p ())
1145           dump_printf_loc (MSG_NOTE, vect_location,
1146                            "vect_model_load_cost: unaligned supported by "
1147                            "hardware.\n");
1148 
1149         break;
1150       }
1151     case dr_explicit_realign:
1152       {
1153 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1154 					  vector_load, stmt_info, 0, vect_body);
1155 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1156 					  vec_perm, stmt_info, 0, vect_body);
1157 
1158         /* FIXME: If the misalignment remains fixed across the iterations of
1159            the containing loop, the following cost should be added to the
1160            prologue costs.  */
1161         if (targetm.vectorize.builtin_mask_for_load)
1162 	  *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1163 					    stmt_info, 0, vect_body);
1164 
1165         if (dump_enabled_p ())
1166           dump_printf_loc (MSG_NOTE, vect_location,
1167                            "vect_model_load_cost: explicit realign\n");
1168 
1169         break;
1170       }
1171     case dr_explicit_realign_optimized:
1172       {
1173         if (dump_enabled_p ())
1174           dump_printf_loc (MSG_NOTE, vect_location,
1175                            "vect_model_load_cost: unaligned software "
1176                            "pipelined.\n");
1177 
1178         /* Unaligned software pipeline has a load of an address, an initial
1179            load, and possibly a mask operation to "prime" the loop.  However,
1180            if this is an access in a group of loads, which provide grouped
1181            access, then the above cost should only be considered for one
1182            access in the group.  Inside the loop, there is a load op
1183            and a realignment op.  */
1184 
1185         if (add_realign_cost && record_prologue_costs)
1186           {
1187 	    *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1188 						vector_stmt, stmt_info,
1189 						0, vect_prologue);
1190             if (targetm.vectorize.builtin_mask_for_load)
1191 	      *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1192 						  vector_stmt, stmt_info,
1193 						  0, vect_prologue);
1194           }
1195 
1196 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1197 					  stmt_info, 0, vect_body);
1198 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1199 					  stmt_info, 0, vect_body);
1200 
1201         if (dump_enabled_p ())
1202           dump_printf_loc (MSG_NOTE, vect_location,
1203                            "vect_model_load_cost: explicit realign optimized"
1204                            "\n");
1205 
1206         break;
1207       }
1208 
1209     case dr_unaligned_unsupported:
1210       {
1211         *inside_cost = VECT_MAX_COST;
1212 
1213         if (dump_enabled_p ())
1214           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1215                            "vect_model_load_cost: unsupported access.\n");
1216         break;
1217       }
1218 
1219     default:
1220       gcc_unreachable ();
1221     }
1222 }
1223 
1224 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1225    the loop preheader for the vectorized stmt STMT.  */
1226 
1227 static void
1228 vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
1229 {
1230   if (gsi)
1231     vect_finish_stmt_generation (stmt, new_stmt, gsi);
1232   else
1233     {
1234       stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1235       loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1236 
1237       if (loop_vinfo)
1238         {
1239           struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1240 	  basic_block new_bb;
1241 	  edge pe;
1242 
1243           if (nested_in_vect_loop_p (loop, stmt))
1244             loop = loop->inner;
1245 
1246 	  pe = loop_preheader_edge (loop);
1247           new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1248           gcc_assert (!new_bb);
1249 	}
1250       else
1251        {
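          /* Basic-block vectorization: insert the init stmt at the start
             of the basic block being vectorized, right after any labels.  */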
1252           bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1253           basic_block bb;
1254           gimple_stmt_iterator gsi_bb_start;
1255 
1256           gcc_assert (bb_vinfo);
1257           bb = BB_VINFO_BB (bb_vinfo);
1258           gsi_bb_start = gsi_after_labels (bb);
1259           gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1260        }
1261     }
1262 
1263   if (dump_enabled_p ())
1264     {
1265       dump_printf_loc (MSG_NOTE, vect_location,
1266                        "created new init_stmt: ");
1267       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1268     }
1269 }
1270 
1271 /* Function vect_init_vector.
1272 
1273    Insert a new stmt (INIT_STMT) that initializes a new variable of type
1274    TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
1275    vector type, a vector with all elements equal to VAL is created first.
1276    Place the initialization at GSI if it is not NULL.  Otherwise, place the
1277    initialization at the loop preheader.
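
   For example (illustrative), for a four-element integer vector TYPE and
   an integer VAL of 5, this emits an init stmt along the lines of

       cst_1 = { 5, 5, 5, 5 };

   where "cst_1" stands for the fresh SSA name that is returned.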
1278    Return the DEF of INIT_STMT.
1279    It will be used in the vectorization of STMT.  */
1280 
1281 tree
1282 vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1283 {
1284   gimple *init_stmt;
1285   tree new_temp;
1286 
1287   /* We abuse this function to also simply copy VAL into a fresh SSA name.  */
1288   if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1289     {
1290       gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1291       if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1292 	{
1293 	  /* A scalar boolean value should be transformed into an
1294 	     all-zeros or all-ones value before building a vector.  */
1295 	  if (VECTOR_BOOLEAN_TYPE_P (type))
1296 	    {
1297 	      tree true_val = build_all_ones_cst (TREE_TYPE (type));
1298 	      tree false_val = build_zero_cst (TREE_TYPE (type));
1299 
1300 	      if (CONSTANT_CLASS_P (val))
1301 		val = integer_zerop (val) ? false_val : true_val;
1302 	      else
1303 		{
1304 		  new_temp = make_ssa_name (TREE_TYPE (type));
1305 		  init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1306 						   val, true_val, false_val);
1307 		  vect_init_vector_1 (stmt, init_stmt, gsi);
1308 		  val = new_temp;
1309 		}
1310 	    }
1311 	  else if (CONSTANT_CLASS_P (val))
1312 	    val = fold_convert (TREE_TYPE (type), val);
1313 	  else
1314 	    {
1315 	      new_temp = make_ssa_name (TREE_TYPE (type));
1316 	      if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1317 		init_stmt = gimple_build_assign (new_temp,
1318 						 fold_build1 (VIEW_CONVERT_EXPR,
1319 							      TREE_TYPE (type),
1320 							      val));
1321 	      else
1322 		init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
1323 	      vect_init_vector_1 (stmt, init_stmt, gsi);
1324 	      val = new_temp;
1325 	    }
1326 	}
1327       val = build_vector_from_val (type, val);
1328     }
1329 
1330   new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1331   init_stmt = gimple_build_assign (new_temp, val);
1332   vect_init_vector_1 (stmt, init_stmt, gsi);
1333   return new_temp;
1334 }
1335 
1336 /* Function vect_get_vec_def_for_operand_1.
1337 
1338    For a defining stmt DEF_STMT of a scalar stmt, return a vector def with type
1339    DT that will be used in the vectorized stmt.  */
1340 
1341 tree
1342 vect_get_vec_def_for_operand_1 (gimple *def_stmt, enum vect_def_type dt)
1343 {
1344   tree vec_oprnd;
1345   gimple *vec_stmt;
1346   stmt_vec_info def_stmt_info = NULL;
1347 
1348   switch (dt)
1349     {
1350     /* operand is a constant or a loop invariant.  */
1351     case vect_constant_def:
1352     case vect_external_def:
1353       /* Code should use vect_get_vec_def_for_operand.  */
1354       gcc_unreachable ();
1355 
1356     /* operand is defined inside the loop.  */
1357     case vect_internal_def:
1358       {
1359         /* Get the def from the vectorized stmt.  */
1360         def_stmt_info = vinfo_for_stmt (def_stmt);
1361 
1362         vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1363         /* Get vectorized pattern statement.  */
1364         if (!vec_stmt
1365             && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1366             && !STMT_VINFO_RELEVANT (def_stmt_info))
1367           vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1368                        STMT_VINFO_RELATED_STMT (def_stmt_info)));
1369         gcc_assert (vec_stmt);
1370 	if (gimple_code (vec_stmt) == GIMPLE_PHI)
1371 	  vec_oprnd = PHI_RESULT (vec_stmt);
1372 	else if (is_gimple_call (vec_stmt))
1373 	  vec_oprnd = gimple_call_lhs (vec_stmt);
1374 	else
1375 	  vec_oprnd = gimple_assign_lhs (vec_stmt);
1376         return vec_oprnd;
1377       }
1378 
1379     /* operand is defined by a loop header phi.  */
1380     case vect_reduction_def:
1381     case vect_double_reduction_def:
1382     case vect_nested_cycle:
1383     case vect_induction_def:
1384       {
1385 	gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1386 
1387         /* Get the def from the vectorized stmt.  */
1388         def_stmt_info = vinfo_for_stmt (def_stmt);
1389         vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1390 	if (gimple_code (vec_stmt) == GIMPLE_PHI)
1391 	  vec_oprnd = PHI_RESULT (vec_stmt);
1392 	else
1393 	  vec_oprnd = gimple_get_lhs (vec_stmt);
1394         return vec_oprnd;
1395       }
1396 
1397     default:
1398       gcc_unreachable ();
1399     }
1400 }
1401 
1402 
1403 /* Function vect_get_vec_def_for_operand.
1404 
1405    OP is an operand in STMT.  This function returns a (vector) def that will be
1406    used in the vectorized stmt for STMT.
1407 
1408    In the case that OP is an SSA_NAME which is defined in the loop, then
1409    STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1410 
1411    In case OP is an invariant or constant, a new stmt that creates a vector def
1412    needs to be introduced.  VECTYPE may be used to specify a required type for
1413    vector invariant.  */
1414 
1415 tree
1416 vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype)
1417 {
1418   gimple *def_stmt;
1419   enum vect_def_type dt;
1420   bool is_simple_use;
1421   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1422   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1423 
1424   if (dump_enabled_p ())
1425     {
1426       dump_printf_loc (MSG_NOTE, vect_location,
1427                        "vect_get_vec_def_for_operand: ");
1428       dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1429       dump_printf (MSG_NOTE, "\n");
1430     }
1431 
1432   is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt);
1433   gcc_assert (is_simple_use);
1434   if (def_stmt && dump_enabled_p ())
1435     {
1436       dump_printf_loc (MSG_NOTE, vect_location, "  def_stmt =  ");
1437       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1438     }
1439 
1440   if (dt == vect_constant_def || dt == vect_external_def)
1441     {
1442       tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1443       tree vector_type;
1444 
1445       if (vectype)
1446 	vector_type = vectype;
1447       else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1448 	       && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1449 	vector_type = build_same_sized_truth_vector_type (stmt_vectype);
1450       else
1451 	vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1452 
1453       gcc_assert (vector_type);
1454       return vect_init_vector (stmt, op, vector_type, NULL);
1455     }
1456   else
1457     return vect_get_vec_def_for_operand_1 (def_stmt, dt);
1458 }
1459 
1460 
1461 /* Function vect_get_vec_def_for_stmt_copy
1462 
1463    Return a vector-def for an operand.  This function is used when the
1464    vectorized stmt to be created (by the caller to this function) is a "copy"
1465    created in case the vectorized result cannot fit in one vector, and several
1466    copies of the vector-stmt are required.  In this case the vector-def is
1467    retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1468    of the stmt that defines VEC_OPRND.
1469    DT is the type of the vector def VEC_OPRND.
1470 
1471    Context:
1472         In case the vectorization factor (VF) is bigger than the number
1473    of elements that can fit in a vectype (nunits), we have to generate
1474    more than one vector stmt to vectorize the scalar stmt.  This situation
1475    arises when there are multiple data-types operated upon in the loop; the
1476    smallest data-type determines the VF, and as a result, when vectorizing
1477    stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1478    vector stmt (each computing a vector of 'nunits' results, and together
1479    computing 'VF' results in each iteration).  This function is called when
1480    vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1481    which VF=16 and nunits=4, so the number of copies required is 4):
1482 
1483    scalar stmt:         vectorized into:        STMT_VINFO_RELATED_STMT
1484 
1485    S1: x = load         VS1.0:  vx.0 = memref0      VS1.1
1486                         VS1.1:  vx.1 = memref1      VS1.2
1487                         VS1.2:  vx.2 = memref2      VS1.3
1488                         VS1.3:  vx.3 = memref3
1489 
1490    S2: z = x + ...      VSnew.0:  vz0 = vx.0 + ...  VSnew.1
1491                         VSnew.1:  vz1 = vx.1 + ...  VSnew.2
1492                         VSnew.2:  vz2 = vx.2 + ...  VSnew.3
1493                         VSnew.3:  vz3 = vx.3 + ...
1494 
1495    The vectorization of S1 is explained in vectorizable_load.
1496    The vectorization of S2:
1497         To create the first vector-stmt out of the 4 copies - VSnew.0 -
1498    the function 'vect_get_vec_def_for_operand' is called to
1499    get the relevant vector-def for each operand of S2.  For operand x it
1500    returns  the vector-def 'vx.0'.
1501 
1502         To create the remaining copies of the vector-stmt (VSnew.j), this
1503    function is called to get the relevant vector-def for each operand.  It is
1504    obtained from the respective VS1.j stmt, which is recorded in the
1505    STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1506 
1507         For example, to obtain the vector-def 'vx.1' in order to create the
1508    vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1509    Given 'vx.0' we obtain the stmt that defines it ('VS1.0'); from the
1510    STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1511    and return its def ('vx.1').
1512    Overall, to create the above sequence this function will be called 3 times:
1513         vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1514         vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1515         vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */
1516 
1517 tree
1518 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1519 {
1520   gimple *vec_stmt_for_operand;
1521   stmt_vec_info def_stmt_info;
1522 
1523   /* Do nothing; can reuse same def.  */
1524   if (dt == vect_external_def || dt == vect_constant_def)
1525     return vec_oprnd;
1526 
1527   vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1528   def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1529   gcc_assert (def_stmt_info);
1530   vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1531   gcc_assert (vec_stmt_for_operand);
1532   if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1533     vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1534   else
1535     vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1536   return vec_oprnd;
1537 }
1538 
1539 
1540 /* Get vectorized definitions for the operands to create a copy of an original
1541    stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */
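/* In the non-SLP transform loops this is typically called for the J > 0
   copies (see e.g. vectorizable_bswap below), advancing each operand from
   the def used by copy J-1 to the def needed by copy J.  */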
1542 
1543 void
1544 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1545 				 vec<tree> *vec_oprnds0,
1546 				 vec<tree> *vec_oprnds1)
1547 {
1548   tree vec_oprnd = vec_oprnds0->pop ();
1549 
1550   vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1551   vec_oprnds0->quick_push (vec_oprnd);
1552 
1553   if (vec_oprnds1 && vec_oprnds1->length ())
1554     {
1555       vec_oprnd = vec_oprnds1->pop ();
1556       vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1557       vec_oprnds1->quick_push (vec_oprnd);
1558     }
1559 }
1560 
1561 
1562 /* Get vectorized definitions for OP0 and OP1.  */
1563 
1564 void
1565 vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
1566 		   vec<tree> *vec_oprnds0,
1567 		   vec<tree> *vec_oprnds1,
1568 		   slp_tree slp_node)
1569 {
1570   if (slp_node)
1571     {
1572       int nops = (op1 == NULL_TREE) ? 1 : 2;
1573       auto_vec<tree> ops (nops);
1574       auto_vec<vec<tree> > vec_defs (nops);
1575 
1576       ops.quick_push (op0);
1577       if (op1)
1578         ops.quick_push (op1);
1579 
1580       vect_get_slp_defs (ops, slp_node, &vec_defs);
1581 
1582       *vec_oprnds0 = vec_defs[0];
1583       if (op1)
1584 	*vec_oprnds1 = vec_defs[1];
1585     }
1586   else
1587     {
1588       tree vec_oprnd;
1589 
1590       vec_oprnds0->create (1);
1591       vec_oprnd = vect_get_vec_def_for_operand (op0, stmt);
1592       vec_oprnds0->quick_push (vec_oprnd);
1593 
1594       if (op1)
1595 	{
1596 	  vec_oprnds1->create (1);
1597 	  vec_oprnd = vect_get_vec_def_for_operand (op1, stmt);
1598 	  vec_oprnds1->quick_push (vec_oprnd);
1599 	}
1600     }
1601 }
1602 
1603 /* Helper function called by vect_finish_replace_stmt and
1604    vect_finish_stmt_generation.  Set the location of the new
1605    statement and create a stmt_vec_info for it.  */
1606 
1607 static void
1608 vect_finish_stmt_generation_1 (gimple *stmt, gimple *vec_stmt)
1609 {
1610   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1611   vec_info *vinfo = stmt_info->vinfo;
1612 
1613   set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, vinfo));
1614 
1615   if (dump_enabled_p ())
1616     {
1617       dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1618       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1619     }
1620 
1621   gimple_set_location (vec_stmt, gimple_location (stmt));
1622 
1623   /* While EH edges will generally prevent vectorization, stmt might
1624      e.g. be in a must-not-throw region.  Ensure newly created stmts
1625      that could throw are part of the same region.  */
1626   int lp_nr = lookup_stmt_eh_lp (stmt);
1627   if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1628     add_stmt_to_eh_lp (vec_stmt, lp_nr);
1629 }
1630 
1631 /* Replace the scalar statement STMT with a new vector statement VEC_STMT,
1632    which sets the same scalar result as STMT did.  */
1633 
1634 void
1635 vect_finish_replace_stmt (gimple *stmt, gimple *vec_stmt)
1636 {
1637   gcc_assert (gimple_get_lhs (stmt) == gimple_get_lhs (vec_stmt));
1638 
1639   gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1640   gsi_replace (&gsi, vec_stmt, true);
1641 
1642   vect_finish_stmt_generation_1 (stmt, vec_stmt);
1643 }
1644 
1645 /* Function vect_finish_stmt_generation.
1646 
1647    Insert vectorized statement VEC_STMT for scalar statement STMT at GSI.  */
1648 
1649 void
1650 vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
1651 			     gimple_stmt_iterator *gsi)
1652 {
1653   gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1654 
1655   if (!gsi_end_p (*gsi)
1656       && gimple_has_mem_ops (vec_stmt))
1657     {
1658       gimple *at_stmt = gsi_stmt (*gsi);
1659       tree vuse = gimple_vuse (at_stmt);
1660       if (vuse && TREE_CODE (vuse) == SSA_NAME)
1661 	{
1662 	  tree vdef = gimple_vdef (at_stmt);
1663 	  gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1664 	  /* If we have an SSA vuse and insert a store, update virtual
1665 	     SSA form to avoid triggering the renamer.  Do so only
1666 	     if we can easily see all uses - which is what almost always
1667 	     happens with the way vectorized stmts are inserted.  */
1668 	  if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1669 	      && ((is_gimple_assign (vec_stmt)
1670 		   && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1671 		  || (is_gimple_call (vec_stmt)
1672 		      && !(gimple_call_flags (vec_stmt)
1673 			   & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1674 	    {
1675 	      tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1676 	      gimple_set_vdef (vec_stmt, new_vdef);
1677 	      SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1678 	    }
1679 	}
1680     }
1681   gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1682   vect_finish_stmt_generation_1 (stmt, vec_stmt);
1683 }
1684 
1685 /* We want to vectorize a call to combined function CFN with function
1686    decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1687    as the types of all inputs.  Check whether this is possible using
1688    an internal function, returning its code if so or IFN_LAST if not.  */
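/* For example, a call to the sqrt built-in with a V2DF result and a V2DF
   argument would map to IFN_SQRT, provided direct_internal_fn_supported_p
   reports that the target implements it for that mode; the example is
   illustrative only and the supported set is entirely target-dependent.  */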
1689 
1690 static internal_fn
1691 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1692 				tree vectype_out, tree vectype_in)
1693 {
1694   internal_fn ifn;
1695   if (internal_fn_p (cfn))
1696     ifn = as_internal_fn (cfn);
1697   else
1698     ifn = associated_internal_fn (fndecl);
1699   if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1700     {
1701       const direct_internal_fn_info &info = direct_internal_fn (ifn);
1702       if (info.vectorizable)
1703 	{
1704 	  tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1705 	  tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1706 	  if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1707 					      OPTIMIZE_FOR_SPEED))
1708 	    return ifn;
1709 	}
1710     }
1711   return IFN_LAST;
1712 }
1713 
1714 
1715 static tree permute_vec_elements (tree, tree, tree, gimple *,
1716 				  gimple_stmt_iterator *);
1717 
1718 /* Check whether a load or store statement in the loop described by
1719    LOOP_VINFO is possible in a fully-masked loop.  This is testing
1720    whether the vectorizer pass has the appropriate support, as well as
1721    whether the target does.
1722 
1723    VLS_TYPE says whether the statement is a load or store and VECTYPE
1724    is the type of the vector being loaded or stored.  MEMORY_ACCESS_TYPE
1725    says how the load or store is going to be implemented and GROUP_SIZE
1726    is the number of load or store statements in the containing group.
1727    If the access is a gather load or scatter store, GS_INFO describes
1728    its arguments.
1729 
1730    Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
1731    supported, otherwise record the required mask types.  */
1732 
1733 static void
1734 check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
1735 			  vec_load_store_type vls_type, int group_size,
1736 			  vect_memory_access_type memory_access_type,
1737 			  gather_scatter_info *gs_info)
1738 {
1739   /* Invariant loads need no special support.  */
1740   if (memory_access_type == VMAT_INVARIANT)
1741     return;
1742 
1743   vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1744   machine_mode vecmode = TYPE_MODE (vectype);
1745   bool is_load = (vls_type == VLS_LOAD);
1746   if (memory_access_type == VMAT_LOAD_STORE_LANES)
1747     {
1748       if (is_load
1749 	  ? !vect_load_lanes_supported (vectype, group_size, true)
1750 	  : !vect_store_lanes_supported (vectype, group_size, true))
1751 	{
1752 	  if (dump_enabled_p ())
1753 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1754 			     "can't use a fully-masked loop because the"
1755 			     " target doesn't have an appropriate masked"
1756 			     " load/store-lanes instruction.\n");
1757 	  LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1758 	  return;
1759 	}
1760       unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1761       vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
1762       return;
1763     }
1764 
1765   if (memory_access_type == VMAT_GATHER_SCATTER)
1766     {
1767       internal_fn ifn = (is_load
1768 			 ? IFN_MASK_GATHER_LOAD
1769 			 : IFN_MASK_SCATTER_STORE);
1770       tree offset_type = TREE_TYPE (gs_info->offset);
1771       if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
1772 						   gs_info->memory_type,
1773 						   TYPE_SIGN (offset_type),
1774 						   gs_info->scale))
1775 	{
1776 	  if (dump_enabled_p ())
1777 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1778 			     "can't use a fully-masked loop because the"
1779 			     " target doesn't have an appropriate masked"
1780 			     " gather load or scatter store instruction.\n");
1781 	  LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1782 	  return;
1783 	}
1784       unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1785       vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
1786       return;
1787     }
1788 
1789   if (memory_access_type != VMAT_CONTIGUOUS
1790       && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1791     {
1792       /* Element X of the data must come from iteration i * VF + X of the
1793 	 scalar loop.  We need more work to support other mappings.  */
1794       if (dump_enabled_p ())
1795 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1796 			 "can't use a fully-masked loop because an access"
1797 			 " isn't contiguous.\n");
1798       LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1799       return;
1800     }
1801 
1802   machine_mode mask_mode;
1803   if (!(targetm.vectorize.get_mask_mode
1804 	(GET_MODE_NUNITS (vecmode),
1805 	 GET_MODE_SIZE (vecmode)).exists (&mask_mode))
1806       || !can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1807     {
1808       if (dump_enabled_p ())
1809 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1810 			 "can't use a fully-masked loop because the target"
1811 			 " doesn't have the appropriate masked load or"
1812 			 " store.\n");
1813       LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1814       return;
1815     }
1816   /* We might load more scalars than we need for permuting SLP loads.
1817      We checked in get_group_load_store_type that the extra elements
1818      don't leak into a new vector.  */
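  /* For example (figures are illustrative only), with GROUP_SIZE == 3,
     VF == 4 and an 8-element VECTYPE this records
     ceil (3 * 4 / 8) == 2 masks of VECTYPE.  */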
1819   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1820   poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1821   unsigned int nvectors;
1822   if (can_div_away_from_zero_p (group_size * vf, nunits, &nvectors))
1823     vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype);
1824   else
1825     gcc_unreachable ();
1826 }
1827 
1828 /* Return the mask input to a masked load or store.  VEC_MASK is the vectorized
1829    form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1830    that needs to be applied to all loads and stores in a vectorized loop.
1831    Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1832 
1833    MASK_TYPE is the type of both masks.  If new statements are needed,
1834    insert them before GSI.  */
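/* For example, when both masks are present this emits something like

     vec_mask_and_N = VEC_MASK & LOOP_MASK;

   before GSI and returns the new SSA name; N here just stands for
   whatever suffix make_temp_ssa_name assigns.  */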
1835 
1836 static tree
1837 prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
1838 			 gimple_stmt_iterator *gsi)
1839 {
1840   gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1841   if (!loop_mask)
1842     return vec_mask;
1843 
1844   gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1845   tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1846   gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1847 					  vec_mask, loop_mask);
1848   gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1849   return and_res;
1850 }
1851 
1852 /* Determine whether we can use a gather load or scatter store to vectorize
1853    strided load or store STMT by truncating the current offset to a smaller
1854    width.  We need to be able to construct an offset vector:
1855 
1856      { 0, X, X*2, X*3, ... }
1857 
1858    without loss of precision, where X is STMT's DR_STEP.
1859 
1860    Return true if this is possible, describing the gather load or scatter
1861    store in GS_INFO.  MASKED_P is true if the load or store is conditional.  */
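/* As a purely illustrative example: with 2-byte elements (so ELEMENT_BITS
   is 16), DR_STEP == 6 and a scale of 2, X is 3 and the offsets
   { 0, 3, 6, ... } stay exact as long as COUNT * 3 fits in 16 bits and
   the target supports such a gather or scatter.  */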
1862 
1863 static bool
1864 vect_truncate_gather_scatter_offset (gimple *stmt, loop_vec_info loop_vinfo,
1865 				     bool masked_p,
1866 				     gather_scatter_info *gs_info)
1867 {
1868   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1869   data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1870   tree step = DR_STEP (dr);
1871   if (TREE_CODE (step) != INTEGER_CST)
1872     {
1873       /* ??? Perhaps we could use range information here?  */
1874       if (dump_enabled_p ())
1875 	dump_printf_loc (MSG_NOTE, vect_location,
1876 			 "cannot truncate variable step.\n");
1877       return false;
1878     }
1879 
1880   /* Get the number of bits in an element.  */
1881   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1882   scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
1883   unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
1884 
1885   /* Set COUNT to the upper limit on the number of elements - 1.
1886      Start with the maximum vectorization factor.  */
1887   unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
1888 
1889   /* Try lowering COUNT to the number of scalar latch iterations.  */
1890   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1891   widest_int max_iters;
1892   if (max_loop_iterations (loop, &max_iters)
1893       && max_iters < count)
1894     count = max_iters.to_shwi ();
1895 
1896   /* Try scales of 1 and the element size.  */
1897   int scales[] = { 1, vect_get_scalar_dr_size (dr) };
1898   bool overflow_p = false;
1899   for (int i = 0; i < 2; ++i)
1900     {
1901       int scale = scales[i];
1902       widest_int factor;
1903       if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
1904 	continue;
1905 
1906       /* See whether we can calculate COUNT * STEP / SCALE
1907 	 in ELEMENT_BITS bits.  */
1908       widest_int range = wi::mul (count, factor, SIGNED, &overflow_p);
1909       if (overflow_p)
1910 	continue;
1911       signop sign = range >= 0 ? UNSIGNED : SIGNED;
1912       if (wi::min_precision (range, sign) > element_bits)
1913 	{
1914 	  overflow_p = true;
1915 	  continue;
1916 	}
1917 
1918       /* See whether the target supports the operation.  */
1919       tree memory_type = TREE_TYPE (DR_REF (dr));
1920       if (!vect_gather_scatter_fn_p (DR_IS_READ (dr), masked_p, vectype,
1921 				     memory_type, element_bits, sign, scale,
1922 				     &gs_info->ifn, &gs_info->element_type))
1923 	continue;
1924 
1925       tree offset_type = build_nonstandard_integer_type (element_bits,
1926 							 sign == UNSIGNED);
1927 
1928       gs_info->decl = NULL_TREE;
1929       /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
1930 	 but we don't need to store that here.  */
1931       gs_info->base = NULL_TREE;
1932       gs_info->offset = fold_convert (offset_type, step);
1933       gs_info->offset_dt = vect_constant_def;
1934       gs_info->offset_vectype = NULL_TREE;
1935       gs_info->scale = scale;
1936       gs_info->memory_type = memory_type;
1937       return true;
1938     }
1939 
1940   if (overflow_p && dump_enabled_p ())
1941     dump_printf_loc (MSG_NOTE, vect_location,
1942 		     "truncating gather/scatter offset to %d bits"
1943 		     " might change its value.\n", element_bits);
1944 
1945   return false;
1946 }
1947 
1948 /* Return true if we can use gather/scatter internal functions to
1949    vectorize STMT, which is a grouped or strided load or store.
1950    MASKED_P is true if load or store is conditional.  When returning
1951    true, fill in GS_INFO with the information required to perform the
1952    operation.  */
1953 
1954 static bool
1955 vect_use_strided_gather_scatters_p (gimple *stmt, loop_vec_info loop_vinfo,
1956 				    bool masked_p,
1957 				    gather_scatter_info *gs_info)
1958 {
1959   if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info)
1960       || gs_info->decl)
1961     return vect_truncate_gather_scatter_offset (stmt, loop_vinfo,
1962 						masked_p, gs_info);
1963 
1964   scalar_mode element_mode = SCALAR_TYPE_MODE (gs_info->element_type);
1965   unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
1966   tree offset_type = TREE_TYPE (gs_info->offset);
1967   unsigned int offset_bits = TYPE_PRECISION (offset_type);
1968 
1969   /* Enforced by vect_check_gather_scatter.  */
1970   gcc_assert (element_bits >= offset_bits);
1971 
1972   /* If the elements are wider than the offset, convert the offset to the
1973      same width, without changing its sign.  */
1974   if (element_bits > offset_bits)
1975     {
1976       bool unsigned_p = TYPE_UNSIGNED (offset_type);
1977       offset_type = build_nonstandard_integer_type (element_bits, unsigned_p);
1978       gs_info->offset = fold_convert (offset_type, gs_info->offset);
1979     }
1980 
1981   if (dump_enabled_p ())
1982     dump_printf_loc (MSG_NOTE, vect_location,
1983 		     "using gather/scatter for strided/grouped access,"
1984 		     " scale = %d\n", gs_info->scale);
1985 
1986   return true;
1987 }
1988 
1989 /* STMT is a non-strided load or store, meaning that it accesses
1990    elements with a known constant step.  Return -1 if that step
1991    is negative, 0 if it is zero, and 1 if it is greater than zero.  */
1992 
1993 static int
1994 compare_step_with_zero (gimple *stmt)
1995 {
1996   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1997   data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1998   return tree_int_cst_compare (vect_dr_behavior (dr)->step,
1999 			       size_zero_node);
2000 }
2001 
2002 /* If the target supports a permute mask that reverses the elements in
2003    a vector of type VECTYPE, return that mask, otherwise return null.  */
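/* For example, for a 4-element vector the selector built below is
   { 3, 2, 1, 0 }.  */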
2004 
2005 static tree
2006 perm_mask_for_reverse (tree vectype)
2007 {
2008   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2009 
2010   /* The encoding has a single stepped pattern.  */
2011   vec_perm_builder sel (nunits, 1, 3);
2012   for (int i = 0; i < 3; ++i)
2013     sel.quick_push (nunits - 1 - i);
2014 
2015   vec_perm_indices indices (sel, 1, nunits);
2016   if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
2017     return NULL_TREE;
2018   return vect_gen_perm_mask_checked (vectype, indices);
2019 }
2020 
2021 /* STMT is either a masked or unconditional store.  Return the value
2022    being stored.  */
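/* For a plain assignment such as *p_1 = x_2 this is simply the RHS x_2;
   for a masked store call it is the stored-value argument identified by
   internal_fn_stored_value_index.  */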
2023 
2024 tree
2025 vect_get_store_rhs (gimple *stmt)
2026 {
2027   if (gassign *assign = dyn_cast <gassign *> (stmt))
2028     {
2029       gcc_assert (gimple_assign_single_p (assign));
2030       return gimple_assign_rhs1 (assign);
2031     }
2032   if (gcall *call = dyn_cast <gcall *> (stmt))
2033     {
2034       internal_fn ifn = gimple_call_internal_fn (call);
2035       int index = internal_fn_stored_value_index (ifn);
2036       gcc_assert (index >= 0);
2037       return gimple_call_arg (stmt, index);
2038     }
2039   gcc_unreachable ();
2040 }
2041 
2042 /* A subroutine of get_load_store_type, with a subset of the same
2043    arguments.  Handle the case where STMT is part of a grouped load
2044    or store.
2045 
2046    For stores, the statements in the group are all consecutive
2047    and there is no gap at the end.  For loads, the statements in the
2048    group might not be consecutive; there can be gaps between statements
2049    as well as at the end.  */
2050 
2051 static bool
2052 get_group_load_store_type (gimple *stmt, tree vectype, bool slp,
2053 			   bool masked_p, vec_load_store_type vls_type,
2054 			   vect_memory_access_type *memory_access_type,
2055 			   gather_scatter_info *gs_info)
2056 {
2057   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2058   vec_info *vinfo = stmt_info->vinfo;
2059   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2060   struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2061   gimple *first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
2062   data_reference *first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
2063   unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
2064   bool single_element_p = (stmt == first_stmt
2065 			   && !GROUP_NEXT_ELEMENT (stmt_info));
2066   unsigned HOST_WIDE_INT gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
2067   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2068 
2069   /* True if the vectorized statements would access beyond the last
2070      statement in the group.  */
2071   bool overrun_p = false;
2072 
2073   /* True if we can cope with such overrun by peeling for gaps, so that
2074      there is at least one final scalar iteration after the vector loop.  */
2075   bool can_overrun_p = (!masked_p
2076 			&& vls_type == VLS_LOAD
2077 			&& loop_vinfo
2078 			&& !loop->inner);
2079 
2080   /* There can only be a gap at the end of the group if the stride is
2081      known at compile time.  */
2082   gcc_assert (!STMT_VINFO_STRIDED_P (stmt_info) || gap == 0);
2083 
2084   /* Stores can't yet have gaps.  */
2085   gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
2086 
2087   if (slp)
2088     {
2089       if (STMT_VINFO_STRIDED_P (stmt_info))
2090 	{
2091 	  /* Try to use consecutive accesses of GROUP_SIZE elements,
2092 	     separated by the stride, until we have a complete vector.
2093 	     Fall back to scalar accesses if that isn't possible.  */
2094 	  if (multiple_p (nunits, group_size))
2095 	    *memory_access_type = VMAT_STRIDED_SLP;
2096 	  else
2097 	    *memory_access_type = VMAT_ELEMENTWISE;
2098 	}
2099       else
2100 	{
2101 	  overrun_p = loop_vinfo && gap != 0;
2102 	  if (overrun_p && vls_type != VLS_LOAD)
2103 	    {
2104 	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2105 			       "Grouped store with gaps requires"
2106 			       " non-consecutive accesses\n");
2107 	      return false;
2108 	    }
2109 	  /* An overrun is fine if the trailing elements are smaller
2110 	     than the alignment boundary B.  Every vector access will
2111 	     be a multiple of B and so we are guaranteed to access a
2112 	     non-gap element in the same B-sized block.  */
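	  /* For instance, with 4-byte elements and a known alignment of
	     16 bytes, a trailing gap of up to 3 elements shares the last
	     accessed 16-byte block with a real element, so reading it
	     cannot fault (illustrative figures only).  */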
2113 	  if (overrun_p
2114 	      && gap < (vect_known_alignment_in_bytes (first_dr)
2115 			/ vect_get_scalar_dr_size (first_dr)))
2116 	    overrun_p = false;
2117 	  if (overrun_p && !can_overrun_p)
2118 	    {
2119 	      if (dump_enabled_p ())
2120 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2121 				 "Peeling for outer loop is not supported\n");
2122 	      return false;
2123 	    }
2124 	  *memory_access_type = VMAT_CONTIGUOUS;
2125 	}
2126     }
2127   else
2128     {
2129       /* We can always handle this case using elementwise accesses,
2130 	 but see if something more efficient is available.  */
2131       *memory_access_type = VMAT_ELEMENTWISE;
2132 
2133       /* If there is a gap at the end of the group then these optimizations
2134 	 would access excess elements in the last iteration.  */
2135       bool would_overrun_p = (gap != 0);
2136       /* An overrun is fine if the trailing elements are smaller than the
2137 	 alignment boundary B.  Every vector access will be a multiple of B
2138 	 and so we are guaranteed to access a non-gap element in the
2139 	 same B-sized block.  */
2140       if (would_overrun_p
2141 	  && !masked_p
2142 	  && gap < (vect_known_alignment_in_bytes (first_dr)
2143 		    / vect_get_scalar_dr_size (first_dr)))
2144 	would_overrun_p = false;
2145 
2146       if (!STMT_VINFO_STRIDED_P (stmt_info)
2147 	  && (can_overrun_p || !would_overrun_p)
2148 	  && compare_step_with_zero (stmt) > 0)
2149 	{
2150 	  /* First cope with the degenerate case of a single-element
2151 	     vector.  */
2152 	  if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2153 	    *memory_access_type = VMAT_CONTIGUOUS;
2154 
2155 	  /* Otherwise try using LOAD/STORE_LANES.  */
2156 	  if (*memory_access_type == VMAT_ELEMENTWISE
2157 	      && (vls_type == VLS_LOAD
2158 		  ? vect_load_lanes_supported (vectype, group_size, masked_p)
2159 		  : vect_store_lanes_supported (vectype, group_size,
2160 						masked_p)))
2161 	    {
2162 	      *memory_access_type = VMAT_LOAD_STORE_LANES;
2163 	      overrun_p = would_overrun_p;
2164 	    }
2165 
2166 	  /* If that fails, try using permuting loads.  */
2167 	  if (*memory_access_type == VMAT_ELEMENTWISE
2168 	      && (vls_type == VLS_LOAD
2169 		  ? vect_grouped_load_supported (vectype, single_element_p,
2170 						 group_size)
2171 		  : vect_grouped_store_supported (vectype, group_size)))
2172 	    {
2173 	      *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2174 	      overrun_p = would_overrun_p;
2175 	    }
2176 	}
2177 
2178       /* As a last resort, try using a gather load or scatter store.
2179 
2180 	 ??? Although the code can handle all group sizes correctly,
2181 	 it probably isn't a win to use separate strided accesses based
2182 	 on nearby locations.  Or, even if it's a win over scalar code,
2183 	 it might not be a win over vectorizing at a lower VF, if that
2184 	 allows us to use contiguous accesses.  */
2185       if (*memory_access_type == VMAT_ELEMENTWISE
2186 	  && single_element_p
2187 	  && loop_vinfo
2188 	  && vect_use_strided_gather_scatters_p (stmt, loop_vinfo,
2189 						 masked_p, gs_info))
2190 	*memory_access_type = VMAT_GATHER_SCATTER;
2191     }
2192 
2193   if (vls_type != VLS_LOAD && first_stmt == stmt)
2194     {
2195       /* STMT is the leader of the group. Check the operands of all the
2196 	 stmts of the group.  */
2197       gimple *next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
2198       while (next_stmt)
2199 	{
2200 	  tree op = vect_get_store_rhs (next_stmt);
2201 	  gimple *def_stmt;
2202 	  enum vect_def_type dt;
2203 	  if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
2204 	    {
2205 	      if (dump_enabled_p ())
2206 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2207 				 "use not simple.\n");
2208 	      return false;
2209 	    }
2210 	  next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
2211 	}
2212     }
2213 
2214   if (overrun_p)
2215     {
2216       gcc_assert (can_overrun_p);
2217       if (dump_enabled_p ())
2218 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2219 			 "Data access with gaps requires scalar "
2220 			 "epilogue loop\n");
2221       LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2222     }
2223 
2224   return true;
2225 }
2226 
2227 /* A subroutine of get_load_store_type, with a subset of the same
2228    arguments.  Handle the case where STMT is a load or store that
2229    accesses consecutive elements with a negative step.  */
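/* Roughly speaking, for 4-byte elements accessed with a step of -4 the
   single-copy case can load the 16 contiguous bytes that end just after
   the first scalar element and then reverse them using the mask from
   perm_mask_for_reverse; this sketch is illustrative only.  */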
2230 
2231 static vect_memory_access_type
2232 get_negative_load_store_type (gimple *stmt, tree vectype,
2233 			      vec_load_store_type vls_type,
2234 			      unsigned int ncopies)
2235 {
2236   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2237   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2238   dr_alignment_support alignment_support_scheme;
2239 
2240   if (ncopies > 1)
2241     {
2242       if (dump_enabled_p ())
2243 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2244 			 "multiple types with negative step.\n");
2245       return VMAT_ELEMENTWISE;
2246     }
2247 
2248   alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
2249   if (alignment_support_scheme != dr_aligned
2250       && alignment_support_scheme != dr_unaligned_supported)
2251     {
2252       if (dump_enabled_p ())
2253 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2254 			 "negative step but alignment required.\n");
2255       return VMAT_ELEMENTWISE;
2256     }
2257 
2258   if (vls_type == VLS_STORE_INVARIANT)
2259     {
2260       if (dump_enabled_p ())
2261 	dump_printf_loc (MSG_NOTE, vect_location,
2262 			 "negative step with invariant source;"
2263 			 " no permute needed.\n");
2264       return VMAT_CONTIGUOUS_DOWN;
2265     }
2266 
2267   if (!perm_mask_for_reverse (vectype))
2268     {
2269       if (dump_enabled_p ())
2270 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2271 			 "negative step and reversing not supported.\n");
2272       return VMAT_ELEMENTWISE;
2273     }
2274 
2275   return VMAT_CONTIGUOUS_REVERSE;
2276 }
2277 
2278 /* Analyze load or store statement STMT of type VLS_TYPE.  Return true
2279    if there is a memory access type that the vectorized form can use,
2280    storing it in *MEMORY_ACCESS_TYPE if so.  If we decide to use gathers
2281    or scatters, fill in GS_INFO accordingly.
2282 
2283    SLP says whether we're performing SLP rather than loop vectorization.
2284    MASKED_P is true if the statement is conditional on a vectorized mask.
2285    VECTYPE is the vector type that the vectorized statements will use.
2286    NCOPIES is the number of vector statements that will be needed.  */
2287 
2288 static bool
2289 get_load_store_type (gimple *stmt, tree vectype, bool slp, bool masked_p,
2290 		     vec_load_store_type vls_type, unsigned int ncopies,
2291 		     vect_memory_access_type *memory_access_type,
2292 		     gather_scatter_info *gs_info)
2293 {
2294   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2295   vec_info *vinfo = stmt_info->vinfo;
2296   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2297   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2298   if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2299     {
2300       *memory_access_type = VMAT_GATHER_SCATTER;
2301       gimple *def_stmt;
2302       if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info))
2303 	gcc_unreachable ();
2304       else if (!vect_is_simple_use (gs_info->offset, vinfo, &def_stmt,
2305 				    &gs_info->offset_dt,
2306 				    &gs_info->offset_vectype))
2307 	{
2308 	  if (dump_enabled_p ())
2309 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2310 			     "%s index use not simple.\n",
2311 			     vls_type == VLS_LOAD ? "gather" : "scatter");
2312 	  return false;
2313 	}
2314     }
2315   else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2316     {
2317       if (!get_group_load_store_type (stmt, vectype, slp, masked_p, vls_type,
2318 				      memory_access_type, gs_info))
2319 	return false;
2320     }
2321   else if (STMT_VINFO_STRIDED_P (stmt_info))
2322     {
2323       gcc_assert (!slp);
2324       if (loop_vinfo
2325 	  && vect_use_strided_gather_scatters_p (stmt, loop_vinfo,
2326 						 masked_p, gs_info))
2327 	*memory_access_type = VMAT_GATHER_SCATTER;
2328       else
2329 	*memory_access_type = VMAT_ELEMENTWISE;
2330     }
2331   else
2332     {
2333       int cmp = compare_step_with_zero (stmt);
2334       if (cmp < 0)
2335 	*memory_access_type = get_negative_load_store_type
2336 	  (stmt, vectype, vls_type, ncopies);
2337       else if (cmp == 0)
2338 	{
2339 	  gcc_assert (vls_type == VLS_LOAD);
2340 	  *memory_access_type = VMAT_INVARIANT;
2341 	}
2342       else
2343 	*memory_access_type = VMAT_CONTIGUOUS;
2344     }
2345 
2346   if ((*memory_access_type == VMAT_ELEMENTWISE
2347        || *memory_access_type == VMAT_STRIDED_SLP)
2348       && !nunits.is_constant ())
2349     {
2350       if (dump_enabled_p ())
2351 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2352 			 "Not using elementwise accesses due to variable "
2353 			 "vectorization factor.\n");
2354       return false;
2355     }
2356 
2357   /* FIXME: At the moment the cost model seems to underestimate the
2358      cost of using elementwise accesses.  This check preserves the
2359      traditional behavior until that can be fixed.  */
2360   if (*memory_access_type == VMAT_ELEMENTWISE
2361       && !STMT_VINFO_STRIDED_P (stmt_info)
2362       && !(stmt == GROUP_FIRST_ELEMENT (stmt_info)
2363 	   && !GROUP_NEXT_ELEMENT (stmt_info)
2364 	   && !pow2p_hwi (GROUP_SIZE (stmt_info))))
2365     {
2366       if (dump_enabled_p ())
2367 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2368 			 "not falling back to elementwise accesses\n");
2369       return false;
2370     }
2371   return true;
2372 }
2373 
2374 /* Return true if boolean argument MASK is suitable for vectorizing
2375    conditional load or store STMT.  When returning true, store the type
2376    of the definition in *MASK_DT_OUT and the type of the vectorized mask
2377    in *MASK_VECTYPE_OUT.  */
2378 
2379 static bool
2380 vect_check_load_store_mask (gimple *stmt, tree mask,
2381 			    vect_def_type *mask_dt_out,
2382 			    tree *mask_vectype_out)
2383 {
2384   if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2385     {
2386       if (dump_enabled_p ())
2387 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2388 			 "mask argument is not a boolean.\n");
2389       return false;
2390     }
2391 
2392   if (TREE_CODE (mask) != SSA_NAME)
2393     {
2394       if (dump_enabled_p ())
2395 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2396 			 "mask argument is not an SSA name.\n");
2397       return false;
2398     }
2399 
2400   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2401   gimple *def_stmt;
2402   enum vect_def_type mask_dt;
2403   tree mask_vectype;
2404   if (!vect_is_simple_use (mask, stmt_info->vinfo, &def_stmt, &mask_dt,
2405 			   &mask_vectype))
2406     {
2407       if (dump_enabled_p ())
2408 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2409 			 "mask use not simple.\n");
2410       return false;
2411     }
2412 
2413   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2414   if (!mask_vectype)
2415     mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
2416 
2417   if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2418     {
2419       if (dump_enabled_p ())
2420 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2421 			 "could not find an appropriate vector mask type.\n");
2422       return false;
2423     }
2424 
2425   if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2426 		TYPE_VECTOR_SUBPARTS (vectype)))
2427     {
2428       if (dump_enabled_p ())
2429 	{
2430 	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2431 			   "vector mask type ");
2432 	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, mask_vectype);
2433 	  dump_printf (MSG_MISSED_OPTIMIZATION,
2434 		       " does not match vector data type ");
2435 	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, vectype);
2436 	  dump_printf (MSG_MISSED_OPTIMIZATION, ".\n");
2437 	}
2438       return false;
2439     }
2440 
2441   *mask_dt_out = mask_dt;
2442   *mask_vectype_out = mask_vectype;
2443   return true;
2444 }
2445 
2446 /* Return true if stored value RHS is suitable for vectorizing store
2447    statement STMT.  When returning true, store the type of the
2448    definition in *RHS_DT_OUT, the type of the vectorized store value in
2449    *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT.  */
2450 
2451 static bool
2452 vect_check_store_rhs (gimple *stmt, tree rhs, vect_def_type *rhs_dt_out,
2453 		      tree *rhs_vectype_out, vec_load_store_type *vls_type_out)
2454 {
2455   /* If this is a store from a constant, make sure
2456      native_encode_expr can handle it.  */
2457   if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2458     {
2459       if (dump_enabled_p ())
2460 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2461 			 "cannot encode constant as a byte sequence.\n");
2462       return false;
2463     }
2464 
2465   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2466   gimple *def_stmt;
2467   enum vect_def_type rhs_dt;
2468   tree rhs_vectype;
2469   if (!vect_is_simple_use (rhs, stmt_info->vinfo, &def_stmt, &rhs_dt,
2470 			   &rhs_vectype))
2471     {
2472       if (dump_enabled_p ())
2473 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2474 			 "use not simple.\n");
2475       return false;
2476     }
2477 
2478   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2479   if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2480     {
2481       if (dump_enabled_p ())
2482 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2483 			 "incompatible vector types.\n");
2484       return false;
2485     }
2486 
2487   *rhs_dt_out = rhs_dt;
2488   *rhs_vectype_out = rhs_vectype;
2489   if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2490     *vls_type_out = VLS_STORE_INVARIANT;
2491   else
2492     *vls_type_out = VLS_STORE;
2493   return true;
2494 }
2495 
2496 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT.
2497    Note that we support masks with floating-point type, in which case the
2498    floats are interpreted as a bitmask.  */
2499 
2500 static tree
2501 vect_build_all_ones_mask (gimple *stmt, tree masktype)
2502 {
2503   if (TREE_CODE (masktype) == INTEGER_TYPE)
2504     return build_int_cst (masktype, -1);
2505   else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2506     {
2507       tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2508       mask = build_vector_from_val (masktype, mask);
2509       return vect_init_vector (stmt, mask, masktype, NULL);
2510     }
2511   else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2512     {
2513       REAL_VALUE_TYPE r;
2514       long tmp[6];
2515       for (int j = 0; j < 6; ++j)
2516 	tmp[j] = -1;
2517       real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2518       tree mask = build_real (TREE_TYPE (masktype), r);
2519       mask = build_vector_from_val (masktype, mask);
2520       return vect_init_vector (stmt, mask, masktype, NULL);
2521     }
2522   gcc_unreachable ();
2523 }
2524 
2525 /* Build an all-zero merge value of type VECTYPE while vectorizing
2526    STMT as a gather load.  */
2527 
2528 static tree
2529 vect_build_zero_merge_argument (gimple *stmt, tree vectype)
2530 {
2531   tree merge;
2532   if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2533     merge = build_int_cst (TREE_TYPE (vectype), 0);
2534   else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2535     {
2536       REAL_VALUE_TYPE r;
2537       long tmp[6];
2538       for (int j = 0; j < 6; ++j)
2539 	tmp[j] = 0;
2540       real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2541       merge = build_real (TREE_TYPE (vectype), r);
2542     }
2543   else
2544     gcc_unreachable ();
2545   merge = build_vector_from_val (vectype, merge);
2546   return vect_init_vector (stmt, merge, vectype, NULL);
2547 }
2548 
2549 /* Build a gather load call while vectorizing STMT.  Insert new instructions
2550    before GSI and add them to VEC_STMT.  GS_INFO describes the gather load
2551    operation.  If the load is conditional, MASK is the unvectorized
2552    condition and MASK_DT is its definition type, otherwise MASK is null.  */
2553 
2554 static void
2555 vect_build_gather_load_calls (gimple *stmt, gimple_stmt_iterator *gsi,
2556 			      gimple **vec_stmt, gather_scatter_info *gs_info,
2557 			      tree mask, vect_def_type mask_dt)
2558 {
2559   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2560   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2561   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2562   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2563   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2564   int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2565   edge pe = loop_preheader_edge (loop);
2566   enum { NARROW, NONE, WIDEN } modifier;
2567   poly_uint64 gather_off_nunits
2568     = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2569 
2570   tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2571   tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2572   tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2573   tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2574   tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2575   tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2576   tree scaletype = TREE_VALUE (arglist);
2577   gcc_checking_assert (types_compatible_p (srctype, rettype)
2578 		       && (!mask || types_compatible_p (srctype, masktype)));
2579 
2580   tree perm_mask = NULL_TREE;
2581   tree mask_perm_mask = NULL_TREE;
2582   if (known_eq (nunits, gather_off_nunits))
2583     modifier = NONE;
2584   else if (known_eq (nunits * 2, gather_off_nunits))
2585     {
2586       modifier = WIDEN;
2587 
2588       /* Currently widening gathers and scatters are only supported for
2589 	 fixed-length vectors.  */
2590       int count = gather_off_nunits.to_constant ();
2591       vec_perm_builder sel (count, count, 1);
2592       for (int i = 0; i < count; ++i)
2593 	sel.quick_push (i | (count / 2));
2594 
2595       vec_perm_indices indices (sel, 1, count);
2596       perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2597 					      indices);
2598     }
2599   else if (known_eq (nunits, gather_off_nunits * 2))
2600     {
2601       modifier = NARROW;
2602 
2603       /* Currently narrowing gathers and scatters are only supported for
2604 	 fixed-length vectors.  */
2605       int count = nunits.to_constant ();
2606       vec_perm_builder sel (count, count, 1);
2607       sel.quick_grow (count);
2608       for (int i = 0; i < count; ++i)
2609 	sel[i] = i < count / 2 ? i : i + count / 2;
2610       vec_perm_indices indices (sel, 2, count);
2611       perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2612 
2613       ncopies *= 2;
2614 
2615       if (mask)
2616 	{
2617 	  for (int i = 0; i < count; ++i)
2618 	    sel[i] = i | (count / 2);
2619 	  indices.new_vector (sel, 2, count);
2620 	  mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2621 	}
2622     }
2623   else
2624     gcc_unreachable ();
2625 
2626   tree vec_dest = vect_create_destination_var (gimple_get_lhs (stmt),
2627 					       vectype);
2628 
2629   tree ptr = fold_convert (ptrtype, gs_info->base);
2630   if (!is_gimple_min_invariant (ptr))
2631     {
2632       gimple_seq seq;
2633       ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2634       basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2635       gcc_assert (!new_bb);
2636     }
2637 
2638   tree scale = build_int_cst (scaletype, gs_info->scale);
2639 
2640   tree vec_oprnd0 = NULL_TREE;
2641   tree vec_mask = NULL_TREE;
2642   tree src_op = NULL_TREE;
2643   tree mask_op = NULL_TREE;
2644   tree prev_res = NULL_TREE;
2645   stmt_vec_info prev_stmt_info = NULL;
2646 
2647   if (!mask)
2648     {
2649       src_op = vect_build_zero_merge_argument (stmt, rettype);
2650       mask_op = vect_build_all_ones_mask (stmt, masktype);
2651     }
2652 
2653   for (int j = 0; j < ncopies; ++j)
2654     {
2655       tree op, var;
2656       gimple *new_stmt;
2657       if (modifier == WIDEN && (j & 1))
2658 	op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
2659 				   perm_mask, stmt, gsi);
2660       else if (j == 0)
2661 	op = vec_oprnd0
2662 	  = vect_get_vec_def_for_operand (gs_info->offset, stmt);
2663       else
2664 	op = vec_oprnd0
2665 	  = vect_get_vec_def_for_stmt_copy (gs_info->offset_dt, vec_oprnd0);
2666 
2667       if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2668 	{
2669 	  gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2670 				TYPE_VECTOR_SUBPARTS (idxtype)));
2671 	  var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2672 	  op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2673 	  new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2674 	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
2675 	  op = var;
2676 	}
2677 
2678       if (mask)
2679 	{
2680 	  if (mask_perm_mask && (j & 1))
2681 	    mask_op = permute_vec_elements (mask_op, mask_op,
2682 					    mask_perm_mask, stmt, gsi);
2683 	  else
2684 	    {
2685 	      if (j == 0)
2686 		vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2687 	      else
2688 		vec_mask = vect_get_vec_def_for_stmt_copy (mask_dt, vec_mask);
2689 
2690 	      mask_op = vec_mask;
2691 	      if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2692 		{
2693 		  gcc_assert
2694 		    (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op)),
2695 			       TYPE_VECTOR_SUBPARTS (masktype)));
2696 		  var = vect_get_new_ssa_name (masktype, vect_simple_var);
2697 		  mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2698 		  new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR,
2699 						  mask_op);
2700 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
2701 		  mask_op = var;
2702 		}
2703 	    }
2704 	  src_op = mask_op;
2705 	}
2706 
2707       new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2708 				    mask_op, scale);
2709 
2710       if (!useless_type_conversion_p (vectype, rettype))
2711 	{
2712 	  gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2713 				TYPE_VECTOR_SUBPARTS (rettype)));
2714 	  op = vect_get_new_ssa_name (rettype, vect_simple_var);
2715 	  gimple_call_set_lhs (new_stmt, op);
2716 	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
2717 	  var = make_ssa_name (vec_dest);
2718 	  op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2719 	  new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2720 	}
2721       else
2722 	{
2723 	  var = make_ssa_name (vec_dest, new_stmt);
2724 	  gimple_call_set_lhs (new_stmt, var);
2725 	}
2726 
2727       vect_finish_stmt_generation (stmt, new_stmt, gsi);
2728 
2729       if (modifier == NARROW)
2730 	{
2731 	  if ((j & 1) == 0)
2732 	    {
2733 	      prev_res = var;
2734 	      continue;
2735 	    }
2736 	  var = permute_vec_elements (prev_res, var, perm_mask, stmt, gsi);
2737 	  new_stmt = SSA_NAME_DEF_STMT (var);
2738 	}
2739 
2740       if (prev_stmt_info == NULL)
2741 	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2742       else
2743 	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2744       prev_stmt_info = vinfo_for_stmt (new_stmt);
2745     }
2746 }
2747 
2748 /* Prepare the base and offset in GS_INFO for vectorization.
2749    Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2750    to the vectorized offset argument for the first copy of STMT.  STMT
2751    is the statement described by GS_INFO and LOOP is the containing loop.  */
2752 
2753 static void
2754 vect_get_gather_scatter_ops (struct loop *loop, gimple *stmt,
2755 			     gather_scatter_info *gs_info,
2756 			     tree *dataref_ptr, tree *vec_offset)
2757 {
2758   gimple_seq stmts = NULL;
2759   *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
2760   if (stmts != NULL)
2761     {
2762       basic_block new_bb;
2763       edge pe = loop_preheader_edge (loop);
2764       new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2765       gcc_assert (!new_bb);
2766     }
2767   tree offset_type = TREE_TYPE (gs_info->offset);
2768   tree offset_vectype = get_vectype_for_scalar_type (offset_type);
2769   *vec_offset = vect_get_vec_def_for_operand (gs_info->offset, stmt,
2770 					      offset_vectype);
2771 }
2772 
2773 /* Prepare to implement a grouped or strided load or store using
2774    the gather load or scatter store operation described by GS_INFO.
2775    STMT is the load or store statement.
2776 
2777    Set *DATAREF_BUMP to the amount that should be added to the base
2778    address after each copy of the vectorized statement.  Set *VEC_OFFSET
2779    to an invariant offset vector in which element I has the value
2780    I * DR_STEP / SCALE.  */
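/* As an illustration only: with a 4-element vector, DR_STEP == 12 and
   SCALE == 4, *DATAREF_BUMP is 48 and *VEC_OFFSET is { 0, 3, 6, 9 },
   which the gather or scatter then scales by SCALE again.  */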
2781 
2782 static void
2783 vect_get_strided_load_store_ops (gimple *stmt, loop_vec_info loop_vinfo,
2784 				 gather_scatter_info *gs_info,
2785 				 tree *dataref_bump, tree *vec_offset)
2786 {
2787   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2788   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2789   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2790   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2791   gimple_seq stmts;
2792 
2793   tree bump = size_binop (MULT_EXPR,
2794 			  fold_convert (sizetype, DR_STEP (dr)),
2795 			  size_int (TYPE_VECTOR_SUBPARTS (vectype)));
2796   *dataref_bump = force_gimple_operand (bump, &stmts, true, NULL_TREE);
2797   if (stmts)
2798     gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2799 
2800   /* The offset given in GS_INFO can have pointer type, so use the element
2801      type of the vector instead.  */
2802   tree offset_type = TREE_TYPE (gs_info->offset);
2803   tree offset_vectype = get_vectype_for_scalar_type (offset_type);
2804   offset_type = TREE_TYPE (offset_vectype);
2805 
2806   /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type.  */
2807   tree step = size_binop (EXACT_DIV_EXPR, DR_STEP (dr),
2808 			  ssize_int (gs_info->scale));
2809   step = fold_convert (offset_type, step);
2810   step = force_gimple_operand (step, &stmts, true, NULL_TREE);
2811 
2812   /* Create {0, X, X*2, X*3, ...}.  */
2813   *vec_offset = gimple_build (&stmts, VEC_SERIES_EXPR, offset_vectype,
2814 			      build_zero_cst (offset_type), step);
2815   if (stmts)
2816     gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2817 }
2818 
2819 /* Return the amount that should be added to a vector pointer to move
2820    to the next or previous copy of AGGR_TYPE.  DR is the data reference
2821    being vectorized and MEMORY_ACCESS_TYPE describes the type of
2822    vectorization.  */
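/* For instance, for a 16-byte AGGR_TYPE this returns 16 for a positive
   step, -16 for a negative step and 0 for VMAT_INVARIANT.  */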
2823 
2824 static tree
2825 vect_get_data_ptr_increment (data_reference *dr, tree aggr_type,
2826 			     vect_memory_access_type memory_access_type)
2827 {
2828   if (memory_access_type == VMAT_INVARIANT)
2829     return size_zero_node;
2830 
2831   tree iv_step = TYPE_SIZE_UNIT (aggr_type);
2832   tree step = vect_dr_behavior (dr)->step;
2833   if (tree_int_cst_sgn (step) == -1)
2834     iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
2835   return iv_step;
2836 }
2837 
2838 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}.  */
2839 
2840 static bool
2841 vectorizable_bswap (gimple *stmt, gimple_stmt_iterator *gsi,
2842 		    gimple **vec_stmt, slp_tree slp_node,
2843 		    tree vectype_in, enum vect_def_type *dt)
2844 {
2845   tree op, vectype;
2846   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2847   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2848   unsigned ncopies;
2849   unsigned HOST_WIDE_INT nunits, num_bytes;
2850 
2851   op = gimple_call_arg (stmt, 0);
2852   vectype = STMT_VINFO_VECTYPE (stmt_info);
2853 
2854   if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
2855     return false;
2856 
2857   /* Multiple types in SLP are handled by creating the appropriate number of
2858      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
2859      case of SLP.  */
2860   if (slp_node)
2861     ncopies = 1;
2862   else
2863     ncopies = vect_get_num_copies (loop_vinfo, vectype);
2864 
2865   gcc_assert (ncopies >= 1);
2866 
2867   tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
2868   if (! char_vectype)
2869     return false;
2870 
2871   if (!TYPE_VECTOR_SUBPARTS (char_vectype).is_constant (&num_bytes))
2872     return false;
2873 
2874   unsigned word_bytes = num_bytes / nunits;
2875 
2876   /* The encoding uses one stepped pattern for each byte in the word.  */
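  /* E.g. for 4-byte elements (WORD_BYTES == 4) the selector starts
     { 3, 2, 1, 0, 7, 6, 5, 4, ... }, reversing the bytes within each
     word; the figures are only an illustration.  */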
2877   vec_perm_builder elts (num_bytes, word_bytes, 3);
2878   for (unsigned i = 0; i < 3; ++i)
2879     for (unsigned j = 0; j < word_bytes; ++j)
2880       elts.quick_push ((i + 1) * word_bytes - j - 1);
2881 
2882   vec_perm_indices indices (elts, 1, num_bytes);
2883   if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
2884     return false;
2885 
2886   if (! vec_stmt)
2887     {
2888       STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2889       if (dump_enabled_p ())
2890         dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_bswap ==="
2891                          "\n");
2892       if (! slp_node)
2893 	{
2894 	  add_stmt_cost (stmt_info->vinfo->target_cost_data,
2895 			 1, vector_stmt, stmt_info, 0, vect_prologue);
2896 	  add_stmt_cost (stmt_info->vinfo->target_cost_data,
2897 			 ncopies, vec_perm, stmt_info, 0, vect_body);
2898 	}
2899       return true;
2900     }
2901 
2902   tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
2903 
2904   /* Transform.  */
2905   vec<tree> vec_oprnds = vNULL;
2906   gimple *new_stmt = NULL;
2907   stmt_vec_info prev_stmt_info = NULL;
2908   for (unsigned j = 0; j < ncopies; j++)
2909     {
2910       /* Handle uses.  */
2911       if (j == 0)
2912         vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
2913       else
2914         vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
2915 
2916       /* Arguments are ready.  Create the new vector stmt.  */
2917       unsigned i;
2918       tree vop;
2919       FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
2920        {
2921 	 tree tem = make_ssa_name (char_vectype);
2922 	 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2923 						      char_vectype, vop));
2924 	 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2925 	 tree tem2 = make_ssa_name (char_vectype);
2926 	 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
2927 					 tem, tem, bswap_vconst);
2928 	 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2929 	 tem = make_ssa_name (vectype);
2930 	 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2931 						      vectype, tem2));
2932 	 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2933          if (slp_node)
2934            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2935        }
2936 
2937       if (slp_node)
2938         continue;
2939 
2940       if (j == 0)
2941         STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2942       else
2943         STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2944 
2945       prev_stmt_info = vinfo_for_stmt (new_stmt);
2946     }
2947 
2948   vec_oprnds.release ();
2949   return true;
2950 }
2951 
2952 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
2953    integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
2954    in a single step.  On success, store the binary pack code in
2955    *CONVERT_CODE.  */
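/* For example, narrowing V4SI inputs to V8HI results would typically be
   done with a single VEC_PACK_TRUNC_EXPR, assuming the target provides
   that operation; this is an illustration, not an exhaustive list.  */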
2956 
2957 static bool
2958 simple_integer_narrowing (tree vectype_out, tree vectype_in,
2959 			  tree_code *convert_code)
2960 {
2961   if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
2962       || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
2963     return false;
2964 
2965   tree_code code;
2966   int multi_step_cvt = 0;
2967   auto_vec <tree, 8> interm_types;
2968   if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
2969 					&code, &multi_step_cvt,
2970 					&interm_types)
2971       || multi_step_cvt)
2972     return false;
2973 
2974   *convert_code = code;
2975   return true;
2976 }
2977 
2978 /* Function vectorizable_call.
2979 
2980    Check if GS performs a function call that can be vectorized.
2981    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2982    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2983    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
2984 
2985 static bool
2986 vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
2987 		   slp_tree slp_node)
2988 {
2989   gcall *stmt;
2990   tree vec_dest;
2991   tree scalar_dest;
2992   tree op, type;
2993   tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2994   stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
2995   tree vectype_out, vectype_in;
2996   poly_uint64 nunits_in;
2997   poly_uint64 nunits_out;
2998   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2999   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3000   vec_info *vinfo = stmt_info->vinfo;
3001   tree fndecl, new_temp, rhs_type;
3002   gimple *def_stmt;
3003   enum vect_def_type dt[3]
3004     = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
3005   int ndts = 3;
3006   gimple *new_stmt = NULL;
3007   int ncopies, j;
3008   vec<tree> vargs = vNULL;
3009   enum { NARROW, NONE, WIDEN } modifier;
3010   size_t i, nargs;
3011   tree lhs;
3012 
3013   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3014     return false;
3015 
3016   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3017       && ! vec_stmt)
3018     return false;
3019 
3020   /* Is GS a vectorizable call?   */
3021   stmt = dyn_cast <gcall *> (gs);
3022   if (!stmt)
3023     return false;
3024 
3025   if (gimple_call_internal_p (stmt)
3026       && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3027 	  || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3028     /* Handled by vectorizable_load and vectorizable_store.  */
3029     return false;
3030 
3031   if (gimple_call_lhs (stmt) == NULL_TREE
3032       || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3033     return false;
3034 
3035   gcc_checking_assert (!stmt_can_throw_internal (stmt));
3036 
3037   vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3038 
3039   /* Process function arguments.  */
3040   rhs_type = NULL_TREE;
3041   vectype_in = NULL_TREE;
3042   nargs = gimple_call_num_args (stmt);
3043 
  /* Bail out if the function has more than three arguments; we do not have
     interesting builtin functions to vectorize with more than two arguments
     except for fma.  Zero arguments is not supported either.  */
3047   if (nargs == 0 || nargs > 3)
3048     return false;
3049 
  /* Ignore the argument of IFN_GOMP_SIMD_LANE; it is magic.  */
3051   if (gimple_call_internal_p (stmt)
3052       && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
3053     {
3054       nargs = 0;
3055       rhs_type = unsigned_type_node;
3056     }
3057 
3058   for (i = 0; i < nargs; i++)
3059     {
3060       tree opvectype;
3061 
3062       op = gimple_call_arg (stmt, i);
3063 
3064       /* We can only handle calls with arguments of the same type.  */
3065       if (rhs_type
3066 	  && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3067 	{
3068 	  if (dump_enabled_p ())
3069 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3070                              "argument types differ.\n");
3071 	  return false;
3072 	}
3073       if (!rhs_type)
3074 	rhs_type = TREE_TYPE (op);
3075 
3076       if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[i], &opvectype))
3077 	{
3078 	  if (dump_enabled_p ())
3079 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3080                              "use not simple.\n");
3081 	  return false;
3082 	}
3083 
3084       if (!vectype_in)
3085 	vectype_in = opvectype;
3086       else if (opvectype
3087 	       && opvectype != vectype_in)
3088 	{
3089 	  if (dump_enabled_p ())
3090 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3091                              "argument vector types differ.\n");
3092 	  return false;
3093 	}
3094     }
  /* If all arguments are external or constant defs, use a vector type with
     the same size as the output vector type.  */
3097   if (!vectype_in)
3098     vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
3099   if (vec_stmt)
3100     gcc_assert (vectype_in);
3101   if (!vectype_in)
3102     {
3103       if (dump_enabled_p ())
3104         {
3105           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3106                            "no vectype for scalar type ");
3107           dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
3108           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3109         }
3110 
3111       return false;
3112     }
3113 
3114   /* FORNOW */
3115   nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3116   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3117   if (known_eq (nunits_in * 2, nunits_out))
3118     modifier = NARROW;
3119   else if (known_eq (nunits_out, nunits_in))
3120     modifier = NONE;
3121   else if (known_eq (nunits_out * 2, nunits_in))
3122     modifier = WIDEN;
3123   else
3124     return false;
3125 
3126   /* We only handle functions that do not read or clobber memory.  */
3127   if (gimple_vuse (stmt))
3128     {
3129       if (dump_enabled_p ())
3130 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3131 			 "function reads from or writes to memory.\n");
3132       return false;
3133     }
3134 
3135   /* For now, we only vectorize functions if a target specific builtin
3136      is available.  TODO -- in some cases, it might be profitable to
3137      insert the calls for pieces of the vector, in order to be able
3138      to vectorize other operations in the loop.  */
3139   fndecl = NULL_TREE;
3140   internal_fn ifn = IFN_LAST;
3141   combined_fn cfn = gimple_call_combined_fn (stmt);
3142   tree callee = gimple_call_fndecl (stmt);
3143 
3144   /* First try using an internal function.  */
3145   tree_code convert_code = ERROR_MARK;
3146   if (cfn != CFN_LAST
3147       && (modifier == NONE
3148 	  || (modifier == NARROW
3149 	      && simple_integer_narrowing (vectype_out, vectype_in,
3150 					   &convert_code))))
3151     ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3152 					  vectype_in);
3153 
3154   /* If that fails, try asking for a target-specific built-in function.  */
3155   if (ifn == IFN_LAST)
3156     {
3157       if (cfn != CFN_LAST)
3158 	fndecl = targetm.vectorize.builtin_vectorized_function
3159 	  (cfn, vectype_out, vectype_in);
3160       else if (callee)
3161 	fndecl = targetm.vectorize.builtin_md_vectorized_function
3162 	  (callee, vectype_out, vectype_in);
3163     }
3164 
3165   if (ifn == IFN_LAST && !fndecl)
3166     {
3167       if (cfn == CFN_GOMP_SIMD_LANE
3168 	  && !slp_node
3169 	  && loop_vinfo
3170 	  && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3171 	  && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3172 	  && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3173 	     == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3174 	{
3175 	  /* We can handle IFN_GOMP_SIMD_LANE by returning a
3176 	     { 0, 1, 2, ... vf - 1 } vector.  */
3177 	  gcc_assert (nargs == 0);
3178 	}
3179       else if (modifier == NONE
3180 	       && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3181 		   || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3182 		   || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
3183 	return vectorizable_bswap (stmt, gsi, vec_stmt, slp_node,
3184 				   vectype_in, dt);
3185       else
3186 	{
3187 	  if (dump_enabled_p ())
3188 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3189 			     "function is not vectorizable.\n");
3190 	  return false;
3191 	}
3192     }
3193 
3194   if (slp_node)
3195     ncopies = 1;
3196   else if (modifier == NARROW && ifn == IFN_LAST)
3197     ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3198   else
3199     ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3200 
3201   /* Sanity check: make sure that at least one copy of the vectorized stmt
3202      needs to be generated.  */
3203   gcc_assert (ncopies >= 1);
3204 
3205   if (!vec_stmt) /* transformation not required.  */
3206     {
3207       STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3208       if (dump_enabled_p ())
3209         dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
3210                          "\n");
3211       if (!slp_node)
3212 	{
3213 	  vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
	  if (ifn != IFN_LAST && modifier == NARROW)
3215 	    add_stmt_cost (stmt_info->vinfo->target_cost_data, ncopies / 2,
3216 			   vec_promote_demote, stmt_info, 0, vect_body);
3217 	}
3218 
3219       return true;
3220     }
3221 
3222   /* Transform.  */
3223 
3224   if (dump_enabled_p ())
3225     dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3226 
3227   /* Handle def.  */
3228   scalar_dest = gimple_call_lhs (stmt);
3229   vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3230 
3231   prev_stmt_info = NULL;
3232   if (modifier == NONE || ifn != IFN_LAST)
3233     {
3234       tree prev_res = NULL_TREE;
3235       for (j = 0; j < ncopies; ++j)
3236 	{
3237 	  /* Build argument list for the vectorized call.  */
3238 	  if (j == 0)
3239 	    vargs.create (nargs);
3240 	  else
3241 	    vargs.truncate (0);
3242 
3243 	  if (slp_node)
3244 	    {
3245 	      auto_vec<vec<tree> > vec_defs (nargs);
3246 	      vec<tree> vec_oprnds0;
3247 
3248 	      for (i = 0; i < nargs; i++)
3249 		vargs.quick_push (gimple_call_arg (stmt, i));
3250 	      vect_get_slp_defs (vargs, slp_node, &vec_defs);
3251 	      vec_oprnds0 = vec_defs[0];
3252 
3253 	      /* Arguments are ready.  Create the new vector stmt.  */
3254 	      FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3255 		{
3256 		  size_t k;
3257 		  for (k = 0; k < nargs; k++)
3258 		    {
3259 		      vec<tree> vec_oprndsk = vec_defs[k];
3260 		      vargs[k] = vec_oprndsk[i];
3261 		    }
3262 		  if (modifier == NARROW)
3263 		    {
3264 		      tree half_res = make_ssa_name (vectype_in);
3265 		      gcall *call
3266 			= gimple_build_call_internal_vec (ifn, vargs);
3267 		      gimple_call_set_lhs (call, half_res);
3268 		      gimple_call_set_nothrow (call, true);
3269 		      new_stmt = call;
3270 		      vect_finish_stmt_generation (stmt, new_stmt, gsi);
3271 		      if ((i & 1) == 0)
3272 			{
3273 			  prev_res = half_res;
3274 			  continue;
3275 			}
3276 		      new_temp = make_ssa_name (vec_dest);
3277 		      new_stmt = gimple_build_assign (new_temp, convert_code,
3278 						      prev_res, half_res);
3279 		    }
3280 		  else
3281 		    {
3282 		      gcall *call;
3283 		      if (ifn != IFN_LAST)
3284 			call = gimple_build_call_internal_vec (ifn, vargs);
3285 		      else
3286 			call = gimple_build_call_vec (fndecl, vargs);
3287 		      new_temp = make_ssa_name (vec_dest, call);
3288 		      gimple_call_set_lhs (call, new_temp);
3289 		      gimple_call_set_nothrow (call, true);
3290 		      new_stmt = call;
3291 		    }
3292 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
3293 		  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3294 		}
3295 
3296 	      for (i = 0; i < nargs; i++)
3297 		{
3298 		  vec<tree> vec_oprndsi = vec_defs[i];
3299 		  vec_oprndsi.release ();
3300 		}
3301 	      continue;
3302 	    }
3303 
3304 	  for (i = 0; i < nargs; i++)
3305 	    {
3306 	      op = gimple_call_arg (stmt, i);
3307 	      if (j == 0)
3308 		vec_oprnd0
3309 		  = vect_get_vec_def_for_operand (op, stmt);
3310 	      else
3311 		{
3312 		  vec_oprnd0 = gimple_call_arg (new_stmt, i);
3313 		  vec_oprnd0
3314                     = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3315 		}
3316 
3317 	      vargs.quick_push (vec_oprnd0);
3318 	    }
3319 
3320 	  if (gimple_call_internal_p (stmt)
3321 	      && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
3322 	    {
3323 	      tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3324 	      tree new_var
3325 		= vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3326 	      gimple *init_stmt = gimple_build_assign (new_var, cst);
3327 	      vect_init_vector_1 (stmt, init_stmt, NULL);
3328 	      new_temp = make_ssa_name (vec_dest);
3329 	      new_stmt = gimple_build_assign (new_temp, new_var);
3330 	    }
3331 	  else if (modifier == NARROW)
3332 	    {
3333 	      tree half_res = make_ssa_name (vectype_in);
3334 	      gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3335 	      gimple_call_set_lhs (call, half_res);
3336 	      gimple_call_set_nothrow (call, true);
3337 	      new_stmt = call;
3338 	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
3339 	      if ((j & 1) == 0)
3340 		{
3341 		  prev_res = half_res;
3342 		  continue;
3343 		}
3344 	      new_temp = make_ssa_name (vec_dest);
3345 	      new_stmt = gimple_build_assign (new_temp, convert_code,
3346 					      prev_res, half_res);
3347 	    }
3348 	  else
3349 	    {
3350 	      gcall *call;
3351 	      if (ifn != IFN_LAST)
3352 		call = gimple_build_call_internal_vec (ifn, vargs);
3353 	      else
3354 		call = gimple_build_call_vec (fndecl, vargs);
	      new_temp = make_ssa_name (vec_dest, call);
3356 	      gimple_call_set_lhs (call, new_temp);
3357 	      gimple_call_set_nothrow (call, true);
3358 	      new_stmt = call;
3359 	    }
3360 	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
3361 
3362 	  if (j == (modifier == NARROW ? 1 : 0))
3363 	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3364 	  else
3365 	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3366 
3367 	  prev_stmt_info = vinfo_for_stmt (new_stmt);
3368 	}
3369     }
3370   else if (modifier == NARROW)
3371     {
3372       for (j = 0; j < ncopies; ++j)
3373 	{
3374 	  /* Build argument list for the vectorized call.  */
3375 	  if (j == 0)
3376 	    vargs.create (nargs * 2);
3377 	  else
3378 	    vargs.truncate (0);
3379 
3380 	  if (slp_node)
3381 	    {
3382 	      auto_vec<vec<tree> > vec_defs (nargs);
3383 	      vec<tree> vec_oprnds0;
3384 
3385 	      for (i = 0; i < nargs; i++)
3386 		vargs.quick_push (gimple_call_arg (stmt, i));
3387 	      vect_get_slp_defs (vargs, slp_node, &vec_defs);
3388 	      vec_oprnds0 = vec_defs[0];
3389 
3390 	      /* Arguments are ready.  Create the new vector stmt.  */
3391 	      for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3392 		{
3393 		  size_t k;
3394 		  vargs.truncate (0);
3395 		  for (k = 0; k < nargs; k++)
3396 		    {
3397 		      vec<tree> vec_oprndsk = vec_defs[k];
3398 		      vargs.quick_push (vec_oprndsk[i]);
3399 		      vargs.quick_push (vec_oprndsk[i + 1]);
3400 		    }
3401 		  gcall *call;
3402 		  if (ifn != IFN_LAST)
3403 		    call = gimple_build_call_internal_vec (ifn, vargs);
3404 		  else
3405 		    call = gimple_build_call_vec (fndecl, vargs);
3406 		  new_temp = make_ssa_name (vec_dest, call);
3407 		  gimple_call_set_lhs (call, new_temp);
3408 		  gimple_call_set_nothrow (call, true);
3409 		  new_stmt = call;
3410 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
3411 		  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3412 		}
3413 
3414 	      for (i = 0; i < nargs; i++)
3415 		{
3416 		  vec<tree> vec_oprndsi = vec_defs[i];
3417 		  vec_oprndsi.release ();
3418 		}
3419 	      continue;
3420 	    }
3421 
3422 	  for (i = 0; i < nargs; i++)
3423 	    {
3424 	      op = gimple_call_arg (stmt, i);
3425 	      if (j == 0)
3426 		{
3427 		  vec_oprnd0
3428 		    = vect_get_vec_def_for_operand (op, stmt);
3429 		  vec_oprnd1
3430 		    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3431 		}
3432 	      else
3433 		{
3434 		  vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
3435 		  vec_oprnd0
3436 		    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
3437 		  vec_oprnd1
3438 		    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3439 		}
3440 
3441 	      vargs.quick_push (vec_oprnd0);
3442 	      vargs.quick_push (vec_oprnd1);
3443 	    }
3444 
3445 	  new_stmt = gimple_build_call_vec (fndecl, vargs);
3446 	  new_temp = make_ssa_name (vec_dest, new_stmt);
3447 	  gimple_call_set_lhs (new_stmt, new_temp);
3448 	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
3449 
3450 	  if (j == 0)
3451 	    STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3452 	  else
3453 	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3454 
3455 	  prev_stmt_info = vinfo_for_stmt (new_stmt);
3456 	}
3457 
3458       *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3459     }
3460   else
3461     /* No current target implements this case.  */
3462     return false;
3463 
3464   vargs.release ();
3465 
  /* The call in STMT might prevent it from being removed in dce.
     We however cannot remove it here, due to the way the ssa name
     it defines is mapped to the new definition.  So just replace
     the rhs of the statement with something harmless.  */
3470 
3471   if (slp_node)
3472     return true;
3473 
3474   type = TREE_TYPE (scalar_dest);
3475   if (is_pattern_stmt_p (stmt_info))
3476     lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3477   else
3478     lhs = gimple_call_lhs (stmt);
3479 
3480   new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3481   set_vinfo_for_stmt (new_stmt, stmt_info);
3482   set_vinfo_for_stmt (stmt, NULL);
3483   STMT_VINFO_STMT (stmt_info) = new_stmt;
3484   gsi_replace (gsi, new_stmt, false);
3485 
3486   return true;
3487 }
3488 
3489 
3490 struct simd_call_arg_info
3491 {
3492   tree vectype;
3493   tree op;
3494   HOST_WIDE_INT linear_step;
3495   enum vect_def_type dt;
3496   unsigned int align;
3497   bool simd_lane_linear;
3498 };
3499 
/* Helper function of vectorizable_simd_clone_call.  If OP, an SSA_NAME,
   is linear within a simd lane (but not within the whole loop), note it in
   *ARGINFO.  */
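
/* A minimal sketch of the pattern recognized here (SSA names invented for
   the example):

     _1 = GOMP_SIMD_LANE (simduid.0);
     _2 = _1 * 4;
     op = &base + _2;

   in which case OP is recorded in *ARGINFO as simd-lane linear with base
   &base and linear step 4.  */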
3503 
3504 static void
3505 vect_simd_lane_linear (tree op, struct loop *loop,
3506 		       struct simd_call_arg_info *arginfo)
3507 {
3508   gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3509 
3510   if (!is_gimple_assign (def_stmt)
3511       || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3512       || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3513     return;
3514 
3515   tree base = gimple_assign_rhs1 (def_stmt);
3516   HOST_WIDE_INT linear_step = 0;
3517   tree v = gimple_assign_rhs2 (def_stmt);
3518   while (TREE_CODE (v) == SSA_NAME)
3519     {
3520       tree t;
3521       def_stmt = SSA_NAME_DEF_STMT (v);
3522       if (is_gimple_assign (def_stmt))
3523 	switch (gimple_assign_rhs_code (def_stmt))
3524 	  {
3525 	  case PLUS_EXPR:
3526 	    t = gimple_assign_rhs2 (def_stmt);
3527 	    if (linear_step || TREE_CODE (t) != INTEGER_CST)
3528 	      return;
3529 	    base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3530 	    v = gimple_assign_rhs1 (def_stmt);
3531 	    continue;
3532 	  case MULT_EXPR:
3533 	    t = gimple_assign_rhs2 (def_stmt);
3534 	    if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3535 	      return;
3536 	    linear_step = tree_to_shwi (t);
3537 	    v = gimple_assign_rhs1 (def_stmt);
3538 	    continue;
3539 	  CASE_CONVERT:
3540 	    t = gimple_assign_rhs1 (def_stmt);
3541 	    if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3542 		|| (TYPE_PRECISION (TREE_TYPE (v))
3543 		    < TYPE_PRECISION (TREE_TYPE (t))))
3544 	      return;
3545 	    if (!linear_step)
3546 	      linear_step = 1;
3547 	    v = t;
3548 	    continue;
3549 	  default:
3550 	    return;
3551 	  }
3552       else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3553 	       && loop->simduid
3554 	       && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3555 	       && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3556 		   == loop->simduid))
3557 	{
3558 	  if (!linear_step)
3559 	    linear_step = 1;
3560 	  arginfo->linear_step = linear_step;
3561 	  arginfo->op = base;
3562 	  arginfo->simd_lane_linear = true;
3563 	  return;
3564 	}
3565     }
3566 }
3567 
3568 /* Return the number of elements in vector type VECTYPE, which is associated
3569    with a SIMD clone.  At present these vectors always have a constant
3570    length.  */
3571 
3572 static unsigned HOST_WIDE_INT
3573 simd_clone_subparts (tree vectype)
3574 {
3575   return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
3576 }
3577 
3578 /* Function vectorizable_simd_clone_call.
3579 
3580    Check if STMT performs a function call that can be vectorized
3581    by calling a simd clone of the function.
3582    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.

   Illustrative sketch only (the actual clone and its decl are chosen below
   from the simd_clones list of the called function's cgraph node): with a
   vectorization factor of 4 and a matching simdlen-4 clone of foo, a
   scalar call

     y = foo (x);

   becomes one call per copy to the selected clone, which takes and
   returns vectors:

     vect_y = foo.simdclone.0 (vect_x);
3584    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
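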
3585 
3586 static bool
3587 vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
3588 			      gimple **vec_stmt, slp_tree slp_node)
3589 {
3590   tree vec_dest;
3591   tree scalar_dest;
3592   tree op, type;
3593   tree vec_oprnd0 = NULL_TREE;
3594   stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
3595   tree vectype;
3596   unsigned int nunits;
3597   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3598   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3599   vec_info *vinfo = stmt_info->vinfo;
3600   struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3601   tree fndecl, new_temp;
3602   gimple *def_stmt;
3603   gimple *new_stmt = NULL;
3604   int ncopies, j;
3605   auto_vec<simd_call_arg_info> arginfo;
3606   vec<tree> vargs = vNULL;
3607   size_t i, nargs;
3608   tree lhs, rtype, ratype;
3609   vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3610 
3611   /* Is STMT a vectorizable call?   */
3612   if (!is_gimple_call (stmt))
3613     return false;
3614 
3615   fndecl = gimple_call_fndecl (stmt);
3616   if (fndecl == NULL_TREE)
3617     return false;
3618 
3619   struct cgraph_node *node = cgraph_node::get (fndecl);
3620   if (node == NULL || node->simd_clones == NULL)
3621     return false;
3622 
3623   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3624     return false;
3625 
3626   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3627       && ! vec_stmt)
3628     return false;
3629 
3630   if (gimple_call_lhs (stmt)
3631       && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3632     return false;
3633 
3634   gcc_checking_assert (!stmt_can_throw_internal (stmt));
3635 
3636   vectype = STMT_VINFO_VECTYPE (stmt_info);
3637 
3638   if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
3639     return false;
3640 
3641   /* FORNOW */
3642   if (slp_node)
3643     return false;
3644 
3645   /* Process function arguments.  */
3646   nargs = gimple_call_num_args (stmt);
3647 
3648   /* Bail out if the function has zero arguments.  */
3649   if (nargs == 0)
3650     return false;
3651 
3652   arginfo.reserve (nargs, true);
3653 
3654   for (i = 0; i < nargs; i++)
3655     {
3656       simd_call_arg_info thisarginfo;
3657       affine_iv iv;
3658 
3659       thisarginfo.linear_step = 0;
3660       thisarginfo.align = 0;
3661       thisarginfo.op = NULL_TREE;
3662       thisarginfo.simd_lane_linear = false;
3663 
3664       op = gimple_call_arg (stmt, i);
3665       if (!vect_is_simple_use (op, vinfo, &def_stmt, &thisarginfo.dt,
3666 			       &thisarginfo.vectype)
3667 	  || thisarginfo.dt == vect_uninitialized_def)
3668 	{
3669 	  if (dump_enabled_p ())
3670 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3671 			     "use not simple.\n");
3672 	  return false;
3673 	}
3674 
3675       if (thisarginfo.dt == vect_constant_def
3676 	  || thisarginfo.dt == vect_external_def)
3677 	gcc_assert (thisarginfo.vectype == NULL_TREE);
3678       else
3679 	gcc_assert (thisarginfo.vectype != NULL_TREE);
3680 
3681       /* For linear arguments, the analyze phase should have saved
3682 	 the base and step in STMT_VINFO_SIMD_CLONE_INFO.  */
3683       if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3684 	  && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3685 	{
3686 	  gcc_assert (vec_stmt);
3687 	  thisarginfo.linear_step
3688 	    = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
3689 	  thisarginfo.op
3690 	    = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3691 	  thisarginfo.simd_lane_linear
3692 	    = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3693 	       == boolean_true_node);
3694 	  /* If loop has been peeled for alignment, we need to adjust it.  */
3695 	  tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3696 	  tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
3697 	  if (n1 != n2 && !thisarginfo.simd_lane_linear)
3698 	    {
3699 	      tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
3700 	      tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
3701 	      tree opt = TREE_TYPE (thisarginfo.op);
3702 	      bias = fold_convert (TREE_TYPE (step), bias);
3703 	      bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3704 	      thisarginfo.op
3705 		= fold_build2 (POINTER_TYPE_P (opt)
3706 			       ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3707 			       thisarginfo.op, bias);
3708 	    }
3709 	}
3710       else if (!vec_stmt
3711 	       && thisarginfo.dt != vect_constant_def
3712 	       && thisarginfo.dt != vect_external_def
3713 	       && loop_vinfo
3714 	       && TREE_CODE (op) == SSA_NAME
3715 	       && simple_iv (loop, loop_containing_stmt (stmt), op,
3716 			     &iv, false)
3717 	       && tree_fits_shwi_p (iv.step))
3718 	{
3719 	  thisarginfo.linear_step = tree_to_shwi (iv.step);
3720 	  thisarginfo.op = iv.base;
3721 	}
3722       else if ((thisarginfo.dt == vect_constant_def
3723 		|| thisarginfo.dt == vect_external_def)
3724 	       && POINTER_TYPE_P (TREE_TYPE (op)))
3725 	thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
3726       /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3727 	 linear too.  */
3728       if (POINTER_TYPE_P (TREE_TYPE (op))
3729 	  && !thisarginfo.linear_step
3730 	  && !vec_stmt
3731 	  && thisarginfo.dt != vect_constant_def
3732 	  && thisarginfo.dt != vect_external_def
3733 	  && loop_vinfo
3734 	  && !slp_node
3735 	  && TREE_CODE (op) == SSA_NAME)
3736 	vect_simd_lane_linear (op, loop, &thisarginfo);
3737 
3738       arginfo.quick_push (thisarginfo);
3739     }
3740 
3741   unsigned HOST_WIDE_INT vf;
3742   if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
3743     {
3744       if (dump_enabled_p ())
3745 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3746 			 "not considering SIMD clones; not yet supported"
3747 			 " for variable-width vectors.\n");
3748       return false;
3749     }
3750 
3751   unsigned int badness = 0;
3752   struct cgraph_node *bestn = NULL;
3753   if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
3754     bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
3755   else
3756     for (struct cgraph_node *n = node->simd_clones; n != NULL;
3757 	 n = n->simdclone->next_clone)
3758       {
3759 	unsigned int this_badness = 0;
3760 	if (n->simdclone->simdlen > vf
3761 	    || n->simdclone->nargs != nargs)
3762 	  continue;
3763 	if (n->simdclone->simdlen < vf)
3764 	  this_badness += (exact_log2 (vf)
3765 			   - exact_log2 (n->simdclone->simdlen)) * 1024;
3766 	if (n->simdclone->inbranch)
3767 	  this_badness += 2048;
3768 	int target_badness = targetm.simd_clone.usable (n);
3769 	if (target_badness < 0)
3770 	  continue;
3771 	this_badness += target_badness * 512;
3772 	/* FORNOW: Have to add code to add the mask argument.  */
3773 	if (n->simdclone->inbranch)
3774 	  continue;
3775 	for (i = 0; i < nargs; i++)
3776 	  {
3777 	    switch (n->simdclone->args[i].arg_type)
3778 	      {
3779 	      case SIMD_CLONE_ARG_TYPE_VECTOR:
3780 		if (!useless_type_conversion_p
3781 			(n->simdclone->args[i].orig_type,
3782 			 TREE_TYPE (gimple_call_arg (stmt, i))))
3783 		  i = -1;
3784 		else if (arginfo[i].dt == vect_constant_def
3785 			 || arginfo[i].dt == vect_external_def
3786 			 || arginfo[i].linear_step)
3787 		  this_badness += 64;
3788 		break;
3789 	      case SIMD_CLONE_ARG_TYPE_UNIFORM:
3790 		if (arginfo[i].dt != vect_constant_def
3791 		    && arginfo[i].dt != vect_external_def)
3792 		  i = -1;
3793 		break;
3794 	      case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3795 	      case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3796 		if (arginfo[i].dt == vect_constant_def
3797 		    || arginfo[i].dt == vect_external_def
3798 		    || (arginfo[i].linear_step
3799 			!= n->simdclone->args[i].linear_step))
3800 		  i = -1;
3801 		break;
3802 	      case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3803 	      case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3804 	      case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3805 	      case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3806 	      case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3807 	      case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3808 		/* FORNOW */
3809 		i = -1;
3810 		break;
3811 	      case SIMD_CLONE_ARG_TYPE_MASK:
3812 		gcc_unreachable ();
3813 	      }
3814 	    if (i == (size_t) -1)
3815 	      break;
3816 	    if (n->simdclone->args[i].alignment > arginfo[i].align)
3817 	      {
3818 		i = -1;
3819 		break;
3820 	      }
3821 	    if (arginfo[i].align)
3822 	      this_badness += (exact_log2 (arginfo[i].align)
3823 			       - exact_log2 (n->simdclone->args[i].alignment));
3824 	  }
3825 	if (i == (size_t) -1)
3826 	  continue;
3827 	if (bestn == NULL || this_badness < badness)
3828 	  {
3829 	    bestn = n;
3830 	    badness = this_badness;
3831 	  }
3832       }
3833 
3834   if (bestn == NULL)
3835     return false;
3836 
3837   for (i = 0; i < nargs; i++)
3838     if ((arginfo[i].dt == vect_constant_def
3839 	 || arginfo[i].dt == vect_external_def)
3840 	&& bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
3841       {
3842 	arginfo[i].vectype
3843 	  = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
3844 								     i)));
3845 	if (arginfo[i].vectype == NULL
3846 	    || (simd_clone_subparts (arginfo[i].vectype)
3847 		> bestn->simdclone->simdlen))
3848 	  return false;
3849       }
3850 
3851   fndecl = bestn->decl;
3852   nunits = bestn->simdclone->simdlen;
3853   ncopies = vf / nunits;
3854 
  /* If the function isn't const, only allow it in simd loops where the
     user has asserted that at least nunits consecutive iterations can be
     performed using SIMD instructions.  */
3858   if ((loop == NULL || (unsigned) loop->safelen < nunits)
3859       && gimple_vuse (stmt))
3860     return false;
3861 
3862   /* Sanity check: make sure that at least one copy of the vectorized stmt
3863      needs to be generated.  */
3864   gcc_assert (ncopies >= 1);
3865 
3866   if (!vec_stmt) /* transformation not required.  */
3867     {
3868       STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
3869       for (i = 0; i < nargs; i++)
3870 	if ((bestn->simdclone->args[i].arg_type
3871 	     == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
3872 	    || (bestn->simdclone->args[i].arg_type
3873 		== SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
3874 	  {
3875 	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
3876 									+ 1);
3877 	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
3878 	    tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
3879 		       ? size_type_node : TREE_TYPE (arginfo[i].op);
3880 	    tree ls = build_int_cst (lst, arginfo[i].linear_step);
3881 	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
3882 	    tree sll = arginfo[i].simd_lane_linear
3883 		       ? boolean_true_node : boolean_false_node;
3884 	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
3885 	  }
3886       STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
3887       if (dump_enabled_p ())
3888 	dump_printf_loc (MSG_NOTE, vect_location,
3889 			 "=== vectorizable_simd_clone_call ===\n");
3890 /*      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
3891       return true;
3892     }
3893 
3894   /* Transform.  */
3895 
3896   if (dump_enabled_p ())
3897     dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3898 
3899   /* Handle def.  */
3900   scalar_dest = gimple_call_lhs (stmt);
3901   vec_dest = NULL_TREE;
3902   rtype = NULL_TREE;
3903   ratype = NULL_TREE;
3904   if (scalar_dest)
3905     {
3906       vec_dest = vect_create_destination_var (scalar_dest, vectype);
3907       rtype = TREE_TYPE (TREE_TYPE (fndecl));
3908       if (TREE_CODE (rtype) == ARRAY_TYPE)
3909 	{
3910 	  ratype = rtype;
3911 	  rtype = TREE_TYPE (ratype);
3912 	}
3913     }
3914 
3915   prev_stmt_info = NULL;
3916   for (j = 0; j < ncopies; ++j)
3917     {
3918       /* Build argument list for the vectorized call.  */
3919       if (j == 0)
3920 	vargs.create (nargs);
3921       else
3922 	vargs.truncate (0);
3923 
3924       for (i = 0; i < nargs; i++)
3925 	{
3926 	  unsigned int k, l, m, o;
3927 	  tree atype;
3928 	  op = gimple_call_arg (stmt, i);
3929 	  switch (bestn->simdclone->args[i].arg_type)
3930 	    {
3931 	    case SIMD_CLONE_ARG_TYPE_VECTOR:
3932 	      atype = bestn->simdclone->args[i].vector_type;
3933 	      o = nunits / simd_clone_subparts (atype);
3934 	      for (m = j * o; m < (j + 1) * o; m++)
3935 		{
3936 		  if (simd_clone_subparts (atype)
3937 		      < simd_clone_subparts (arginfo[i].vectype))
3938 		    {
3939 		      poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
3940 		      k = (simd_clone_subparts (arginfo[i].vectype)
3941 			   / simd_clone_subparts (atype));
3942 		      gcc_assert ((k & (k - 1)) == 0);
3943 		      if (m == 0)
3944 			vec_oprnd0
3945 			  = vect_get_vec_def_for_operand (op, stmt);
3946 		      else
3947 			{
3948 			  vec_oprnd0 = arginfo[i].op;
3949 			  if ((m & (k - 1)) == 0)
3950 			    vec_oprnd0
3951 			      = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3952 								vec_oprnd0);
3953 			}
3954 		      arginfo[i].op = vec_oprnd0;
3955 		      vec_oprnd0
3956 			= build3 (BIT_FIELD_REF, atype, vec_oprnd0,
3957 				  bitsize_int (prec),
3958 				  bitsize_int ((m & (k - 1)) * prec));
3959 		      new_stmt
3960 			= gimple_build_assign (make_ssa_name (atype),
3961 					       vec_oprnd0);
3962 		      vect_finish_stmt_generation (stmt, new_stmt, gsi);
3963 		      vargs.safe_push (gimple_assign_lhs (new_stmt));
3964 		    }
3965 		  else
3966 		    {
3967 		      k = (simd_clone_subparts (atype)
3968 			   / simd_clone_subparts (arginfo[i].vectype));
3969 		      gcc_assert ((k & (k - 1)) == 0);
3970 		      vec<constructor_elt, va_gc> *ctor_elts;
3971 		      if (k != 1)
3972 			vec_alloc (ctor_elts, k);
3973 		      else
3974 			ctor_elts = NULL;
3975 		      for (l = 0; l < k; l++)
3976 			{
3977 			  if (m == 0 && l == 0)
3978 			    vec_oprnd0
3979 			      = vect_get_vec_def_for_operand (op, stmt);
3980 			  else
3981 			    vec_oprnd0
3982 			      = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3983 								arginfo[i].op);
3984 			  arginfo[i].op = vec_oprnd0;
3985 			  if (k == 1)
3986 			    break;
3987 			  CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
3988 						  vec_oprnd0);
3989 			}
3990 		      if (k == 1)
3991 			vargs.safe_push (vec_oprnd0);
3992 		      else
3993 			{
3994 			  vec_oprnd0 = build_constructor (atype, ctor_elts);
3995 			  new_stmt
3996 			    = gimple_build_assign (make_ssa_name (atype),
3997 						   vec_oprnd0);
3998 			  vect_finish_stmt_generation (stmt, new_stmt, gsi);
3999 			  vargs.safe_push (gimple_assign_lhs (new_stmt));
4000 			}
4001 		    }
4002 		}
4003 	      break;
4004 	    case SIMD_CLONE_ARG_TYPE_UNIFORM:
4005 	      vargs.safe_push (op);
4006 	      break;
4007 	    case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4008 	    case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4009 	      if (j == 0)
4010 		{
4011 		  gimple_seq stmts;
4012 		  arginfo[i].op
4013 		    = force_gimple_operand (arginfo[i].op, &stmts, true,
4014 					    NULL_TREE);
4015 		  if (stmts != NULL)
4016 		    {
4017 		      basic_block new_bb;
4018 		      edge pe = loop_preheader_edge (loop);
4019 		      new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4020 		      gcc_assert (!new_bb);
4021 		    }
4022 		  if (arginfo[i].simd_lane_linear)
4023 		    {
4024 		      vargs.safe_push (arginfo[i].op);
4025 		      break;
4026 		    }
4027 		  tree phi_res = copy_ssa_name (op);
4028 		  gphi *new_phi = create_phi_node (phi_res, loop->header);
4029 		  set_vinfo_for_stmt (new_phi,
4030 				      new_stmt_vec_info (new_phi, loop_vinfo));
4031 		  add_phi_arg (new_phi, arginfo[i].op,
4032 			       loop_preheader_edge (loop), UNKNOWN_LOCATION);
4033 		  enum tree_code code
4034 		    = POINTER_TYPE_P (TREE_TYPE (op))
4035 		      ? POINTER_PLUS_EXPR : PLUS_EXPR;
4036 		  tree type = POINTER_TYPE_P (TREE_TYPE (op))
4037 			      ? sizetype : TREE_TYPE (op);
4038 		  widest_int cst
4039 		    = wi::mul (bestn->simdclone->args[i].linear_step,
4040 			       ncopies * nunits);
4041 		  tree tcst = wide_int_to_tree (type, cst);
4042 		  tree phi_arg = copy_ssa_name (op);
4043 		  new_stmt
4044 		    = gimple_build_assign (phi_arg, code, phi_res, tcst);
4045 		  gimple_stmt_iterator si = gsi_after_labels (loop->header);
4046 		  gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4047 		  set_vinfo_for_stmt (new_stmt,
4048 				      new_stmt_vec_info (new_stmt, loop_vinfo));
4049 		  add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4050 			       UNKNOWN_LOCATION);
4051 		  arginfo[i].op = phi_res;
4052 		  vargs.safe_push (phi_res);
4053 		}
4054 	      else
4055 		{
4056 		  enum tree_code code
4057 		    = POINTER_TYPE_P (TREE_TYPE (op))
4058 		      ? POINTER_PLUS_EXPR : PLUS_EXPR;
4059 		  tree type = POINTER_TYPE_P (TREE_TYPE (op))
4060 			      ? sizetype : TREE_TYPE (op);
4061 		  widest_int cst
4062 		    = wi::mul (bestn->simdclone->args[i].linear_step,
4063 			       j * nunits);
4064 		  tree tcst = wide_int_to_tree (type, cst);
4065 		  new_temp = make_ssa_name (TREE_TYPE (op));
4066 		  new_stmt = gimple_build_assign (new_temp, code,
4067 						  arginfo[i].op, tcst);
4068 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
4069 		  vargs.safe_push (new_temp);
4070 		}
4071 	      break;
4072 	    case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4073 	    case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4074 	    case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4075 	    case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4076 	    case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4077 	    case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4078 	    default:
4079 	      gcc_unreachable ();
4080 	    }
4081 	}
4082 
4083       new_stmt = gimple_build_call_vec (fndecl, vargs);
4084       if (vec_dest)
4085 	{
4086 	  gcc_assert (ratype || simd_clone_subparts (rtype) == nunits);
4087 	  if (ratype)
4088 	    new_temp = create_tmp_var (ratype);
4089 	  else if (simd_clone_subparts (vectype)
4090 		   == simd_clone_subparts (rtype))
4091 	    new_temp = make_ssa_name (vec_dest, new_stmt);
4092 	  else
4093 	    new_temp = make_ssa_name (rtype, new_stmt);
4094 	  gimple_call_set_lhs (new_stmt, new_temp);
4095 	}
4096       vect_finish_stmt_generation (stmt, new_stmt, gsi);
4097 
4098       if (vec_dest)
4099 	{
4100 	  if (simd_clone_subparts (vectype) < nunits)
4101 	    {
4102 	      unsigned int k, l;
4103 	      poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4104 	      poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4105 	      k = nunits / simd_clone_subparts (vectype);
4106 	      gcc_assert ((k & (k - 1)) == 0);
4107 	      for (l = 0; l < k; l++)
4108 		{
4109 		  tree t;
4110 		  if (ratype)
4111 		    {
4112 		      t = build_fold_addr_expr (new_temp);
4113 		      t = build2 (MEM_REF, vectype, t,
4114 				  build_int_cst (TREE_TYPE (t), l * bytes));
4115 		    }
4116 		  else
4117 		    t = build3 (BIT_FIELD_REF, vectype, new_temp,
4118 				bitsize_int (prec), bitsize_int (l * prec));
4119 		  new_stmt
4120 		    = gimple_build_assign (make_ssa_name (vectype), t);
4121 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
4122 		  if (j == 0 && l == 0)
4123 		    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4124 		  else
4125 		    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4126 
4127 		  prev_stmt_info = vinfo_for_stmt (new_stmt);
4128 		}
4129 
4130 	      if (ratype)
4131 		{
4132 		  tree clobber = build_constructor (ratype, NULL);
4133 		  TREE_THIS_VOLATILE (clobber) = 1;
4134 		  new_stmt = gimple_build_assign (new_temp, clobber);
4135 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
4136 		}
4137 	      continue;
4138 	    }
4139 	  else if (simd_clone_subparts (vectype) > nunits)
4140 	    {
4141 	      unsigned int k = (simd_clone_subparts (vectype)
4142 				/ simd_clone_subparts (rtype));
4143 	      gcc_assert ((k & (k - 1)) == 0);
4144 	      if ((j & (k - 1)) == 0)
4145 		vec_alloc (ret_ctor_elts, k);
4146 	      if (ratype)
4147 		{
4148 		  unsigned int m, o = nunits / simd_clone_subparts (rtype);
4149 		  for (m = 0; m < o; m++)
4150 		    {
4151 		      tree tem = build4 (ARRAY_REF, rtype, new_temp,
4152 					 size_int (m), NULL_TREE, NULL_TREE);
4153 		      new_stmt
4154 			= gimple_build_assign (make_ssa_name (rtype), tem);
4155 		      vect_finish_stmt_generation (stmt, new_stmt, gsi);
4156 		      CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4157 					      gimple_assign_lhs (new_stmt));
4158 		    }
4159 		  tree clobber = build_constructor (ratype, NULL);
4160 		  TREE_THIS_VOLATILE (clobber) = 1;
4161 		  new_stmt = gimple_build_assign (new_temp, clobber);
4162 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
4163 		}
4164 	      else
4165 		CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4166 	      if ((j & (k - 1)) != k - 1)
4167 		continue;
4168 	      vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4169 	      new_stmt
4170 		= gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4171 	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
4172 
4173 	      if ((unsigned) j == k - 1)
4174 		STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4175 	      else
4176 		STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4177 
4178 	      prev_stmt_info = vinfo_for_stmt (new_stmt);
4179 	      continue;
4180 	    }
4181 	  else if (ratype)
4182 	    {
4183 	      tree t = build_fold_addr_expr (new_temp);
4184 	      t = build2 (MEM_REF, vectype, t,
4185 			  build_int_cst (TREE_TYPE (t), 0));
4186 	      new_stmt
4187 		= gimple_build_assign (make_ssa_name (vec_dest), t);
4188 	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
4189 	      tree clobber = build_constructor (ratype, NULL);
4190 	      TREE_THIS_VOLATILE (clobber) = 1;
4191 	      vect_finish_stmt_generation (stmt,
4192 					   gimple_build_assign (new_temp,
4193 								clobber), gsi);
4194 	    }
4195 	}
4196 
4197       if (j == 0)
4198 	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4199       else
4200 	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4201 
4202       prev_stmt_info = vinfo_for_stmt (new_stmt);
4203     }
4204 
4205   vargs.release ();
4206 
  /* The call in STMT might prevent it from being removed in dce.
     We however cannot remove it here, due to the way the ssa name
     it defines is mapped to the new definition.  So just replace
     the rhs of the statement with something harmless.  */
4211 
4212   if (slp_node)
4213     return true;
4214 
4215   if (scalar_dest)
4216     {
4217       type = TREE_TYPE (scalar_dest);
4218       if (is_pattern_stmt_p (stmt_info))
4219 	lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
4220       else
4221 	lhs = gimple_call_lhs (stmt);
4222       new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4223     }
4224   else
4225     new_stmt = gimple_build_nop ();
4226   set_vinfo_for_stmt (new_stmt, stmt_info);
4227   set_vinfo_for_stmt (stmt, NULL);
4228   STMT_VINFO_STMT (stmt_info) = new_stmt;
4229   gsi_replace (gsi, new_stmt, true);
4230   unlink_stmt_vdef (stmt);
4231 
4232   return true;
4233 }
4234 
4235 
4236 /* Function vect_gen_widened_results_half
4237 
   Create a vector stmt whose code, number of arguments, and result
   variable are CODE, OP_TYPE, and VEC_DEST, and whose arguments are
   VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at GSI.
   In the case that CODE is a CALL_EXPR, this means that a call to DECL
   needs to be created (DECL is a function-decl of a target builtin).
4243    STMT is the original scalar stmt that we are vectorizing.  */
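
/* For instance (illustrative; the actual codes are supplied by the
   caller), with CODE = VEC_UNPACK_LO_EXPR and OP_TYPE = unary_op this
   emits

     new_temp = VEC_UNPACK_LO_EXPR <vec_oprnd0>;

   whereas for CODE = CALL_EXPR it emits a call to DECL taking one or two
   vector arguments.  */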
4244 
4245 static gimple *
4246 vect_gen_widened_results_half (enum tree_code code,
4247 			       tree decl,
4248                                tree vec_oprnd0, tree vec_oprnd1, int op_type,
4249 			       tree vec_dest, gimple_stmt_iterator *gsi,
4250 			       gimple *stmt)
4251 {
4252   gimple *new_stmt;
4253   tree new_temp;
4254 
4255   /* Generate half of the widened result:  */
4256   if (code == CALL_EXPR)
4257     {
4258       /* Target specific support  */
4259       if (op_type == binary_op)
4260 	new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
4261       else
4262 	new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
4263       new_temp = make_ssa_name (vec_dest, new_stmt);
4264       gimple_call_set_lhs (new_stmt, new_temp);
4265     }
4266   else
4267     {
4268       /* Generic support */
4269       gcc_assert (op_type == TREE_CODE_LENGTH (code));
4270       if (op_type != binary_op)
4271 	vec_oprnd1 = NULL;
4272       new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
4273       new_temp = make_ssa_name (vec_dest, new_stmt);
4274       gimple_assign_set_lhs (new_stmt, new_temp);
4275     }
4276   vect_finish_stmt_generation (stmt, new_stmt, gsi);
4277 
4278   return new_stmt;
4279 }
4280 
4281 
4282 /* Get vectorized definitions for loop-based vectorization.  For the first
   operand we call vect_get_vec_def_for_operand() (with OPRND containing
   the scalar operand), and for the rest we get a copy with
   vect_get_vec_def_for_stmt_copy() using the previous vector definition
   (stored in OPRND).  See vect_get_vec_def_for_stmt_copy() for details.
4287    The vectors are collected into VEC_OPRNDS.  */
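
/* E.g. for a single conversion step (MULTI_STEP_CVT == 0) this pushes two
   vector defs into VEC_OPRNDS: the def of the scalar *OPRND followed by
   one stmt-copy def; each additional step appends two more stmt-copy defs
   (a sketch of the intent rather than of particular SSA names).  */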
4288 
4289 static void
4290 vect_get_loop_based_defs (tree *oprnd, gimple *stmt, enum vect_def_type dt,
4291 			  vec<tree> *vec_oprnds, int multi_step_cvt)
4292 {
4293   tree vec_oprnd;
4294 
4295   /* Get first vector operand.  */
  /* All the vector operands except the very first one (that is the scalar
     oprnd) are stmt copies.  */
4298   if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
4299     vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt);
4300   else
4301     vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
4302 
4303   vec_oprnds->quick_push (vec_oprnd);
4304 
4305   /* Get second vector operand.  */
4306   vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
4307   vec_oprnds->quick_push (vec_oprnd);
4308 
4309   *oprnd = vec_oprnd;
4310 
4311   /* For conversion in multiple steps, continue to get operands
4312      recursively.  */
4313   if (multi_step_cvt)
4314     vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds,  multi_step_cvt - 1);
4315 }
4316 
4317 
4318 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4319    For multi-step conversions store the resulting vectors and call the function
4320    recursively.  */
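
/* A sketch of a two-step demotion (vector modes picked only for
   illustration), e.g. from int to char elements: four V4SI operands are
   first packed pairwise into two V8HI vectors, which are then packed into
   one V16QI vector:

     tmp0 = VEC_PACK_TRUNC_EXPR <in0, in1>;
     tmp1 = VEC_PACK_TRUNC_EXPR <in2, in3>;
     out  = VEC_PACK_TRUNC_EXPR <tmp0, tmp1>;

   The intermediate results are stored back into VEC_OPRNDS between the
   recursive calls.  */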
4321 
4322 static void
4323 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
4324 				       int multi_step_cvt, gimple *stmt,
4325 				       vec<tree> vec_dsts,
4326 				       gimple_stmt_iterator *gsi,
4327 				       slp_tree slp_node, enum tree_code code,
4328 				       stmt_vec_info *prev_stmt_info)
4329 {
4330   unsigned int i;
4331   tree vop0, vop1, new_tmp, vec_dest;
4332   gimple *new_stmt;
4333   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4334 
4335   vec_dest = vec_dsts.pop ();
4336 
4337   for (i = 0; i < vec_oprnds->length (); i += 2)
4338     {
4339       /* Create demotion operation.  */
4340       vop0 = (*vec_oprnds)[i];
4341       vop1 = (*vec_oprnds)[i + 1];
4342       new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4343       new_tmp = make_ssa_name (vec_dest, new_stmt);
4344       gimple_assign_set_lhs (new_stmt, new_tmp);
4345       vect_finish_stmt_generation (stmt, new_stmt, gsi);
4346 
4347       if (multi_step_cvt)
	/* Store the resulting vector for the next recursive call.  */
4349 	(*vec_oprnds)[i/2] = new_tmp;
4350       else
4351 	{
4352 	  /* This is the last step of the conversion sequence. Store the
4353 	     vectors in SLP_NODE or in vector info of the scalar statement
4354 	     (or in STMT_VINFO_RELATED_STMT chain).  */
4355 	  if (slp_node)
4356 	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4357 	  else
4358 	    {
4359 	      if (!*prev_stmt_info)
4360 		STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4361 	      else
4362 		STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
4363 
4364 	      *prev_stmt_info = vinfo_for_stmt (new_stmt);
4365 	    }
4366 	}
4367     }
4368 
  /* For multi-step demotion operations we first generate demotion operations
     from the source type to the intermediate types, and then combine the
     results (stored in VEC_OPRNDS) with a demotion operation to the
     destination type.  */
4373   if (multi_step_cvt)
4374     {
4375       /* At each level of recursion we have half of the operands we had at the
4376 	 previous level.  */
4377       vec_oprnds->truncate ((i+1)/2);
4378       vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
4379 					     stmt, vec_dsts, gsi, slp_node,
4380 					     VEC_PACK_TRUNC_EXPR,
4381 					     prev_stmt_info);
4382     }
4383 
4384   vec_dsts.quick_push (vec_dest);
4385 }
4386 
4387 
4388 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4389    and VEC_OPRNDS1 (for binary operations).  For multi-step conversions store
4390    the resulting vectors and call the function recursively.  */
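
/* A minimal sketch for a unary widening conversion (modes for illustration
   only): each V8HI operand in VEC_OPRNDS0 yields two V4SI halves,

     lo = VEC_UNPACK_LO_EXPR <in>;
     hi = VEC_UNPACK_HI_EXPR <in>;

   and both halves are stored back into VEC_OPRNDS0 for the next step;
   for CALL_EXPR codes, calls to DECL1 and DECL2 are emitted instead.  */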
4391 
4392 static void
4393 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
4394 					vec<tree> *vec_oprnds1,
4395 					gimple *stmt, tree vec_dest,
4396 					gimple_stmt_iterator *gsi,
4397 					enum tree_code code1,
4398 					enum tree_code code2, tree decl1,
4399 					tree decl2, int op_type)
4400 {
4401   int i;
4402   tree vop0, vop1, new_tmp1, new_tmp2;
4403   gimple *new_stmt1, *new_stmt2;
4404   vec<tree> vec_tmp = vNULL;
4405 
4406   vec_tmp.create (vec_oprnds0->length () * 2);
4407   FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4408     {
4409       if (op_type == binary_op)
4410 	vop1 = (*vec_oprnds1)[i];
4411       else
4412 	vop1 = NULL_TREE;
4413 
      /* Generate the two halves of the promotion operation.  */
4415       new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
4416 						 op_type, vec_dest, gsi, stmt);
4417       new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
4418 						 op_type, vec_dest, gsi, stmt);
4419       if (is_gimple_call (new_stmt1))
4420 	{
4421 	  new_tmp1 = gimple_call_lhs (new_stmt1);
4422 	  new_tmp2 = gimple_call_lhs (new_stmt2);
4423 	}
4424       else
4425 	{
4426 	  new_tmp1 = gimple_assign_lhs (new_stmt1);
4427 	  new_tmp2 = gimple_assign_lhs (new_stmt2);
4428 	}
4429 
4430       /* Store the results for the next step.  */
4431       vec_tmp.quick_push (new_tmp1);
4432       vec_tmp.quick_push (new_tmp2);
4433     }
4434 
4435   vec_oprnds0->release ();
4436   *vec_oprnds0 = vec_tmp;
4437 }
4438 
4439 
/* Check if STMT performs a conversion operation that can be vectorized.
4441    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4442    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4443    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
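
/* For example (element modes are illustrative), the three cases handled
   below are:

     NONE:    V4SI <-> V4SF; a single FLOAT_EXPR or FIX_TRUNC_EXPR per copy.
     WIDEN:   V8HI -> V4SI; each input vector produces two output vectors
	      via VEC_UNPACK_LO_EXPR / VEC_UNPACK_HI_EXPR.
     NARROW:  V4SI -> V8HI; two input vectors are combined into one output
	      vector via VEC_PACK_TRUNC_EXPR.

   Multi-step variants chain these through intermediate types.  */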
4444 
4445 static bool
4446 vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
4447 			 gimple **vec_stmt, slp_tree slp_node)
4448 {
4449   tree vec_dest;
4450   tree scalar_dest;
4451   tree op0, op1 = NULL_TREE;
4452   tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
4453   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4454   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4455   enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4456   enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4457   tree decl1 = NULL_TREE, decl2 = NULL_TREE;
4458   tree new_temp;
4459   gimple *def_stmt;
4460   enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4461   int ndts = 2;
4462   gimple *new_stmt = NULL;
4463   stmt_vec_info prev_stmt_info;
4464   poly_uint64 nunits_in;
4465   poly_uint64 nunits_out;
4466   tree vectype_out, vectype_in;
4467   int ncopies, i, j;
4468   tree lhs_type, rhs_type;
4469   enum { NARROW, NONE, WIDEN } modifier;
4470   vec<tree> vec_oprnds0 = vNULL;
4471   vec<tree> vec_oprnds1 = vNULL;
4472   tree vop0;
4473   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4474   vec_info *vinfo = stmt_info->vinfo;
4475   int multi_step_cvt = 0;
4476   vec<tree> interm_types = vNULL;
4477   tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
4478   int op_type;
4479   unsigned short fltsz;
4480 
4481   /* Is STMT a vectorizable conversion?   */
4482 
4483   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4484     return false;
4485 
4486   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4487       && ! vec_stmt)
4488     return false;
4489 
4490   if (!is_gimple_assign (stmt))
4491     return false;
4492 
4493   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4494     return false;
4495 
4496   code = gimple_assign_rhs_code (stmt);
4497   if (!CONVERT_EXPR_CODE_P (code)
4498       && code != FIX_TRUNC_EXPR
4499       && code != FLOAT_EXPR
4500       && code != WIDEN_MULT_EXPR
4501       && code != WIDEN_LSHIFT_EXPR)
4502     return false;
4503 
4504   op_type = TREE_CODE_LENGTH (code);
4505 
4506   /* Check types of lhs and rhs.  */
4507   scalar_dest = gimple_assign_lhs (stmt);
4508   lhs_type = TREE_TYPE (scalar_dest);
4509   vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4510 
4511   op0 = gimple_assign_rhs1 (stmt);
4512   rhs_type = TREE_TYPE (op0);
4513 
4514   if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4515       && !((INTEGRAL_TYPE_P (lhs_type)
4516 	    && INTEGRAL_TYPE_P (rhs_type))
4517 	   || (SCALAR_FLOAT_TYPE_P (lhs_type)
4518 	       && SCALAR_FLOAT_TYPE_P (rhs_type))))
4519     return false;
4520 
4521   if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4522       && ((INTEGRAL_TYPE_P (lhs_type)
4523 	   && !type_has_mode_precision_p (lhs_type))
4524 	  || (INTEGRAL_TYPE_P (rhs_type)
4525 	      && !type_has_mode_precision_p (rhs_type))))
4526     {
4527       if (dump_enabled_p ())
4528 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4529                          "type conversion to/from bit-precision unsupported."
4530                          "\n");
4531       return false;
4532     }
4533 
4534   /* Check the operands of the operation.  */
4535   if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype_in))
4536     {
4537       if (dump_enabled_p ())
4538 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4539                          "use not simple.\n");
4540       return false;
4541     }
4542   if (op_type == binary_op)
4543     {
4544       bool ok;
4545 
4546       op1 = gimple_assign_rhs2 (stmt);
4547       gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4548       /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4549 	 OP1.  */
4550       if (CONSTANT_CLASS_P (op0))
4551 	ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &vectype_in);
4552       else
4553 	ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]);
4554 
4555       if (!ok)
4556 	{
4557           if (dump_enabled_p ())
4558             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4559                              "use not simple.\n");
4560 	  return false;
4561 	}
4562     }
4563 
  /* If op0 is an external or constant def, use a vector type of
     the same size as the output vector type.  */
4566   if (!vectype_in)
4567     vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
4568   if (vec_stmt)
4569     gcc_assert (vectype_in);
4570   if (!vectype_in)
4571     {
4572       if (dump_enabled_p ())
4573 	{
4574 	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4575                            "no vectype for scalar type ");
4576 	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4577           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4578 	}
4579 
4580       return false;
4581     }
4582 
4583   if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4584       && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4585     {
4586       if (dump_enabled_p ())
4587 	{
4588 	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4589                            "can't convert between boolean and non "
4590 			   "boolean vectors");
4591 	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4592           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4593 	}
4594 
4595       return false;
4596     }
4597 
4598   nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4599   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4600   if (known_eq (nunits_out, nunits_in))
4601     modifier = NONE;
4602   else if (multiple_p (nunits_out, nunits_in))
4603     modifier = NARROW;
4604   else
4605     {
4606       gcc_checking_assert (multiple_p (nunits_in, nunits_out));
4607       modifier = WIDEN;
4608     }
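  /* For example, with 128-bit vectors an int -> long conversion has
     vectype_in V4SI (nunits_in == 4) and vectype_out V2DI (nunits_out == 2),
     so it is classified as WIDEN and each input vector yields two output
     vectors; the reverse long -> int conversion is a NARROW that combines
     two input vectors into a single result.  */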
4609 
4610   /* Multiple types in SLP are handled by creating the appropriate number of
4611      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
4612      case of SLP.  */
4613   if (slp_node)
4614     ncopies = 1;
4615   else if (modifier == NARROW)
4616     ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4617   else
4618     ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
4619 
4620   /* Sanity check: make sure that at least one copy of the vectorized stmt
4621      needs to be generated.  */
4622   gcc_assert (ncopies >= 1);
4623 
4624   bool found_mode = false;
4625   scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4626   scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4627   opt_scalar_mode rhs_mode_iter;
4628 
4629   /* Supportable by target?  */
4630   switch (modifier)
4631     {
4632     case NONE:
4633       if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4634 	return false;
4635       if (supportable_convert_operation (code, vectype_out, vectype_in,
4636 					 &decl1, &code1))
4637 	break;
4638       /* FALLTHRU */
4639     unsupported:
4640       if (dump_enabled_p ())
4641 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4642                          "conversion not supported by target.\n");
4643       return false;
4644 
4645     case WIDEN:
4646       if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
4647 					  &code1, &code2, &multi_step_cvt,
4648 					  &interm_types))
4649 	{
4650 	  /* Binary widening operation can only be supported directly by the
4651 	     architecture.  */
4652 	  gcc_assert (!(multi_step_cvt && op_type == binary_op));
4653 	  break;
4654 	}
4655 
4656       if (code != FLOAT_EXPR
4657 	  || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4658 	goto unsupported;
4659 
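      /* No direct widening conversion is available.  For a FLOAT_EXPR whose
	 integer source is narrower than the float destination (e.g.
	 short -> double), try widening the integer to an intermediate
	 integer mode first and then converting that wider integer to the
	 float type, provided the target supports both steps.  */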
4660       fltsz = GET_MODE_SIZE (lhs_mode);
4661       FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4662 	{
4663 	  rhs_mode = rhs_mode_iter.require ();
4664 	  if (GET_MODE_SIZE (rhs_mode) > fltsz)
4665 	    break;
4666 
4667 	  cvt_type
4668 	    = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4669 	  cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4670 	  if (cvt_type == NULL_TREE)
4671 	    goto unsupported;
4672 
4673 	  if (GET_MODE_SIZE (rhs_mode) == fltsz)
4674 	    {
4675 	      if (!supportable_convert_operation (code, vectype_out,
4676 						  cvt_type, &decl1, &codecvt1))
4677 		goto unsupported;
4678 	    }
4679 	  else if (!supportable_widening_operation (code, stmt, vectype_out,
4680 						    cvt_type, &codecvt1,
4681 						    &codecvt2, &multi_step_cvt,
4682 						    &interm_types))
4683 	    continue;
4684 	  else
4685 	    gcc_assert (multi_step_cvt == 0);
4686 
4687 	  if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
4688 					      vectype_in, &code1, &code2,
4689 					      &multi_step_cvt, &interm_types))
4690 	    {
4691 	      found_mode = true;
4692 	      break;
4693 	    }
4694 	}
4695 
4696       if (!found_mode)
4697 	goto unsupported;
4698 
4699       if (GET_MODE_SIZE (rhs_mode) == fltsz)
4700 	codecvt2 = ERROR_MARK;
4701       else
4702 	{
4703 	  multi_step_cvt++;
4704 	  interm_types.safe_push (cvt_type);
4705 	  cvt_type = NULL_TREE;
4706 	}
4707       break;
4708 
4709     case NARROW:
4710       gcc_assert (op_type == unary_op);
4711       if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4712 					   &code1, &multi_step_cvt,
4713 					   &interm_types))
4714 	break;
4715 
4716       if (code != FIX_TRUNC_EXPR
4717 	  || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
4718 	goto unsupported;
4719 
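      /* No direct narrowing conversion is available.  For a FIX_TRUNC_EXPR
	 whose integer destination is narrower than the float source (e.g.
	 double -> short), first convert the float to a same-width integer
	 type and then narrow that integer result, possibly in several
	 steps.  */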
4720       cvt_type
4721 	= build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4722       cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4723       if (cvt_type == NULL_TREE)
4724 	goto unsupported;
4725       if (!supportable_convert_operation (code, cvt_type, vectype_in,
4726 					  &decl1, &codecvt1))
4727 	goto unsupported;
4728       if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4729 					   &code1, &multi_step_cvt,
4730 					   &interm_types))
4731 	break;
4732       goto unsupported;
4733 
4734     default:
4735       gcc_unreachable ();
4736     }
4737 
4738   if (!vec_stmt)		/* transformation not required.  */
4739     {
4740       if (dump_enabled_p ())
4741 	dump_printf_loc (MSG_NOTE, vect_location,
4742                          "=== vectorizable_conversion ===\n");
4743       if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
4744         {
4745 	  STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
4746 	  if (!slp_node)
4747 	    vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
4748 	}
4749       else if (modifier == NARROW)
4750 	{
4751 	  STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
4752 	  if (!slp_node)
4753 	    vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4754 	}
4755       else
4756 	{
4757 	  STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
4758 	  if (!slp_node)
4759 	    vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4760 	}
4761       interm_types.release ();
4762       return true;
4763     }
4764 
4765   /* Transform.  */
4766   if (dump_enabled_p ())
4767     dump_printf_loc (MSG_NOTE, vect_location,
4768                      "transform conversion. ncopies = %d.\n", ncopies);
4769 
4770   if (op_type == binary_op)
4771     {
4772       if (CONSTANT_CLASS_P (op0))
4773 	op0 = fold_convert (TREE_TYPE (op1), op0);
4774       else if (CONSTANT_CLASS_P (op1))
4775 	op1 = fold_convert (TREE_TYPE (op0), op1);
4776     }
4777 
4778   /* In case of multi-step conversion, we first generate conversion operations
4779      to the intermediate types, and then from those types to the final one.
4780      We create vector destinations for the intermediate type (TYPES) received
4781      from supportable_*_operation, and store them in the correct order
4782      for future use in vect_create_vectorized_*_stmts ().  */
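  /* For example, a char -> int promotion on a target that can only widen
     one step at a time is carried out as char -> short -> int, with the
     intermediate short vector type recorded in INTERM_TYPES.  */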
4783   auto_vec<tree> vec_dsts (multi_step_cvt + 1);
4784   vec_dest = vect_create_destination_var (scalar_dest,
4785 					  (cvt_type && modifier == WIDEN)
4786 					  ? cvt_type : vectype_out);
4787   vec_dsts.quick_push (vec_dest);
4788 
4789   if (multi_step_cvt)
4790     {
4791       for (i = interm_types.length () - 1;
4792 	   interm_types.iterate (i, &intermediate_type); i--)
4793 	{
4794 	  vec_dest = vect_create_destination_var (scalar_dest,
4795 						  intermediate_type);
4796 	  vec_dsts.quick_push (vec_dest);
4797 	}
4798     }
4799 
4800   if (cvt_type)
4801     vec_dest = vect_create_destination_var (scalar_dest,
4802 					    modifier == WIDEN
4803 					    ? vectype_out : cvt_type);
4804 
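  /* Pre-size the operand vectors for the non-SLP case; in a multi-step
     conversion the number of vectors in flight grows by a factor of two
     per step, which the vect_pow2 (MULTI_STEP_CVT) terms below account
     for.  */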
4805   if (!slp_node)
4806     {
4807       if (modifier == WIDEN)
4808 	{
4809 	  vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
4810 	  if (op_type == binary_op)
4811 	    vec_oprnds1.create (1);
4812 	}
4813       else if (modifier == NARROW)
4814 	vec_oprnds0.create (
4815 		   2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
4816     }
4817   else if (code == WIDEN_LSHIFT_EXPR)
4818     vec_oprnds1.create (slp_node->vec_stmts_size);
4819 
4820   last_oprnd = op0;
4821   prev_stmt_info = NULL;
4822   switch (modifier)
4823     {
4824     case NONE:
4825       for (j = 0; j < ncopies; j++)
4826 	{
4827 	  if (j == 0)
4828 	    vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
4829 	  else
4830 	    vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
4831 
4832 	  FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4833 	    {
4834 	      /* Arguments are ready, create the new vector stmt.  */
4835 	      if (code1 == CALL_EXPR)
4836 		{
4837 		  new_stmt = gimple_build_call (decl1, 1, vop0);
4838 		  new_temp = make_ssa_name (vec_dest, new_stmt);
4839 		  gimple_call_set_lhs (new_stmt, new_temp);
4840 		}
4841 	      else
4842 		{
4843 		  gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
4844 		  new_stmt = gimple_build_assign (vec_dest, code1, vop0);
4845 		  new_temp = make_ssa_name (vec_dest, new_stmt);
4846 		  gimple_assign_set_lhs (new_stmt, new_temp);
4847 		}
4848 
4849 	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
4850 	      if (slp_node)
4851 		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4852 	      else
4853 		{
4854 		  if (!prev_stmt_info)
4855 		    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4856 		  else
4857 		    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4858 		  prev_stmt_info = vinfo_for_stmt (new_stmt);
4859 		}
4860 	    }
4861 	}
4862       break;
4863 
4864     case WIDEN:
4865       /* In case the vectorization factor (VF) is bigger than the number
4866 	 of elements that we can fit in a vectype (nunits), we have to
4867 	 generate more than one vector stmt, i.e., we need to "unroll"
4868 	 the vector stmt by a factor VF/nunits.  */
4869       for (j = 0; j < ncopies; j++)
4870 	{
4871 	  /* Handle uses.  */
4872 	  if (j == 0)
4873 	    {
4874 	      if (slp_node)
4875 		{
4876 		  if (code == WIDEN_LSHIFT_EXPR)
4877 		    {
4878 		      unsigned int k;
4879 
4880 		      vec_oprnd1 = op1;
4881 		      /* Store vec_oprnd1 for every vector stmt to be created
4882 			 for SLP_NODE.  We check during the analysis that all
4883 			 the shift arguments are the same.  */
4884 		      for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4885 			vec_oprnds1.quick_push (vec_oprnd1);
4886 
4887 		      vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4888 					 slp_node);
4889 		    }
4890 		  else
4891 		    vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
4892 				       &vec_oprnds1, slp_node);
4893 		}
4894 	      else
4895 		{
4896 		  vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
4897 		  vec_oprnds0.quick_push (vec_oprnd0);
4898 		  if (op_type == binary_op)
4899 		    {
4900 		      if (code == WIDEN_LSHIFT_EXPR)
4901 			vec_oprnd1 = op1;
4902 		      else
4903 			vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
4904 		      vec_oprnds1.quick_push (vec_oprnd1);
4905 		    }
4906 		}
4907 	    }
4908 	  else
4909 	    {
4910 	      vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
4911 	      vec_oprnds0.truncate (0);
4912 	      vec_oprnds0.quick_push (vec_oprnd0);
4913 	      if (op_type == binary_op)
4914 		{
4915 		  if (code == WIDEN_LSHIFT_EXPR)
4916 		    vec_oprnd1 = op1;
4917 		  else
4918 		    vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
4919 								 vec_oprnd1);
4920 		  vec_oprnds1.truncate (0);
4921 		  vec_oprnds1.quick_push (vec_oprnd1);
4922 		}
4923 	    }
4924 
4925 	  /* Arguments are ready.  Create the new vector stmts.  */
4926 	  for (i = multi_step_cvt; i >= 0; i--)
4927 	    {
4928 	      tree this_dest = vec_dsts[i];
4929 	      enum tree_code c1 = code1, c2 = code2;
4930 	      if (i == 0 && codecvt2 != ERROR_MARK)
4931 		{
4932 		  c1 = codecvt1;
4933 		  c2 = codecvt2;
4934 		}
4935 	      vect_create_vectorized_promotion_stmts (&vec_oprnds0,
4936 						      &vec_oprnds1,
4937 						      stmt, this_dest, gsi,
4938 						      c1, c2, decl1, decl2,
4939 						      op_type);
4940 	    }
4941 
4942 	  FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4943 	    {
4944 	      if (cvt_type)
4945 		{
4946 		  if (codecvt1 == CALL_EXPR)
4947 		    {
4948 		      new_stmt = gimple_build_call (decl1, 1, vop0);
4949 		      new_temp = make_ssa_name (vec_dest, new_stmt);
4950 		      gimple_call_set_lhs (new_stmt, new_temp);
4951 		    }
4952 		  else
4953 		    {
4954 		      gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4955 		      new_temp = make_ssa_name (vec_dest);
4956 		      new_stmt = gimple_build_assign (new_temp, codecvt1,
4957 						      vop0);
4958 		    }
4959 
4960 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
4961 		}
4962 	      else
4963 		new_stmt = SSA_NAME_DEF_STMT (vop0);
4964 
4965 	      if (slp_node)
4966 		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4967 	      else
4968 		{
4969 		  if (!prev_stmt_info)
4970 		    STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4971 		  else
4972 		    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4973 		  prev_stmt_info = vinfo_for_stmt (new_stmt);
4974 		}
4975 	    }
4976 	}
4977 
4978       *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4979       break;
4980 
4981     case NARROW:
4982       /* In case the vectorization factor (VF) is bigger than the number
4983 	 of elements that we can fit in a vectype (nunits), we have to
4984 	 generate more than one vector stmt, i.e., we need to "unroll"
4985 	 the vector stmt by a factor VF/nunits.  */
4986       for (j = 0; j < ncopies; j++)
4987 	{
4988 	  /* Handle uses.  */
4989 	  if (slp_node)
4990 	    vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4991 			       slp_node);
4992 	  else
4993 	    {
4994 	      vec_oprnds0.truncate (0);
4995 	      vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
4996 					vect_pow2 (multi_step_cvt) - 1);
4997 	    }
4998 
4999 	  /* Arguments are ready.  Create the new vector stmts.  */
5000 	  if (cvt_type)
5001 	    FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5002 	      {
5003 		if (codecvt1 == CALL_EXPR)
5004 		  {
5005 		    new_stmt = gimple_build_call (decl1, 1, vop0);
5006 		    new_temp = make_ssa_name (vec_dest, new_stmt);
5007 		    gimple_call_set_lhs (new_stmt, new_temp);
5008 		  }
5009 		else
5010 		  {
5011 		    gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5012 		    new_temp = make_ssa_name (vec_dest);
5013 		    new_stmt = gimple_build_assign (new_temp, codecvt1,
5014 						    vop0);
5015 		  }
5016 
5017 		vect_finish_stmt_generation (stmt, new_stmt, gsi);
5018 		vec_oprnds0[i] = new_temp;
5019 	      }
5020 
5021 	  vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
5022 						 stmt, vec_dsts, gsi,
5023 						 slp_node, code1,
5024 						 &prev_stmt_info);
5025 	}
5026 
5027       *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5028       break;
5029     }
5030 
5031   vec_oprnds0.release ();
5032   vec_oprnds1.release ();
5033   interm_types.release ();
5034 
5035   return true;
5036 }
5037 
5038 
5039 /* Function vectorizable_assignment.
5040 
5041    Check if STMT performs an assignment (copy) that can be vectorized.
5042    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5043    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5044    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
5045 
5046 static bool
5047 vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi,
5048 			 gimple **vec_stmt, slp_tree slp_node)
5049 {
5050   tree vec_dest;
5051   tree scalar_dest;
5052   tree op;
5053   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5054   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5055   tree new_temp;
5056   gimple *def_stmt;
5057   enum vect_def_type dt[1] = {vect_unknown_def_type};
5058   int ndts = 1;
5059   int ncopies;
5060   int i, j;
5061   vec<tree> vec_oprnds = vNULL;
5062   tree vop;
5063   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5064   vec_info *vinfo = stmt_info->vinfo;
5065   gimple *new_stmt = NULL;
5066   stmt_vec_info prev_stmt_info = NULL;
5067   enum tree_code code;
5068   tree vectype_in;
5069 
5070   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5071     return false;
5072 
5073   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5074       && ! vec_stmt)
5075     return false;
5076 
5077   /* Is vectorizable assignment?  */
5078   if (!is_gimple_assign (stmt))
5079     return false;
5080 
5081   scalar_dest = gimple_assign_lhs (stmt);
5082   if (TREE_CODE (scalar_dest) != SSA_NAME)
5083     return false;
5084 
5085   code = gimple_assign_rhs_code (stmt);
5086   if (gimple_assign_single_p (stmt)
5087       || code == PAREN_EXPR
5088       || CONVERT_EXPR_CODE_P (code))
5089     op = gimple_assign_rhs1 (stmt);
5090   else
5091     return false;
5092 
5093   if (code == VIEW_CONVERT_EXPR)
5094     op = TREE_OPERAND (op, 0);
5095 
5096   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5097   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5098 
5099   /* Multiple types in SLP are handled by creating the appropriate number of
5100      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
5101      case of SLP.  */
5102   if (slp_node)
5103     ncopies = 1;
5104   else
5105     ncopies = vect_get_num_copies (loop_vinfo, vectype);
5106 
5107   gcc_assert (ncopies >= 1);
5108 
5109   if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[0], &vectype_in))
5110     {
5111       if (dump_enabled_p ())
5112         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5113                          "use not simple.\n");
5114       return false;
5115     }
5116 
5117   /* We can handle NOP_EXPR and VIEW_CONVERT_EXPR conversions that do not
5118      change the number of elements or the vector size.  */
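  /* E.g. a cast between int and unsigned int, or a VIEW_CONVERT_EXPR whose
     source and destination map to equally sized vector types, qualifies
     and is emitted below as a plain VIEW_CONVERT_EXPR of the vector
     operand.  */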
5119   if ((CONVERT_EXPR_CODE_P (code)
5120        || code == VIEW_CONVERT_EXPR)
5121       && (!vectype_in
5122 	  || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
5123 	  || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5124 		       GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
5125     return false;
5126 
5127   /* We do not handle bit-precision changes.  */
5128   if ((CONVERT_EXPR_CODE_P (code)
5129        || code == VIEW_CONVERT_EXPR)
5130       && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5131       && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5132 	  || !type_has_mode_precision_p (TREE_TYPE (op)))
5133       /* But a conversion that does not change the bit-pattern is ok.  */
5134       && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5135 	    > TYPE_PRECISION (TREE_TYPE (op)))
5136 	   && TYPE_UNSIGNED (TREE_TYPE (op)))
5137       /* A conversion between boolean types of different sizes is
5138 	 a simple assignment in case their vectypes are the same
5139 	 boolean vector type.  */
5140       && (!VECTOR_BOOLEAN_TYPE_P (vectype)
5141 	  || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
5142     {
5143       if (dump_enabled_p ())
5144         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5145                          "type conversion to/from bit-precision "
5146                          "unsupported.\n");
5147       return false;
5148     }
5149 
5150   if (!vec_stmt) /* transformation not required.  */
5151     {
5152       STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
5153       if (dump_enabled_p ())
5154         dump_printf_loc (MSG_NOTE, vect_location,
5155                          "=== vectorizable_assignment ===\n");
5156       if (!slp_node)
5157 	vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
5158       return true;
5159     }
5160 
5161   /* Transform.  */
5162   if (dump_enabled_p ())
5163     dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
5164 
5165   /* Handle def.  */
5166   vec_dest = vect_create_destination_var (scalar_dest, vectype);
5167 
5168   /* Handle use.  */
5169   for (j = 0; j < ncopies; j++)
5170     {
5171       /* Handle uses.  */
5172       if (j == 0)
5173         vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
5174       else
5175         vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
5176 
5177       /* Arguments are ready.  Create the new vector stmt.  */
5178       FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
5179        {
5180 	 if (CONVERT_EXPR_CODE_P (code)
5181 	     || code == VIEW_CONVERT_EXPR)
5182 	   vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
5183          new_stmt = gimple_build_assign (vec_dest, vop);
5184          new_temp = make_ssa_name (vec_dest, new_stmt);
5185          gimple_assign_set_lhs (new_stmt, new_temp);
5186          vect_finish_stmt_generation (stmt, new_stmt, gsi);
5187          if (slp_node)
5188            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5189        }
5190 
5191       if (slp_node)
5192         continue;
5193 
5194       if (j == 0)
5195         STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5196       else
5197         STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5198 
5199       prev_stmt_info = vinfo_for_stmt (new_stmt);
5200     }
5201 
5202   vec_oprnds.release ();
5203   return true;
5204 }
5205 
5206 
5207 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5208    either as shift by a scalar or by a vector.  */
5209 
5210 bool
5211 vect_supportable_shift (enum tree_code code, tree scalar_type)
5212 {
5213 
5214   machine_mode vec_mode;
5215   optab optab;
5216   int icode;
5217   tree vectype;
5218 
5219   vectype = get_vectype_for_scalar_type (scalar_type);
5220   if (!vectype)
5221     return false;
5222 
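  /* Prefer a vector-shifted-by-scalar optab and fall back to the
     vector-shifted-by-vector form if that is all the target provides.  */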
5223   optab = optab_for_tree_code (code, vectype, optab_scalar);
5224   if (!optab
5225       || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5226     {
5227       optab = optab_for_tree_code (code, vectype, optab_vector);
5228       if (!optab
5229           || (optab_handler (optab, TYPE_MODE (vectype))
5230                       == CODE_FOR_nothing))
5231         return false;
5232     }
5233 
5234   vec_mode = TYPE_MODE (vectype);
5235   icode = (int) optab_handler (optab, vec_mode);
5236   if (icode == CODE_FOR_nothing)
5237     return false;
5238 
5239   return true;
5240 }
5241 
5242 
5243 /* Function vectorizable_shift.
5244 
5245    Check if STMT performs a shift operation that can be vectorized.
5246    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5247    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5248    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
5249 
5250 static bool
5251 vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi,
5252                     gimple **vec_stmt, slp_tree slp_node)
5253 {
5254   tree vec_dest;
5255   tree scalar_dest;
5256   tree op0, op1 = NULL;
5257   tree vec_oprnd1 = NULL_TREE;
5258   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5259   tree vectype;
5260   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5261   enum tree_code code;
5262   machine_mode vec_mode;
5263   tree new_temp;
5264   optab optab;
5265   int icode;
5266   machine_mode optab_op2_mode;
5267   gimple *def_stmt;
5268   enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5269   int ndts = 2;
5270   gimple *new_stmt = NULL;
5271   stmt_vec_info prev_stmt_info;
5272   poly_uint64 nunits_in;
5273   poly_uint64 nunits_out;
5274   tree vectype_out;
5275   tree op1_vectype;
5276   int ncopies;
5277   int j, i;
5278   vec<tree> vec_oprnds0 = vNULL;
5279   vec<tree> vec_oprnds1 = vNULL;
5280   tree vop0, vop1;
5281   unsigned int k;
5282   bool scalar_shift_arg = true;
5283   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5284   vec_info *vinfo = stmt_info->vinfo;
5285 
5286   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5287     return false;
5288 
5289   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5290       && ! vec_stmt)
5291     return false;
5292 
5293   /* Is STMT a vectorizable binary/unary operation?   */
5294   if (!is_gimple_assign (stmt))
5295     return false;
5296 
5297   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5298     return false;
5299 
5300   code = gimple_assign_rhs_code (stmt);
5301 
5302   if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5303       || code == RROTATE_EXPR))
5304     return false;
5305 
5306   scalar_dest = gimple_assign_lhs (stmt);
5307   vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5308   if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5309     {
5310       if (dump_enabled_p ())
5311         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5312                          "bit-precision shifts not supported.\n");
5313       return false;
5314     }
5315 
5316   op0 = gimple_assign_rhs1 (stmt);
5317   if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
5318     {
5319       if (dump_enabled_p ())
5320         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5321                          "use not simple.\n");
5322       return false;
5323     }
5324   /* If op0 is an external or constant def use a vector type with
5325      the same size as the output vector type.  */
5326   if (!vectype)
5327     vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5328   if (vec_stmt)
5329     gcc_assert (vectype);
5330   if (!vectype)
5331     {
5332       if (dump_enabled_p ())
5333         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5334                          "no vectype for scalar type\n");
5335       return false;
5336     }
5337 
5338   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5339   nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5340   if (maybe_ne (nunits_out, nunits_in))
5341     return false;
5342 
5343   op1 = gimple_assign_rhs2 (stmt);
5344   if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &op1_vectype))
5345     {
5346       if (dump_enabled_p ())
5347         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5348                          "use not simple.\n");
5349       return false;
5350     }
5351 
5352   /* Multiple types in SLP are handled by creating the appropriate number of
5353      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
5354      case of SLP.  */
5355   if (slp_node)
5356     ncopies = 1;
5357   else
5358     ncopies = vect_get_num_copies (loop_vinfo, vectype);
5359 
5360   gcc_assert (ncopies >= 1);
5361 
5362   /* Determine whether the shift amount is a vector, or scalar.  If the
5363      shift/rotate amount is a vector, use the vector/vector shift optabs.  */
5364 
5365   if ((dt[1] == vect_internal_def
5366        || dt[1] == vect_induction_def)
5367       && !slp_node)
5368     scalar_shift_arg = false;
5369   else if (dt[1] == vect_constant_def
5370 	   || dt[1] == vect_external_def
5371 	   || dt[1] == vect_internal_def)
5372     {
5373       /* In SLP, we need to check whether the shift count is the same
5374 	 for all statements; in loops, if it is a constant or invariant,
5375 	 it is always a scalar shift.  */
5376       if (slp_node)
5377 	{
5378 	  vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5379 	  gimple *slpstmt;
5380 
5381 	  FOR_EACH_VEC_ELT (stmts, k, slpstmt)
5382 	    if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5383 	      scalar_shift_arg = false;
5384 
5385 	  /* For internal SLP defs we have to make sure we see scalar stmts
5386 	     for all vector elements.
5387 	     ???  For different vectors we could resort to a different
5388 	     scalar shift operand but code-generation below simply always
5389 	     takes the first.  */
5390 	  if (dt[1] == vect_internal_def
5391 	      && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node), stmts.length ()))
5392 	    scalar_shift_arg = false;
5393 	}
5394 
5395       /* If the shift amount is computed by a pattern stmt we cannot
5396 	 use the scalar amount directly, so give up and use a vector
5397 	 shift.  */
5398       if (dt[1] == vect_internal_def)
5399 	{
5400 	  gimple *def = SSA_NAME_DEF_STMT (op1);
5401 	  if (is_pattern_stmt_p (vinfo_for_stmt (def)))
5402 	    scalar_shift_arg = false;
5403 	}
5404     }
5405   else
5406     {
5407       if (dump_enabled_p ())
5408         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5409                          "operand mode requires invariant argument.\n");
5410       return false;
5411     }
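  /* At this point SCALAR_SHIFT_ARG distinguishes shifts like a[i] << 3 or
     a[i] << n with loop-invariant n (scalar amount) from a[i] << b[i]
     (vector amount), subject to the SLP and pattern checks above.  */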
5412 
5413   /* Vector shifted by vector.  */
5414   if (!scalar_shift_arg)
5415     {
5416       optab = optab_for_tree_code (code, vectype, optab_vector);
5417       if (dump_enabled_p ())
5418         dump_printf_loc (MSG_NOTE, vect_location,
5419                          "vector/vector shift/rotate found.\n");
5420 
5421       if (!op1_vectype)
5422 	op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
5423       if (op1_vectype == NULL_TREE
5424 	  || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
5425 	{
5426 	  if (dump_enabled_p ())
5427 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5428                              "unusable type for last operand in"
5429                              " vector/vector shift/rotate.\n");
5430 	  return false;
5431 	}
5432     }
5433   /* See if the machine has a vector shifted by scalar insn and if not
5434      then see if it has a vector shifted by vector insn.  */
5435   else
5436     {
5437       optab = optab_for_tree_code (code, vectype, optab_scalar);
5438       if (optab
5439           && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5440         {
5441           if (dump_enabled_p ())
5442             dump_printf_loc (MSG_NOTE, vect_location,
5443                              "vector/scalar shift/rotate found.\n");
5444         }
5445       else
5446         {
5447           optab = optab_for_tree_code (code, vectype, optab_vector);
5448           if (optab
5449                && (optab_handler (optab, TYPE_MODE (vectype))
5450                       != CODE_FOR_nothing))
5451             {
5452 	      scalar_shift_arg = false;
5453 
5454               if (dump_enabled_p ())
5455                 dump_printf_loc (MSG_NOTE, vect_location,
5456                                  "vector/vector shift/rotate found.\n");
5457 
5458               /* Unlike the other binary operators, shifts/rotates have
5459                  the rhs being int, instead of the same type as the lhs,
5460                  so make sure the scalar is the right type if we are
5461 		 dealing with vectors of long long/long/short/char.  */
5462               if (dt[1] == vect_constant_def)
5463                 op1 = fold_convert (TREE_TYPE (vectype), op1);
5464 	      else if (!useless_type_conversion_p (TREE_TYPE (vectype),
5465 						   TREE_TYPE (op1)))
5466 		{
5467 		  if (slp_node
5468 		      && TYPE_MODE (TREE_TYPE (vectype))
5469 			 != TYPE_MODE (TREE_TYPE (op1)))
5470 		    {
5471                       if (dump_enabled_p ())
5472                         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5473                                          "unusable type for last operand in"
5474                                          " vector/vector shift/rotate.\n");
5475 		      return false;
5476 		    }
5477 		  if (vec_stmt && !slp_node)
5478 		    {
5479 		      op1 = fold_convert (TREE_TYPE (vectype), op1);
5480 		      op1 = vect_init_vector (stmt, op1,
5481 					      TREE_TYPE (vectype), NULL);
5482 		    }
5483 		}
5484             }
5485         }
5486     }
5487 
5488   /* Supportable by target?  */
5489   if (!optab)
5490     {
5491       if (dump_enabled_p ())
5492         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5493                          "no optab.\n");
5494       return false;
5495     }
5496   vec_mode = TYPE_MODE (vectype);
5497   icode = (int) optab_handler (optab, vec_mode);
5498   if (icode == CODE_FOR_nothing)
5499     {
5500       if (dump_enabled_p ())
5501         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5502                          "op not supported by target.\n");
5503       /* Check only during analysis.  */
5504       if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5505 	  || (!vec_stmt
5506 	      && !vect_worthwhile_without_simd_p (vinfo, code)))
5507         return false;
5508       if (dump_enabled_p ())
5509         dump_printf_loc (MSG_NOTE, vect_location,
5510                          "proceeding using word mode.\n");
5511     }
5512 
5513   /* Worthwhile without SIMD support?  Check only during analysis.  */
5514   if (!vec_stmt
5515       && !VECTOR_MODE_P (TYPE_MODE (vectype))
5516       && !vect_worthwhile_without_simd_p (vinfo, code))
5517     {
5518       if (dump_enabled_p ())
5519         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5520                          "not worthwhile without SIMD support.\n");
5521       return false;
5522     }
5523 
5524   if (!vec_stmt) /* transformation not required.  */
5525     {
5526       STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5527       if (dump_enabled_p ())
5528         dump_printf_loc (MSG_NOTE, vect_location,
5529                          "=== vectorizable_shift ===\n");
5530       if (!slp_node)
5531 	vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
5532       return true;
5533     }
5534 
5535   /* Transform.  */
5536 
5537   if (dump_enabled_p ())
5538     dump_printf_loc (MSG_NOTE, vect_location,
5539                      "transform binary/unary operation.\n");
5540 
5541   /* Handle def.  */
5542   vec_dest = vect_create_destination_var (scalar_dest, vectype);
5543 
5544   prev_stmt_info = NULL;
5545   for (j = 0; j < ncopies; j++)
5546     {
5547       /* Handle uses.  */
5548       if (j == 0)
5549         {
5550           if (scalar_shift_arg)
5551             {
5552               /* Vector shl and shr insn patterns can be defined with scalar
5553                  operand 2 (shift operand).  In this case, use constant or loop
5554                  invariant op1 directly, without extending it to vector mode
5555                  first.  */
5556               optab_op2_mode = insn_data[icode].operand[2].mode;
5557               if (!VECTOR_MODE_P (optab_op2_mode))
5558                 {
5559                   if (dump_enabled_p ())
5560                     dump_printf_loc (MSG_NOTE, vect_location,
5561                                      "operand 1 using scalar mode.\n");
5562                   vec_oprnd1 = op1;
5563                   vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
5564                   vec_oprnds1.quick_push (vec_oprnd1);
5565                   if (slp_node)
5566                     {
5567                       /* Store vec_oprnd1 for every vector stmt to be created
5568                          for SLP_NODE.  We check during the analysis that all
5569                          the shift arguments are the same.
5570                          TODO: Allow different constants for different vector
5571                          stmts generated for an SLP instance.  */
5572                       for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5573                         vec_oprnds1.quick_push (vec_oprnd1);
5574                     }
5575                 }
5576             }
5577 
5578           /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5579              (a special case for certain kinds of vector shifts); otherwise,
5580              operand 1 should be of a vector type (the usual case).  */
5581           if (vec_oprnd1)
5582             vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5583                                slp_node);
5584           else
5585             vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5586                                slp_node);
5587         }
5588       else
5589         vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5590 
5591       /* Arguments are ready.  Create the new vector stmt.  */
5592       FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5593         {
5594           vop1 = vec_oprnds1[i];
5595 	  new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5596           new_temp = make_ssa_name (vec_dest, new_stmt);
5597           gimple_assign_set_lhs (new_stmt, new_temp);
5598           vect_finish_stmt_generation (stmt, new_stmt, gsi);
5599           if (slp_node)
5600             SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5601         }
5602 
5603       if (slp_node)
5604         continue;
5605 
5606       if (j == 0)
5607         STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5608       else
5609         STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5610       prev_stmt_info = vinfo_for_stmt (new_stmt);
5611     }
5612 
5613   vec_oprnds0.release ();
5614   vec_oprnds1.release ();
5615 
5616   return true;
5617 }
5618 
5619 
5620 /* Function vectorizable_operation.
5621 
5622    Check if STMT performs a binary, unary or ternary operation that can
5623    be vectorized.
5624    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5625    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5626    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
5627 
5628 static bool
5629 vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
5630 			gimple **vec_stmt, slp_tree slp_node)
5631 {
5632   tree vec_dest;
5633   tree scalar_dest;
5634   tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5635   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5636   tree vectype;
5637   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5638   enum tree_code code, orig_code;
5639   machine_mode vec_mode;
5640   tree new_temp;
5641   int op_type;
5642   optab optab;
5643   bool target_support_p;
5644   gimple *def_stmt;
5645   enum vect_def_type dt[3]
5646     = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5647   int ndts = 3;
5648   gimple *new_stmt = NULL;
5649   stmt_vec_info prev_stmt_info;
5650   poly_uint64 nunits_in;
5651   poly_uint64 nunits_out;
5652   tree vectype_out;
5653   int ncopies;
5654   int j, i;
5655   vec<tree> vec_oprnds0 = vNULL;
5656   vec<tree> vec_oprnds1 = vNULL;
5657   vec<tree> vec_oprnds2 = vNULL;
5658   tree vop0, vop1, vop2;
5659   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5660   vec_info *vinfo = stmt_info->vinfo;
5661 
5662   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5663     return false;
5664 
5665   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5666       && ! vec_stmt)
5667     return false;
5668 
5669   /* Is STMT a vectorizable binary/unary operation?   */
5670   if (!is_gimple_assign (stmt))
5671     return false;
5672 
5673   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5674     return false;
5675 
5676   orig_code = code = gimple_assign_rhs_code (stmt);
5677 
5678   /* For pointer addition and subtraction, we should use the normal
5679      plus and minus for the vector operation.  */
5680   if (code == POINTER_PLUS_EXPR)
5681     code = PLUS_EXPR;
5682   if (code == POINTER_DIFF_EXPR)
5683     code = MINUS_EXPR;
5684 
5685   /* Support only unary, binary and ternary operations.  */
5686   op_type = TREE_CODE_LENGTH (code);
5687   if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
5688     {
5689       if (dump_enabled_p ())
5690         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5691                          "num. args = %d (not unary/binary/ternary op).\n",
5692                          op_type);
5693       return false;
5694     }
5695 
5696   scalar_dest = gimple_assign_lhs (stmt);
5697   vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5698 
5699   /* Most operations cannot handle bit-precision types without extra
5700      truncations.  */
5701   if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
5702       && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5703       /* Exception are bitwise binary operations.  */
5704       && code != BIT_IOR_EXPR
5705       && code != BIT_XOR_EXPR
5706       && code != BIT_AND_EXPR)
5707     {
5708       if (dump_enabled_p ())
5709         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5710                          "bit-precision arithmetic not supported.\n");
5711       return false;
5712     }
5713 
5714   op0 = gimple_assign_rhs1 (stmt);
5715   if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
5716     {
5717       if (dump_enabled_p ())
5718         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5719                          "use not simple.\n");
5720       return false;
5721     }
5722   /* If op0 is an external or constant def use a vector type with
5723      the same size as the output vector type.  */
5724   if (!vectype)
5725     {
5726       /* For a boolean type we cannot determine the vectype from an
5727 	 invariant value (we don't know whether it is a vector of
5728 	 booleans or a vector of integers).  Use the output
5729 	 vectype because operations on booleans don't change
5730 	 the type.  */
5731       if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
5732 	{
5733 	  if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
5734 	    {
5735 	      if (dump_enabled_p ())
5736 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5737 				 "not supported operation on bool value.\n");
5738 	      return false;
5739 	    }
5740 	  vectype = vectype_out;
5741 	}
5742       else
5743 	vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5744     }
5745   if (vec_stmt)
5746     gcc_assert (vectype);
5747   if (!vectype)
5748     {
5749       if (dump_enabled_p ())
5750         {
5751           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5752                            "no vectype for scalar type ");
5753           dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
5754                              TREE_TYPE (op0));
5755           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
5756         }
5757 
5758       return false;
5759     }
5760 
5761   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5762   nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5763   if (maybe_ne (nunits_out, nunits_in))
5764     return false;
5765 
5766   if (op_type == binary_op || op_type == ternary_op)
5767     {
5768       op1 = gimple_assign_rhs2 (stmt);
5769       if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]))
5770 	{
5771 	  if (dump_enabled_p ())
5772 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5773                              "use not simple.\n");
5774 	  return false;
5775 	}
5776     }
5777   if (op_type == ternary_op)
5778     {
5779       op2 = gimple_assign_rhs3 (stmt);
5780       if (!vect_is_simple_use (op2, vinfo, &def_stmt, &dt[2]))
5781 	{
5782 	  if (dump_enabled_p ())
5783 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5784                              "use not simple.\n");
5785 	  return false;
5786 	}
5787     }
5788 
5789   /* Multiple types in SLP are handled by creating the appropriate number of
5790      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
5791      case of SLP.  */
5792   if (slp_node)
5793     ncopies = 1;
5794   else
5795     ncopies = vect_get_num_copies (loop_vinfo, vectype);
5796 
5797   gcc_assert (ncopies >= 1);
5798 
5799   /* Shifts are handled in vectorizable_shift ().  */
5800   if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5801       || code == RROTATE_EXPR)
5802    return false;
5803 
5804   /* Supportable by target?  */
5805 
5806   vec_mode = TYPE_MODE (vectype);
5807   if (code == MULT_HIGHPART_EXPR)
5808     target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
5809   else
5810     {
5811       optab = optab_for_tree_code (code, vectype, optab_default);
5812       if (!optab)
5813 	{
5814           if (dump_enabled_p ())
5815             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5816                              "no optab.\n");
5817 	  return false;
5818 	}
5819       target_support_p = (optab_handler (optab, vec_mode)
5820 			  != CODE_FOR_nothing);
5821     }
5822 
5823   if (!target_support_p)
5824     {
5825       if (dump_enabled_p ())
5826 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5827                          "op not supported by target.\n");
5828       /* Check only during analysis.  */
5829       if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5830 	  || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
5831         return false;
5832       if (dump_enabled_p ())
5833 	dump_printf_loc (MSG_NOTE, vect_location,
5834                          "proceeding using word mode.\n");
5835     }
5836 
5837   /* Worthwhile without SIMD support?  Check only during analysis.  */
5838   if (!VECTOR_MODE_P (vec_mode)
5839       && !vec_stmt
5840       && !vect_worthwhile_without_simd_p (vinfo, code))
5841     {
5842       if (dump_enabled_p ())
5843         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5844                          "not worthwhile without SIMD support.\n");
5845       return false;
5846     }
5847 
5848   if (!vec_stmt) /* transformation not required.  */
5849     {
5850       STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
5851       if (dump_enabled_p ())
5852         dump_printf_loc (MSG_NOTE, vect_location,
5853                          "=== vectorizable_operation ===\n");
5854       if (!slp_node)
5855 	vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
5856       return true;
5857     }
5858 
5859   /* Transform.  */
5860 
5861   if (dump_enabled_p ())
5862     dump_printf_loc (MSG_NOTE, vect_location,
5863                      "transform binary/unary operation.\n");
5864 
5865   /* Handle def.  */
5866   vec_dest = vect_create_destination_var (scalar_dest, vectype);
5867 
5868   /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
5869      vectors with unsigned elements, but the result is signed.  So, we
5870      need to compute the MINUS_EXPR into a vectype temporary and
5871      VIEW_CONVERT_EXPR it into the final vectype_out result.  */
5872   tree vec_cvt_dest = NULL_TREE;
5873   if (orig_code == POINTER_DIFF_EXPR)
5874     vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
5875 
5876   /* In case the vectorization factor (VF) is bigger than the number
5877      of elements that we can fit in a vectype (nunits), we have to generate
5878      more than one vector stmt, i.e., we need to "unroll" the
5879      vector stmt by a factor VF/nunits.  In doing so, we record a pointer
5880      from one copy of the vector stmt to the next, in the field
5881      STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
5882      stages to find the correct vector defs to be used when vectorizing
5883      stmts that use the defs of the current stmt.  The example below
5884      illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
5885      we need to create 4 vectorized stmts):
5886 
5887      before vectorization:
5888                                 RELATED_STMT    VEC_STMT
5889         S1:     x = memref      -               -
5890         S2:     z = x + 1       -               -
5891 
5892      step 1: vectorize stmt S1 (done in vectorizable_load. See more details
5893              there):
5894                                 RELATED_STMT    VEC_STMT
5895         VS1_0:  vx0 = memref0   VS1_1           -
5896         VS1_1:  vx1 = memref1   VS1_2           -
5897         VS1_2:  vx2 = memref2   VS1_3           -
5898         VS1_3:  vx3 = memref3   -               -
5899         S1:     x = load        -               VS1_0
5900         S2:     z = x + 1       -               -
5901 
5902      step2: vectorize stmt S2 (done here):
5903         To vectorize stmt S2 we first need to find the relevant vector
5904         def for the first operand 'x'.  This is, as usual, obtained from
5905         the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
5906         that defines 'x' (S1).  This way we find the stmt VS1_0, and the
5907         relevant vector def 'vx0'.  Having found 'vx0' we can generate
5908         the vector stmt VS2_0, and as usual, record it in the
5909         STMT_VINFO_VEC_STMT of stmt S2.
5910         When creating the second copy (VS2_1), we obtain the relevant vector
5911         def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
5912         stmt VS1_0.  This way we find the stmt VS1_1 and the relevant
5913         vector def 'vx1'.  Using 'vx1' we create stmt VS2_1 and record a
5914         pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
5915         Similarly when creating stmts VS2_2 and VS2_3.  This is the resulting
5916         chain of stmts and pointers:
5917                                 RELATED_STMT    VEC_STMT
5918         VS1_0:  vx0 = memref0   VS1_1           -
5919         VS1_1:  vx1 = memref1   VS1_2           -
5920         VS1_2:  vx2 = memref2   VS1_3           -
5921         VS1_3:  vx3 = memref3   -               -
5922         S1:     x = load        -               VS1_0
5923         VS2_0:  vz0 = vx0 + v1  VS2_1           -
5924         VS2_1:  vz1 = vx1 + v1  VS2_2           -
5925         VS2_2:  vz2 = vx2 + v1  VS2_3           -
5926         VS2_3:  vz3 = vx3 + v1  -               -
5927         S2:     z = x + 1       -               VS2_0  */
5928 
5929   prev_stmt_info = NULL;
5930   for (j = 0; j < ncopies; j++)
5931     {
5932       /* Handle uses.  */
5933       if (j == 0)
5934 	{
5935 	  if (op_type == binary_op)
5936 	    vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5937 			       slp_node);
5938 	  else if (op_type == ternary_op)
5939 	    {
5940 	      if (slp_node)
5941 		{
5942 		  auto_vec<tree> ops(3);
5943 		  ops.quick_push (op0);
5944 		  ops.quick_push (op1);
5945 		  ops.quick_push (op2);
5946 		  auto_vec<vec<tree> > vec_defs(3);
5947 		  vect_get_slp_defs (ops, slp_node, &vec_defs);
5948 		  vec_oprnds0 = vec_defs[0];
5949 		  vec_oprnds1 = vec_defs[1];
5950 		  vec_oprnds2 = vec_defs[2];
5951 		}
5952 	      else
5953 		{
5954 		  vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5955 				     NULL);
5956 		  vect_get_vec_defs (op2, NULL_TREE, stmt, &vec_oprnds2, NULL,
5957 				     NULL);
5958 		}
5959 	    }
5960 	  else
5961 	    vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5962 			       slp_node);
5963 	}
5964       else
5965 	{
5966 	  vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5967 	  if (op_type == ternary_op)
5968 	    {
5969 	      tree vec_oprnd = vec_oprnds2.pop ();
5970 	      vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
5971 							           vec_oprnd));
5972 	    }
5973 	}
5974 
5975       /* Arguments are ready.  Create the new vector stmt.  */
5976       FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5977         {
5978 	  vop1 = ((op_type == binary_op || op_type == ternary_op)
5979 		  ? vec_oprnds1[i] : NULL_TREE);
5980 	  vop2 = ((op_type == ternary_op)
5981 		  ? vec_oprnds2[i] : NULL_TREE);
5982 	  new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
5983 	  new_temp = make_ssa_name (vec_dest, new_stmt);
5984 	  gimple_assign_set_lhs (new_stmt, new_temp);
5985 	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
5986 	  if (vec_cvt_dest)
5987 	    {
5988 	      new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
5989 	      new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
5990 					      new_temp);
5991 	      new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
5992 	      gimple_assign_set_lhs (new_stmt, new_temp);
5993 	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
5994 	    }
5995           if (slp_node)
5996 	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5997         }
5998 
5999       if (slp_node)
6000         continue;
6001 
6002       if (j == 0)
6003 	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6004       else
6005 	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6006       prev_stmt_info = vinfo_for_stmt (new_stmt);
6007     }
6008 
6009   vec_oprnds0.release ();
6010   vec_oprnds1.release ();
6011   vec_oprnds2.release ();
6012 
6013   return true;
6014 }
6015 
6016 /* A helper function to ensure data reference DR's base alignment.  */
6017 
6018 static void
6019 ensure_base_align (struct data_reference *dr)
6020 {
6021   if (!dr->aux)
6022     return;
6023 
6024   if (DR_VECT_AUX (dr)->base_misaligned)
6025     {
6026       tree base_decl = DR_VECT_AUX (dr)->base_decl;
6027 
6028       unsigned int align_base_to = DR_TARGET_ALIGNMENT (dr) * BITS_PER_UNIT;
6029 
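      /* Decls that are visible in the symbol table (globals and statics)
	 are realigned through the symtab machinery; local decls can have
	 their DECL_ALIGN increased directly.  */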
6030       if (decl_in_symtab_p (base_decl))
6031 	symtab_node::get (base_decl)->increase_alignment (align_base_to);
6032       else
6033 	{
6034 	  SET_DECL_ALIGN (base_decl, align_base_to);
6035           DECL_USER_ALIGN (base_decl) = 1;
6036 	}
6037       DR_VECT_AUX (dr)->base_misaligned = false;
6038     }
6039 }
6040 
6041 
6042 /* Function get_group_alias_ptr_type.
6043 
6044    Return the alias type for the group starting at FIRST_STMT.  */
6045 
6046 static tree
6047 get_group_alias_ptr_type (gimple *first_stmt)
6048 {
6049   struct data_reference *first_dr, *next_dr;
6050   gimple *next_stmt;
6051 
6052   first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6053   next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt));
6054   while (next_stmt)
6055     {
6056       next_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (next_stmt));
6057       if (get_alias_set (DR_REF (first_dr))
6058 	  != get_alias_set (DR_REF (next_dr)))
6059 	{
6060 	  if (dump_enabled_p ())
6061 	    dump_printf_loc (MSG_NOTE, vect_location,
6062 			     "conflicting alias set types.\n");
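	  /* Fall back to ptr_type_node, which conservatively aliases
	     everything.  */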
6063 	  return ptr_type_node;
6064 	}
6065       next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6066     }
6067   return reference_alias_ptr_type (DR_REF (first_dr));
6068 }
6069 
6070 
6071 /* Function vectorizable_store.
6072 
6073    Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
6074    can be vectorized.
6075    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6076    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
6077    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
6078 
6079 static bool
6080 vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
6081                     slp_tree slp_node)
6082 {
6083   tree data_ref;
6084   tree op;
6085   tree vec_oprnd = NULL_TREE;
6086   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6087   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
6088   tree elem_type;
6089   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6090   struct loop *loop = NULL;
6091   machine_mode vec_mode;
6092   tree dummy;
6093   enum dr_alignment_support alignment_support_scheme;
6094   gimple *def_stmt;
6095   enum vect_def_type rhs_dt = vect_unknown_def_type;
6096   enum vect_def_type mask_dt = vect_unknown_def_type;
6097   stmt_vec_info prev_stmt_info = NULL;
6098   tree dataref_ptr = NULL_TREE;
6099   tree dataref_offset = NULL_TREE;
6100   gimple *ptr_incr = NULL;
6101   int ncopies;
6102   int j;
6103   gimple *next_stmt, *first_stmt;
6104   bool grouped_store;
6105   unsigned int group_size, i;
6106   vec<tree> oprnds = vNULL;
6107   vec<tree> result_chain = vNULL;
6108   bool inv_p;
6109   tree offset = NULL_TREE;
6110   vec<tree> vec_oprnds = vNULL;
6111   bool slp = (slp_node != NULL);
6112   unsigned int vec_num;
6113   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6114   vec_info *vinfo = stmt_info->vinfo;
6115   tree aggr_type;
6116   gather_scatter_info gs_info;
6117   gimple *new_stmt;
6118   poly_uint64 vf;
6119   vec_load_store_type vls_type;
6120   tree ref_type;
6121 
6122   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6123     return false;
6124 
6125   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6126       && ! vec_stmt)
6127     return false;
6128 
6129   /* Is vectorizable store? */
6130 
6131   tree mask = NULL_TREE, mask_vectype = NULL_TREE;
6132   if (is_gimple_assign (stmt))
6133     {
6134       tree scalar_dest = gimple_assign_lhs (stmt);
6135       if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
6136 	  && is_pattern_stmt_p (stmt_info))
6137 	scalar_dest = TREE_OPERAND (scalar_dest, 0);
6138       if (TREE_CODE (scalar_dest) != ARRAY_REF
6139 	  && TREE_CODE (scalar_dest) != BIT_FIELD_REF
6140 	  && TREE_CODE (scalar_dest) != INDIRECT_REF
6141 	  && TREE_CODE (scalar_dest) != COMPONENT_REF
6142 	  && TREE_CODE (scalar_dest) != IMAGPART_EXPR
6143 	  && TREE_CODE (scalar_dest) != REALPART_EXPR
6144 	  && TREE_CODE (scalar_dest) != MEM_REF)
6145 	return false;
6146     }
6147   else
6148     {
6149       gcall *call = dyn_cast <gcall *> (stmt);
6150       if (!call || !gimple_call_internal_p (call))
6151 	return false;
6152 
6153       internal_fn ifn = gimple_call_internal_fn (call);
6154       if (!internal_store_fn_p (ifn))
6155 	return false;
6156 
6157       if (slp_node != NULL)
6158 	{
6159 	  if (dump_enabled_p ())
6160 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6161 			     "SLP of masked stores not supported.\n");
6162 	  return false;
6163 	}
6164 
6165       int mask_index = internal_fn_mask_index (ifn);
6166       if (mask_index >= 0)
6167 	{
6168 	  mask = gimple_call_arg (call, mask_index);
6169 	  if (!vect_check_load_store_mask (stmt, mask, &mask_dt,
6170 					   &mask_vectype))
6171 	    return false;
6172 	}
6173     }
6174 
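  /* OP is the value being stored: the rhs of the assignment, or the
     stored-value argument of the internal store function.  */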
6175   op = vect_get_store_rhs (stmt);
6176 
6177   /* Cannot have hybrid store SLP -- that would mean storing to the
6178      same location twice.  */
6179   gcc_assert (slp == PURE_SLP_STMT (stmt_info));
6180 
6181   tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
6182   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
6183 
6184   if (loop_vinfo)
6185     {
6186       loop = LOOP_VINFO_LOOP (loop_vinfo);
6187       vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6188     }
6189   else
6190     vf = 1;
6191 
6192   /* Multiple types in SLP are handled by creating the appropriate number of
6193      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
6194      case of SLP.  */
6195   if (slp)
6196     ncopies = 1;
6197   else
6198     ncopies = vect_get_num_copies (loop_vinfo, vectype);
6199 
6200   gcc_assert (ncopies >= 1);
6201 
6202   /* FORNOW.  This restriction should be relaxed.  */
6203   if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
6204     {
6205       if (dump_enabled_p ())
6206 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6207 			 "multiple types in nested loop.\n");
6208       return false;
6209     }
6210 
6211   if (!vect_check_store_rhs (stmt, op, &rhs_dt, &rhs_vectype, &vls_type))
6212     return false;
6213 
6214   elem_type = TREE_TYPE (vectype);
6215   vec_mode = TYPE_MODE (vectype);
6216 
6217   if (!STMT_VINFO_DATA_REF (stmt_info))
6218     return false;
6219 
6220   vect_memory_access_type memory_access_type;
6221   if (!get_load_store_type (stmt, vectype, slp, mask, vls_type, ncopies,
6222 			    &memory_access_type, &gs_info))
6223     return false;
6224 
6225   if (mask)
6226     {
6227       if (memory_access_type == VMAT_CONTIGUOUS)
6228 	{
6229 	  if (!VECTOR_MODE_P (vec_mode)
6230 	      || !can_vec_mask_load_store_p (vec_mode,
6231 					     TYPE_MODE (mask_vectype), false))
6232 	    return false;
6233 	}
6234       else if (memory_access_type != VMAT_LOAD_STORE_LANES
6235 	       && (memory_access_type != VMAT_GATHER_SCATTER || gs_info.decl))
6236 	{
6237 	  if (dump_enabled_p ())
6238 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6239 			     "unsupported access type for masked store.\n");
6240 	  return false;
6241 	}
6242     }
6243   else
6244     {
6245       /* FORNOW. In some cases can vectorize even if data-type not supported
6246 	 (e.g. - array initialization with 0).  */
6247       if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
6248 	return false;
6249     }
6250 
6251   grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
6252 		   && memory_access_type != VMAT_GATHER_SCATTER
6253 		   && (slp || memory_access_type != VMAT_CONTIGUOUS));
6254   if (grouped_store)
6255     {
6256       first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6257       first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6258       group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6259     }
6260   else
6261     {
6262       first_stmt = stmt;
6263       first_dr = dr;
6264       group_size = vec_num = 1;
6265     }
6266 
6267   if (!vec_stmt) /* transformation not required.  */
6268     {
6269       STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
6270 
6271       if (loop_vinfo
6272 	  && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
6273 	check_load_store_masking (loop_vinfo, vectype, vls_type, group_size,
6274 				  memory_access_type, &gs_info);
6275 
6276       STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
6277       /* The SLP costs are calculated during SLP analysis.  */
6278       if (!slp_node)
6279 	vect_model_store_cost (stmt_info, ncopies, memory_access_type,
6280 			       vls_type, NULL, NULL, NULL);
6281       return true;
6282     }
6283   gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
6284 
6285   /* Transform.  */
6286 
6287   ensure_base_align (dr);
6288 
6289   if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
6290     {
6291       tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
6292       tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
6293       tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
6294       tree ptr, mask, var, scale, perm_mask = NULL_TREE;
6295       edge pe = loop_preheader_edge (loop);
6296       gimple_seq seq;
6297       basic_block new_bb;
6298       enum { NARROW, NONE, WIDEN } modifier;
6299       poly_uint64 scatter_off_nunits
6300 	= TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
6301 
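      /* With the builtin scatters the offset vector and the data vector may
	 have different element counts.  WIDEN means the offset vector has
	 twice as many elements as the data vector, so each offset vector
	 feeds two scatters and its high half is brought to the front with
	 PERM_MASK on odd copies.  NARROW is the converse: the data vector
	 has twice as many elements as the offset vector, so NCOPIES is
	 doubled and the data vector's high half is permuted to the front
	 on odd copies instead.  */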
6302       if (known_eq (nunits, scatter_off_nunits))
6303 	modifier = NONE;
6304       else if (known_eq (nunits * 2, scatter_off_nunits))
6305 	{
6306 	  modifier = WIDEN;
6307 
6308 	  /* Currently gathers and scatters are only supported for
6309 	     fixed-length vectors.  */
6310 	  unsigned int count = scatter_off_nunits.to_constant ();
6311 	  vec_perm_builder sel (count, count, 1);
6312 	  for (i = 0; i < (unsigned int) count; ++i)
6313 	    sel.quick_push (i | (count / 2));
6314 
6315 	  vec_perm_indices indices (sel, 1, count);
6316 	  perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
6317 						  indices);
6318 	  gcc_assert (perm_mask != NULL_TREE);
6319 	}
6320       else if (known_eq (nunits, scatter_off_nunits * 2))
6321 	{
6322 	  modifier = NARROW;
6323 
6324 	  /* Currently gathers and scatters are only supported for
6325 	     fixed-length vectors.  */
6326 	  unsigned int count = nunits.to_constant ();
6327 	  vec_perm_builder sel (count, count, 1);
6328 	  for (i = 0; i < (unsigned int) count; ++i)
6329 	    sel.quick_push (i | (count / 2));
6330 
6331 	  vec_perm_indices indices (sel, 2, count);
6332 	  perm_mask = vect_gen_perm_mask_checked (vectype, indices);
6333 	  gcc_assert (perm_mask != NULL_TREE);
6334 	  ncopies *= 2;
6335 	}
6336       else
6337 	gcc_unreachable ();
6338 
6339       rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
6340       ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6341       masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6342       idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6343       srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6344       scaletype = TREE_VALUE (arglist);
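      /* The scatter builtin thus takes (ptr, mask, index, src, scale) and
	 returns void; the call built below passes its operands in that
	 order.  */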
6345 
6346       gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
6347 			   && TREE_CODE (rettype) == VOID_TYPE);
6348 
6349       ptr = fold_convert (ptrtype, gs_info.base);
6350       if (!is_gimple_min_invariant (ptr))
6351 	{
6352 	  ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6353 	  new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6354 	  gcc_assert (!new_bb);
6355 	}
6356 
6357       /* Currently we support only unconditional scatter stores,
6358 	 so mask should be all ones.  */
6359       mask = build_int_cst (masktype, -1);
6360       mask = vect_init_vector (stmt, mask, masktype, NULL);
6361 
6362       scale = build_int_cst (scaletype, gs_info.scale);
6363 
6364       prev_stmt_info = NULL;
6365       for (j = 0; j < ncopies; ++j)
6366 	{
6367 	  if (j == 0)
6368 	    {
6369 	      src = vec_oprnd1
6370 		= vect_get_vec_def_for_operand (op, stmt);
6371 	      op = vec_oprnd0
6372 		= vect_get_vec_def_for_operand (gs_info.offset, stmt);
6373 	    }
6374 	  else if (modifier != NONE && (j & 1))
6375 	    {
6376 	      if (modifier == WIDEN)
6377 		{
6378 		  src = vec_oprnd1
6379 		    = vect_get_vec_def_for_stmt_copy (rhs_dt, vec_oprnd1);
6380 		  op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
6381 					     stmt, gsi);
6382 		}
6383 	      else if (modifier == NARROW)
6384 		{
6385 		  src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
6386 					      stmt, gsi);
6387 		  op = vec_oprnd0
6388 		    = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
6389 						      vec_oprnd0);
6390 		}
6391 	      else
6392 		gcc_unreachable ();
6393 	    }
6394 	  else
6395 	    {
6396 	      src = vec_oprnd1
6397 		= vect_get_vec_def_for_stmt_copy (rhs_dt, vec_oprnd1);
6398 	      op = vec_oprnd0
6399 		= vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
6400 						  vec_oprnd0);
6401 	    }
6402 
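	  /* The builtin may expect SRC and OP in vector types that differ
	     from ours only in the element type (e.g. in signedness); the
	     element counts match, so punning with a VIEW_CONVERT_EXPR is
	     enough.  */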
6403 	  if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
6404 	    {
6405 	      gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
6406 				    TYPE_VECTOR_SUBPARTS (srctype)));
6407 	      var = vect_get_new_ssa_name (srctype, vect_simple_var);
6408 	      src = build1 (VIEW_CONVERT_EXPR, srctype, src);
6409 	      new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
6410 	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
6411 	      src = var;
6412 	    }
6413 
6414 	  if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6415 	    {
6416 	      gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
6417 				    TYPE_VECTOR_SUBPARTS (idxtype)));
6418 	      var = vect_get_new_ssa_name (idxtype, vect_simple_var);
6419 	      op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6420 	      new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6421 	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
6422 	      op = var;
6423 	    }
6424 
6425 	  new_stmt
6426 	    = gimple_build_call (gs_info.decl, 5, ptr, mask, op, src, scale);
6427 
6428 	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
6429 
6430 	  if (prev_stmt_info == NULL)
6431 	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6432 	  else
6433 	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6434 	  prev_stmt_info = vinfo_for_stmt (new_stmt);
6435 	}
6436       return true;
6437     }
6438 
6439   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
6440     {
6441       gimple *group_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6442       GROUP_STORE_COUNT (vinfo_for_stmt (group_stmt))++;
6443     }
6444 
6445   if (grouped_store)
6446     {
6447       /* FORNOW */
6448       gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
6449 
6450       /* We vectorize all the stmts of the interleaving group when we
6451 	 reach the last stmt in the group.  */
6452       if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
6453 	  < GROUP_SIZE (vinfo_for_stmt (first_stmt))
6454 	  && !slp)
6455 	{
6456 	  *vec_stmt = NULL;
6457 	  return true;
6458 	}
6459 
6460       if (slp)
6461         {
6462           grouped_store = false;
6463           /* VEC_NUM is the number of vect stmts to be created for this
6464              group.  */
6465           vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6466           first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6467 	  gcc_assert (GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt)) == first_stmt);
6468           first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6469 	  op = vect_get_store_rhs (first_stmt);
6470         }
6471       else
6472         /* VEC_NUM is the number of vect stmts to be created for this
6473            group.  */
6474 	vec_num = group_size;
6475 
6476       ref_type = get_group_alias_ptr_type (first_stmt);
6477     }
6478   else
6479     ref_type = reference_alias_ptr_type (DR_REF (first_dr));
6480 
6481   if (dump_enabled_p ())
6482     dump_printf_loc (MSG_NOTE, vect_location,
6483                      "transform store. ncopies = %d\n", ncopies);
6484 
6485   if (memory_access_type == VMAT_ELEMENTWISE
6486       || memory_access_type == VMAT_STRIDED_SLP)
6487     {
6488       gimple_stmt_iterator incr_gsi;
6489       bool insert_after;
6490       gimple *incr;
6491       tree offvar;
6492       tree ivstep;
6493       tree running_off;
6494       tree stride_base, stride_step, alias_off;
6495       tree vec_oprnd;
6496       unsigned int g;
6497       /* Checked by get_load_store_type.  */
6498       unsigned int const_nunits = nunits.to_constant ();
6499 
6500       gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
6501       gcc_assert (!nested_in_vect_loop_p (loop, stmt));
6502 
6503       stride_base
6504 	= fold_build_pointer_plus
6505 	    (DR_BASE_ADDRESS (first_dr),
6506 	     size_binop (PLUS_EXPR,
6507 			 convert_to_ptrofftype (DR_OFFSET (first_dr)),
6508 			 convert_to_ptrofftype (DR_INIT (first_dr))));
6509       stride_step = fold_convert (sizetype, DR_STEP (first_dr));
6510 
      /* For a store with a loop-invariant (but non-power-of-2) stride,
	 i.e. not a grouped access, like so:
6513 
6514 	   for (i = 0; i < n; i += stride)
6515 	     array[i] = ...;
6516 
6517 	 we generate a new induction variable and new stores from
6518 	 the components of the (vectorized) rhs:
6519 
6520 	   for (j = 0; ; j += VF*stride)
6521 	     vectemp = ...;
6522 	     tmp1 = vectemp[0];
6523 	     array[j] = tmp1;
6524 	     tmp2 = vectemp[1];
6525 	     array[j + stride] = tmp2;
6526 	     ...
6527          */
6528 
6529       unsigned nstores = const_nunits;
6530       unsigned lnel = 1;
6531       tree ltype = elem_type;
6532       tree lvectype = vectype;
6533       if (slp)
6534 	{
6535 	  if (group_size < const_nunits
6536 	      && const_nunits % group_size == 0)
6537 	    {
6538 	      nstores = const_nunits / group_size;
6539 	      lnel = group_size;
6540 	      ltype = build_vector_type (elem_type, group_size);
6541 	      lvectype = vectype;
6542 
	      /* First check whether the vec_extract optab supports
		 extracting GROUP_SIZE-element subvectors directly; if not,
		 try the integer-type fallback below.  */
6545 	      scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
6546 	      machine_mode vmode;
6547 	      if (!mode_for_vector (elmode, group_size).exists (&vmode)
6548 		  || !VECTOR_MODE_P (vmode)
6549 		  || !targetm.vector_mode_supported_p (vmode)
6550 		  || (convert_optab_handler (vec_extract_optab,
6551 					     TYPE_MODE (vectype), vmode)
6552 		      == CODE_FOR_nothing))
6553 		{
6554 		  /* Try to avoid emitting an extract of vector elements
6555 		     by performing the extracts using an integer type of the
6556 		     same size, extracting from a vector of those and then
6557 		     re-interpreting it as the original vector type if
6558 		     supported.  */
6559 		  unsigned lsize
6560 		    = group_size * GET_MODE_BITSIZE (elmode);
6561 		  elmode = int_mode_for_size (lsize, 0).require ();
6562 		  unsigned int lnunits = const_nunits / group_size;
6563 		  /* If we can't construct such a vector fall back to
6564 		     element extracts from the original vector type and
6565 		     element size stores.  */
6566 		  if (mode_for_vector (elmode, lnunits).exists (&vmode)
6567 		      && VECTOR_MODE_P (vmode)
6568 		      && targetm.vector_mode_supported_p (vmode)
6569 		      && (convert_optab_handler (vec_extract_optab,
6570 						 vmode, elmode)
6571 			  != CODE_FOR_nothing))
6572 		    {
6573 		      nstores = lnunits;
6574 		      lnel = group_size;
6575 		      ltype = build_nonstandard_integer_type (lsize, 1);
6576 		      lvectype = build_vector_type (ltype, nstores);
6577 		    }
6578 		  /* Else fall back to vector extraction anyway.
6579 		     Fewer stores are more important than avoiding spilling
6580 		     of the vector we extract from.  Compared to the
6581 		     construction case in vectorizable_load no store-forwarding
6582 		     issue exists here for reasonable archs.  */
6583 		}
6584 	    }
6585 	  else if (group_size >= const_nunits
6586 		   && group_size % const_nunits == 0)
6587 	    {
6588 	      nstores = 1;
6589 	      lnel = const_nunits;
6590 	      ltype = vectype;
6591 	      lvectype = vectype;
6592 	    }
6593 	  ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
6594 	  ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6595 	}
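      /* Whatever the case, NSTORES * LNEL == CONST_NUNITS: each vectorized
	 rhs is stored as NSTORES pieces of LNEL elements each, every piece
	 having type LTYPE and being extracted from a vector of type
	 LVECTYPE.  */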
6596 
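      /* Advance the IV by DR_STEP * VF bytes per iteration of the
	 vectorized loop, i.e. by VF scalar iterations' worth of stride.  */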
6597       ivstep = stride_step;
6598       ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
6599 			    build_int_cst (TREE_TYPE (ivstep), vf));
6600 
6601       standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6602 
6603       stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
6604       ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
6605       create_iv (stride_base, ivstep, NULL,
6606 		 loop, &incr_gsi, insert_after,
6607 		 &offvar, NULL);
6608       incr = gsi_stmt (incr_gsi);
6609       set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
6610 
6611       stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
6612 
6613       prev_stmt_info = NULL;
6614       alias_off = build_int_cst (ref_type, 0);
6615       next_stmt = first_stmt;
6616       for (g = 0; g < group_size; g++)
6617 	{
6618 	  running_off = offvar;
6619 	  if (g)
6620 	    {
6621 	      tree size = TYPE_SIZE_UNIT (ltype);
6622 	      tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
6623 				      size);
6624 	      tree newoff = copy_ssa_name (running_off, NULL);
6625 	      incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6626 					  running_off, pos);
6627 	      vect_finish_stmt_generation (stmt, incr, gsi);
6628 	      running_off = newoff;
6629 	    }
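	  /* GROUP_EL counts the scalar elements of the group stored since
	     the last bump of RUNNING_OFF; ELSZ is the element size in
	     bytes.  */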
6630 	  unsigned int group_el = 0;
6631 	  unsigned HOST_WIDE_INT
6632 	    elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
6633 	  for (j = 0; j < ncopies; j++)
6634 	    {
	      /* We've set OP (via vect_get_store_rhs) and RHS_DT (via
		 vect_check_store_rhs) above, and FIRST_STMT == STMT.  */
6637 	      if (j == 0)
6638 		{
6639 		  if (slp)
6640 		    {
6641 		      vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
6642 					 slp_node);
6643 		      vec_oprnd = vec_oprnds[0];
6644 		    }
6645 		  else
6646 		    {
6647 		      op = vect_get_store_rhs (next_stmt);
6648 		      vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
6649 		    }
6650 		}
6651 	      else
6652 		{
6653 		  if (slp)
6654 		    vec_oprnd = vec_oprnds[j];
6655 		  else
6656 		    {
6657 		      vect_is_simple_use (op, vinfo, &def_stmt, &rhs_dt);
6658 		      vec_oprnd = vect_get_vec_def_for_stmt_copy (rhs_dt,
6659 								  vec_oprnd);
6660 		    }
6661 		}
6662 	      /* Pun the vector to extract from if necessary.  */
6663 	      if (lvectype != vectype)
6664 		{
6665 		  tree tem = make_ssa_name (lvectype);
6666 		  gimple *pun
6667 		    = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
6668 							lvectype, vec_oprnd));
6669 		  vect_finish_stmt_generation (stmt, pun, gsi);
6670 		  vec_oprnd = tem;
6671 		}
6672 	      for (i = 0; i < nstores; i++)
6673 		{
6674 		  tree newref, newoff;
6675 		  gimple *incr, *assign;
6676 		  tree size = TYPE_SIZE (ltype);
6677 		  /* Extract the i'th component.  */
6678 		  tree pos = fold_build2 (MULT_EXPR, bitsizetype,
6679 					  bitsize_int (i), size);
6680 		  tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
6681 					   size, pos);
6682 
6683 		  elem = force_gimple_operand_gsi (gsi, elem, true,
6684 						   NULL_TREE, true,
6685 						   GSI_SAME_STMT);
6686 
6687 		  tree this_off = build_int_cst (TREE_TYPE (alias_off),
6688 						 group_el * elsz);
6689 		  newref = build2 (MEM_REF, ltype,
6690 				   running_off, this_off);
6691 		  vect_copy_ref_info (newref, DR_REF (first_dr));
6692 
6693 		  /* And store it to *running_off.  */
6694 		  assign = gimple_build_assign (newref, elem);
6695 		  vect_finish_stmt_generation (stmt, assign, gsi);
6696 
6697 		  group_el += lnel;
6698 		  if (! slp
6699 		      || group_el == group_size)
6700 		    {
6701 		      newoff = copy_ssa_name (running_off, NULL);
6702 		      incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6703 						  running_off, stride_step);
6704 		      vect_finish_stmt_generation (stmt, incr, gsi);
6705 
6706 		      running_off = newoff;
6707 		      group_el = 0;
6708 		    }
6709 		  if (g == group_size - 1
6710 		      && !slp)
6711 		    {
6712 		      if (j == 0 && i == 0)
6713 			STMT_VINFO_VEC_STMT (stmt_info)
6714 			    = *vec_stmt = assign;
6715 		      else
6716 			STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
6717 		      prev_stmt_info = vinfo_for_stmt (assign);
6718 		    }
6719 		}
6720 	    }
6721 	  next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6722 	  if (slp)
6723 	    break;
6724 	}
6725 
6726       vec_oprnds.release ();
6727       return true;
6728     }
6729 
6730   auto_vec<tree> dr_chain (group_size);
6731   oprnds.create (group_size);
6732 
6733   alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
6734   gcc_assert (alignment_support_scheme);
6735   vec_loop_masks *loop_masks
6736     = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
6737        ? &LOOP_VINFO_MASKS (loop_vinfo)
6738        : NULL);
6739   /* Targets with store-lane instructions must not require explicit
6740      realignment.  vect_supportable_dr_alignment always returns either
6741      dr_aligned or dr_unaligned_supported for masked operations.  */
6742   gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
6743 	       && !mask
6744 	       && !loop_masks)
6745 	      || alignment_support_scheme == dr_aligned
6746 	      || alignment_support_scheme == dr_unaligned_supported);
6747 
6748   if (memory_access_type == VMAT_CONTIGUOUS_DOWN
6749       || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
6750     offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6751 
6752   tree bump;
6753   tree vec_offset = NULL_TREE;
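  /* AGGR_TYPE is the type of the access generated per copy: an array
     holding VEC_NUM vectors' worth of elements for store-lanes, otherwise
     a single vector, or a single element when a strided access is
     implemented with scatter stores.  BUMP is the matching data-ref
     pointer increment between copies; native gather/scatter data-refs
     need neither, since their addresses come from the offset vector.  */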
6754   if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6755     {
6756       aggr_type = NULL_TREE;
6757       bump = NULL_TREE;
6758     }
6759   else if (memory_access_type == VMAT_GATHER_SCATTER)
6760     {
6761       aggr_type = elem_type;
6762       vect_get_strided_load_store_ops (stmt, loop_vinfo, &gs_info,
6763 				       &bump, &vec_offset);
6764     }
6765   else
6766     {
6767       if (memory_access_type == VMAT_LOAD_STORE_LANES)
6768 	aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6769       else
6770 	aggr_type = vectype;
6771       bump = vect_get_data_ptr_increment (dr, aggr_type, memory_access_type);
6772     }
6773 
6774   if (mask)
6775     LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
6776 
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt, i.e. we need to "unroll" the vector stmt by
     a factor of VF/nunits.  For more details see the documentation in
     vect_get_vec_def_for_copy_stmt.  */
6782 
6783   /* In case of interleaving (non-unit grouped access):
6784 
6785         S1:  &base + 2 = x2
6786         S2:  &base = x0
6787         S3:  &base + 1 = x1
6788         S4:  &base + 3 = x3
6789 
6790      We create vectorized stores starting from base address (the access of the
6791      first stmt in the chain (S2 in the above example), when the last store stmt
6792      of the chain (S4) is reached:
6793 
6794         VS1: &base = vx2
6795 	VS2: &base + vec_size*1 = vx0
6796 	VS3: &base + vec_size*2 = vx1
6797 	VS4: &base + vec_size*3 = vx3
6798 
6799      Then permutation statements are generated:
6800 
6801 	VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
6802 	VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
6803 	...
6804 
6805      And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6806      (the order of the data-refs in the output of vect_permute_store_chain
6807      corresponds to the order of scalar stmts in the interleaving chain - see
6808      the documentation of vect_permute_store_chain()).
6809 
6810      In case of both multiple types and interleaving, above vector stores and
6811      permutation stmts are created for every copy.  The result vector stmts are
6812      put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
6813      STMT_VINFO_RELATED_STMT for the next copies.
6814   */
6815 
6816   prev_stmt_info = NULL;
6817   tree vec_mask = NULL_TREE;
6818   for (j = 0; j < ncopies; j++)
6819     {
6820 
6821       if (j == 0)
6822 	{
6823           if (slp)
6824             {
6825 	      /* Get vectorized arguments for SLP_NODE.  */
6826               vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
6827                                  NULL, slp_node);
6828 
6829               vec_oprnd = vec_oprnds[0];
6830             }
6831           else
6832             {
6833 	      /* For interleaved stores we collect vectorized defs for all the
6834 		 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
6835 		 used as an input to vect_permute_store_chain(), and OPRNDS as
6836 		 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
6837 
6838 		 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6839 		 OPRNDS are of size 1.  */
6840 	      next_stmt = first_stmt;
6841 	      for (i = 0; i < group_size; i++)
6842 		{
		  /* Since gaps are not supported for interleaved stores,
		     GROUP_SIZE is the exact number of stmts in the chain.
		     Therefore, NEXT_STMT can't be NULL.  In case there is
		     no interleaving, GROUP_SIZE is 1, and only one
		     iteration of the loop will be executed.  */
6848 		  op = vect_get_store_rhs (next_stmt);
6849 		  vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
6850 		  dr_chain.quick_push (vec_oprnd);
6851 		  oprnds.quick_push (vec_oprnd);
6852 		  next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6853 		}
6854 	      if (mask)
6855 		vec_mask = vect_get_vec_def_for_operand (mask, stmt,
6856 							 mask_vectype);
6857 	    }
6858 
	  /* We should have caught mismatched types earlier.  */
6860 	  gcc_assert (useless_type_conversion_p (vectype,
6861 						 TREE_TYPE (vec_oprnd)));
6862 	  bool simd_lane_access_p
6863 	    = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6864 	  if (simd_lane_access_p
6865 	      && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6866 	      && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6867 	      && integer_zerop (DR_OFFSET (first_dr))
6868 	      && integer_zerop (DR_INIT (first_dr))
6869 	      && alias_sets_conflict_p (get_alias_set (aggr_type),
6870 					get_alias_set (TREE_TYPE (ref_type))))
6871 	    {
6872 	      dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
6873 	      dataref_offset = build_int_cst (ref_type, 0);
6874 	      inv_p = false;
6875 	    }
6876 	  else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6877 	    {
6878 	      vect_get_gather_scatter_ops (loop, stmt, &gs_info,
6879 					   &dataref_ptr, &vec_offset);
6880 	      inv_p = false;
6881 	    }
6882 	  else
6883 	    dataref_ptr
6884 	      = vect_create_data_ref_ptr (first_stmt, aggr_type,
6885 					  simd_lane_access_p ? loop : NULL,
6886 					  offset, &dummy, gsi, &ptr_incr,
6887 					  simd_lane_access_p, &inv_p,
6888 					  NULL_TREE, bump);
6889 	  gcc_assert (bb_vinfo || !inv_p);
6890 	}
6891       else
6892 	{
6893 	  /* For interleaved stores we created vectorized defs for all the
6894 	     defs stored in OPRNDS in the previous iteration (previous copy).
6895 	     DR_CHAIN is then used as an input to vect_permute_store_chain(),
6896 	     and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
6897 	     next copy.
6898 	     If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6899 	     OPRNDS are of size 1.  */
6900 	  for (i = 0; i < group_size; i++)
6901 	    {
6902 	      op = oprnds[i];
6903 	      vect_is_simple_use (op, vinfo, &def_stmt, &rhs_dt);
6904 	      vec_oprnd = vect_get_vec_def_for_stmt_copy (rhs_dt, op);
6905 	      dr_chain[i] = vec_oprnd;
6906 	      oprnds[i] = vec_oprnd;
6907 	    }
6908 	  if (mask)
6909 	    vec_mask = vect_get_vec_def_for_stmt_copy (mask_dt, vec_mask);
6910 	  if (dataref_offset)
6911 	    dataref_offset
6912 	      = int_const_binop (PLUS_EXPR, dataref_offset, bump);
6913 	  else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6914 	    vec_offset = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
6915 							 vec_offset);
6916 	  else
6917 	    dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
6918 					   bump);
6919 	}
6920 
6921       if (memory_access_type == VMAT_LOAD_STORE_LANES)
6922 	{
6923 	  tree vec_array;
6924 
6925 	  /* Combine all the vectors into an array.  */
6926 	  vec_array = create_vector_array (vectype, vec_num);
6927 	  for (i = 0; i < vec_num; i++)
6928 	    {
6929 	      vec_oprnd = dr_chain[i];
6930 	      write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
6931 	    }
6932 
6933 	  tree final_mask = NULL;
6934 	  if (loop_masks)
6935 	    final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
6936 					     vectype, j);
6937 	  if (vec_mask)
6938 	    final_mask = prepare_load_store_mask (mask_vectype, final_mask,
6939 						  vec_mask, gsi);
6940 
6941 	  gcall *call;
6942 	  if (final_mask)
6943 	    {
6944 	      /* Emit:
6945 		   MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
6946 				     VEC_ARRAY).  */
6947 	      unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
6948 	      tree alias_ptr = build_int_cst (ref_type, align);
6949 	      call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
6950 						 dataref_ptr, alias_ptr,
6951 						 final_mask, vec_array);
6952 	    }
6953 	  else
6954 	    {
6955 	      /* Emit:
6956 		   MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY).  */
6957 	      data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
6958 	      call = gimple_build_call_internal (IFN_STORE_LANES, 1,
6959 						 vec_array);
6960 	      gimple_call_set_lhs (call, data_ref);
6961 	    }
6962 	  gimple_call_set_nothrow (call, true);
6963 	  new_stmt = call;
6964 	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
6965 	}
6966       else
6967 	{
6968 	  new_stmt = NULL;
6969 	  if (grouped_store)
6970 	    {
6971 	      if (j == 0)
6972 		result_chain.create (group_size);
6973 	      /* Permute.  */
6974 	      vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
6975 					&result_chain);
6976 	    }
6977 
6978 	  next_stmt = first_stmt;
6979 	  for (i = 0; i < vec_num; i++)
6980 	    {
6981 	      unsigned align, misalign;
6982 
6983 	      tree final_mask = NULL_TREE;
6984 	      if (loop_masks)
6985 		final_mask = vect_get_loop_mask (gsi, loop_masks,
6986 						 vec_num * ncopies,
6987 						 vectype, vec_num * j + i);
6988 	      if (vec_mask)
6989 		final_mask = prepare_load_store_mask (mask_vectype, final_mask,
6990 						      vec_mask, gsi);
6991 
6992 	      if (memory_access_type == VMAT_GATHER_SCATTER)
6993 		{
6994 		  tree scale = size_int (gs_info.scale);
6995 		  gcall *call;
6996 		  if (loop_masks)
6997 		    call = gimple_build_call_internal
6998 		      (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
6999 		       scale, vec_oprnd, final_mask);
7000 		  else
7001 		    call = gimple_build_call_internal
7002 		      (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
7003 		       scale, vec_oprnd);
7004 		  gimple_call_set_nothrow (call, true);
7005 		  new_stmt = call;
7006 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
7007 		  break;
7008 		}
7009 
7010 	      if (i > 0)
7011 		/* Bump the vector pointer.  */
7012 		dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7013 					       stmt, bump);
7014 
7015 	      if (slp)
7016 		vec_oprnd = vec_oprnds[i];
7017 	      else if (grouped_store)
7018 		/* For grouped stores vectorized defs are interleaved in
7019 		   vect_permute_store_chain().  */
7020 		vec_oprnd = result_chain[i];
7021 
7022 	      align = DR_TARGET_ALIGNMENT (first_dr);
7023 	      if (aligned_access_p (first_dr))
7024 		misalign = 0;
7025 	      else if (DR_MISALIGNMENT (first_dr) == -1)
7026 		{
7027 		  align = dr_alignment (vect_dr_behavior (first_dr));
7028 		  misalign = 0;
7029 		}
7030 	      else
7031 		misalign = DR_MISALIGNMENT (first_dr);
7032 	      if (dataref_offset == NULL_TREE
7033 		  && TREE_CODE (dataref_ptr) == SSA_NAME)
7034 		set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
7035 					misalign);
7036 
7037 	      if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7038 		{
7039 		  tree perm_mask = perm_mask_for_reverse (vectype);
7040 		  tree perm_dest
7041 		    = vect_create_destination_var (vect_get_store_rhs (stmt),
7042 						   vectype);
7043 		  tree new_temp = make_ssa_name (perm_dest);
7044 
7045 		  /* Generate the permute statement.  */
7046 		  gimple *perm_stmt
7047 		    = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
7048 					   vec_oprnd, perm_mask);
7049 		  vect_finish_stmt_generation (stmt, perm_stmt, gsi);
7050 
7051 		  perm_stmt = SSA_NAME_DEF_STMT (new_temp);
7052 		  vec_oprnd = new_temp;
7053 		}
7054 
7055 	      /* Arguments are ready.  Create the new vector stmt.  */
7056 	      if (final_mask)
7057 		{
7058 		  align = least_bit_hwi (misalign | align);
7059 		  tree ptr = build_int_cst (ref_type, align);
7060 		  gcall *call
7061 		    = gimple_build_call_internal (IFN_MASK_STORE, 4,
7062 						  dataref_ptr, ptr,
7063 						  final_mask, vec_oprnd);
7064 		  gimple_call_set_nothrow (call, true);
7065 		  new_stmt = call;
7066 		}
7067 	      else
7068 		{
7069 		  data_ref = fold_build2 (MEM_REF, vectype,
7070 					  dataref_ptr,
7071 					  dataref_offset
7072 					  ? dataref_offset
7073 					  : build_int_cst (ref_type, 0));
7074 		  if (aligned_access_p (first_dr))
7075 		    ;
7076 		  else if (DR_MISALIGNMENT (first_dr) == -1)
7077 		    TREE_TYPE (data_ref)
7078 		      = build_aligned_type (TREE_TYPE (data_ref),
7079 					    align * BITS_PER_UNIT);
7080 		  else
7081 		    TREE_TYPE (data_ref)
7082 		      = build_aligned_type (TREE_TYPE (data_ref),
7083 					    TYPE_ALIGN (elem_type));
7084 		  vect_copy_ref_info (data_ref, DR_REF (first_dr));
7085 		  new_stmt = gimple_build_assign (data_ref, vec_oprnd);
7086 		}
7087 	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
7088 
7089 	      if (slp)
7090 		continue;
7091 
7092 	      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
7093 	      if (!next_stmt)
7094 		break;
7095 	    }
7096 	}
7097       if (!slp)
7098 	{
7099 	  if (j == 0)
7100 	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7101 	  else
7102 	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7103 	  prev_stmt_info = vinfo_for_stmt (new_stmt);
7104 	}
7105     }
7106 
7107   oprnds.release ();
7108   result_chain.release ();
7109   vec_oprnds.release ();
7110 
7111   return true;
7112 }
7113 
7114 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
7115    VECTOR_CST mask.  No checks are made that the target platform supports the
7116    mask, so callers may wish to test can_vec_perm_const_p separately, or use
7117    vect_gen_perm_mask_checked.  */
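/* For example, with a 4-element VECTYPE, SEL = {3, 2, 1, 0} produces the
   mask used (with identical VEC_PERM_EXPR operands) to reverse the
   vector's elements.  */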
7118 
7119 tree
7120 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
7121 {
7122   tree mask_type;
7123 
7124   poly_uint64 nunits = sel.length ();
7125   gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
7126 
7127   mask_type = build_vector_type (ssizetype, nunits);
7128   return vec_perm_indices_to_tree (mask_type, sel);
7129 }
7130 
7131 /* Checked version of vect_gen_perm_mask_any.  Asserts can_vec_perm_const_p,
7132    i.e. that the target supports the pattern _for arbitrary input vectors_.  */
7133 
7134 tree
7135 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
7136 {
7137   gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
7138   return vect_gen_perm_mask_any (vectype, sel);
7139 }
7140 
/* Given vector variables X and Y, generated for the scalar STMT, generate
   instructions to permute the vector elements of X and Y using the
   permutation mask MASK_VEC, insert them at *GSI and return the permuted
   vector variable.  */
7145 
7146 static tree
7147 permute_vec_elements (tree x, tree y, tree mask_vec, gimple *stmt,
7148 		      gimple_stmt_iterator *gsi)
7149 {
7150   tree vectype = TREE_TYPE (x);
7151   tree perm_dest, data_ref;
7152   gimple *perm_stmt;
7153 
7154   tree scalar_dest = gimple_get_lhs (stmt);
7155   if (TREE_CODE (scalar_dest) == SSA_NAME)
7156     perm_dest = vect_create_destination_var (scalar_dest, vectype);
7157   else
7158     perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
7159   data_ref = make_ssa_name (perm_dest);
7160 
7161   /* Generate the permute statement.  */
7162   perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
7163   vect_finish_stmt_generation (stmt, perm_stmt, gsi);
7164 
7165   return data_ref;
7166 }
7167 
/* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
   inserting them on the loop's preheader edge.  Returns true if we
   were successful in doing so (and thus STMT can then be moved),
   otherwise returns false.  */
7172 
7173 static bool
7174 hoist_defs_of_uses (gimple *stmt, struct loop *loop)
7175 {
7176   ssa_op_iter i;
7177   tree op;
7178   bool any = false;
7179 
7180   FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
7181     {
7182       gimple *def_stmt = SSA_NAME_DEF_STMT (op);
7183       if (!gimple_nop_p (def_stmt)
7184 	  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
7185 	{
	  /* Make sure we don't need to recurse.  While we could do
	     so in simple cases, for more complex use webs we don't
	     have an easy way to preserve stmt order to fulfil
	     dependencies within them.  */
7190 	  tree op2;
7191 	  ssa_op_iter i2;
7192 	  if (gimple_code (def_stmt) == GIMPLE_PHI)
7193 	    return false;
7194 	  FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
7195 	    {
7196 	      gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
7197 	      if (!gimple_nop_p (def_stmt2)
7198 		  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
7199 		return false;
7200 	    }
7201 	  any = true;
7202 	}
7203     }
7204 
7205   if (!any)
7206     return true;
7207 
7208   FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
7209     {
7210       gimple *def_stmt = SSA_NAME_DEF_STMT (op);
7211       if (!gimple_nop_p (def_stmt)
7212 	  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
7213 	{
7214 	  gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
7215 	  gsi_remove (&gsi, false);
7216 	  gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
7217 	}
7218     }
7219 
7220   return true;
7221 }
7222 
7223 /* vectorizable_load.
7224 
   Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
   can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
7230 
7231 static bool
7232 vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
7233                    slp_tree slp_node, slp_instance slp_node_instance)
7234 {
7235   tree scalar_dest;
7236   tree vec_dest = NULL;
7237   tree data_ref = NULL;
7238   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7239   stmt_vec_info prev_stmt_info;
7240   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7241   struct loop *loop = NULL;
7242   struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
7243   bool nested_in_vect_loop = false;
7244   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
7245   tree elem_type;
7246   tree new_temp;
7247   machine_mode mode;
7248   gimple *new_stmt = NULL;
7249   tree dummy;
7250   enum dr_alignment_support alignment_support_scheme;
7251   tree dataref_ptr = NULL_TREE;
7252   tree dataref_offset = NULL_TREE;
7253   gimple *ptr_incr = NULL;
7254   int ncopies;
7255   int i, j;
7256   unsigned int group_size;
7257   poly_uint64 group_gap_adj;
7258   tree msq = NULL_TREE, lsq;
7259   tree offset = NULL_TREE;
7260   tree byte_offset = NULL_TREE;
7261   tree realignment_token = NULL_TREE;
7262   gphi *phi = NULL;
7263   vec<tree> dr_chain = vNULL;
7264   bool grouped_load = false;
7265   gimple *first_stmt;
7266   gimple *first_stmt_for_drptr = NULL;
7267   bool inv_p;
7268   bool compute_in_loop = false;
7269   struct loop *at_loop;
7270   int vec_num;
7271   bool slp = (slp_node != NULL);
7272   bool slp_perm = false;
7273   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7274   poly_uint64 vf;
7275   tree aggr_type;
7276   gather_scatter_info gs_info;
7277   vec_info *vinfo = stmt_info->vinfo;
7278   tree ref_type;
7279   enum vect_def_type mask_dt = vect_unknown_def_type;
7280 
7281   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7282     return false;
7283 
7284   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7285       && ! vec_stmt)
7286     return false;
7287 
7288   tree mask = NULL_TREE, mask_vectype = NULL_TREE;
7289   if (is_gimple_assign (stmt))
7290     {
7291       scalar_dest = gimple_assign_lhs (stmt);
7292       if (TREE_CODE (scalar_dest) != SSA_NAME)
7293 	return false;
7294 
7295       tree_code code = gimple_assign_rhs_code (stmt);
7296       if (code != ARRAY_REF
7297 	  && code != BIT_FIELD_REF
7298 	  && code != INDIRECT_REF
7299 	  && code != COMPONENT_REF
7300 	  && code != IMAGPART_EXPR
7301 	  && code != REALPART_EXPR
7302 	  && code != MEM_REF
7303 	  && TREE_CODE_CLASS (code) != tcc_declaration)
7304 	return false;
7305     }
7306   else
7307     {
7308       gcall *call = dyn_cast <gcall *> (stmt);
7309       if (!call || !gimple_call_internal_p (call))
7310 	return false;
7311 
7312       internal_fn ifn = gimple_call_internal_fn (call);
7313       if (!internal_load_fn_p (ifn))
7314 	return false;
7315 
7316       scalar_dest = gimple_call_lhs (call);
7317       if (!scalar_dest)
7318 	return false;
7319 
7320       if (slp_node != NULL)
7321 	{
7322 	  if (dump_enabled_p ())
7323 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7324 			     "SLP of masked loads not supported.\n");
7325 	  return false;
7326 	}
7327 
7328       int mask_index = internal_fn_mask_index (ifn);
7329       if (mask_index >= 0)
7330 	{
7331 	  mask = gimple_call_arg (call, mask_index);
7332 	  if (!vect_check_load_store_mask (stmt, mask, &mask_dt,
7333 					   &mask_vectype))
7334 	    return false;
7335 	}
7336     }
7337 
7338   if (!STMT_VINFO_DATA_REF (stmt_info))
7339     return false;
7340 
7341   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7342   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7343 
7344   if (loop_vinfo)
7345     {
7346       loop = LOOP_VINFO_LOOP (loop_vinfo);
7347       nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
7348       vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7349     }
7350   else
7351     vf = 1;
7352 
7353   /* Multiple types in SLP are handled by creating the appropriate number of
7354      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
7355      case of SLP.  */
7356   if (slp)
7357     ncopies = 1;
7358   else
7359     ncopies = vect_get_num_copies (loop_vinfo, vectype);
7360 
7361   gcc_assert (ncopies >= 1);
7362 
7363   /* FORNOW. This restriction should be relaxed.  */
7364   if (nested_in_vect_loop && ncopies > 1)
7365     {
7366       if (dump_enabled_p ())
7367         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7368                          "multiple types in nested loop.\n");
7369       return false;
7370     }
7371 
7372   /* Invalidate assumptions made by dependence analysis when vectorization
7373      on the unrolled body effectively re-orders stmts.  */
7374   if (ncopies > 1
7375       && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
7376       && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
7377 		   STMT_VINFO_MIN_NEG_DIST (stmt_info)))
7378     {
7379       if (dump_enabled_p ())
7380 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7381 			 "cannot perform implicit CSE when unrolling "
7382 			 "with negative dependence distance\n");
7383       return false;
7384     }
7385 
7386   elem_type = TREE_TYPE (vectype);
7387   mode = TYPE_MODE (vectype);
7388 
7389   /* FORNOW. In some cases can vectorize even if data-type not supported
7390     (e.g. - data copies).  */
7391   if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
7392     {
7393       if (dump_enabled_p ())
7394         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7395                          "Aligned load, but unsupported type.\n");
7396       return false;
7397     }
7398 
7399   /* Check if the load is a part of an interleaving chain.  */
7400   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
7401     {
7402       grouped_load = true;
7403       /* FORNOW */
7404       gcc_assert (!nested_in_vect_loop);
7405       gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
7406 
7407       first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
7408       group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
7409 
7410       if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
7411 	slp_perm = true;
7412 
7413       /* Invalidate assumptions made by dependence analysis when vectorization
7414 	 on the unrolled body effectively re-orders stmts.  */
7415       if (!PURE_SLP_STMT (stmt_info)
7416 	  && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
7417 	  && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
7418 		       STMT_VINFO_MIN_NEG_DIST (stmt_info)))
7419 	{
7420 	  if (dump_enabled_p ())
7421 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7422 			     "cannot perform implicit CSE when performing "
7423 			     "group loads with negative dependence distance\n");
7424 	  return false;
7425 	}
7426 
      /* Similarly, when the stmt is a load that is both part of an SLP
	 instance and a loop vectorized stmt via the same-dr mechanism,
	 we have to give up.  */
7430       if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)
7431 	  && (STMT_SLP_TYPE (stmt_info)
7432 	      != STMT_SLP_TYPE (vinfo_for_stmt
7433 				 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)))))
7434 	{
7435 	  if (dump_enabled_p ())
7436 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7437 			     "conflicting SLP types for CSEd load\n");
7438 	  return false;
7439 	}
7440     }
7441   else
7442     group_size = 1;
7443 
7444   vect_memory_access_type memory_access_type;
7445   if (!get_load_store_type (stmt, vectype, slp, mask, VLS_LOAD, ncopies,
7446 			    &memory_access_type, &gs_info))
7447     return false;
7448 
7449   if (mask)
7450     {
7451       if (memory_access_type == VMAT_CONTIGUOUS)
7452 	{
7453 	  machine_mode vec_mode = TYPE_MODE (vectype);
7454 	  if (!VECTOR_MODE_P (vec_mode)
7455 	      || !can_vec_mask_load_store_p (vec_mode,
7456 					     TYPE_MODE (mask_vectype), true))
7457 	    return false;
7458 	}
7459       else if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7460 	{
7461 	  tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
7462 	  tree masktype
7463 	    = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
7464 	  if (TREE_CODE (masktype) == INTEGER_TYPE)
7465 	    {
7466 	      if (dump_enabled_p ())
7467 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7468 				 "masked gather with integer mask not"
7469 				 " supported.");
7470 	      return false;
7471 	    }
7472 	}
7473       else if (memory_access_type != VMAT_LOAD_STORE_LANES
7474 	       && memory_access_type != VMAT_GATHER_SCATTER)
7475 	{
7476 	  if (dump_enabled_p ())
7477 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7478 			     "unsupported access type for masked load.\n");
7479 	  return false;
7480 	}
7481     }
7482 
7483   if (!vec_stmt) /* transformation not required.  */
7484     {
7485       if (!slp)
7486 	STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7487 
7488       if (loop_vinfo
7489 	  && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
7490 	check_load_store_masking (loop_vinfo, vectype, VLS_LOAD, group_size,
7491 				  memory_access_type, &gs_info);
7492 
7493       STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
7494       /* The SLP costs are calculated during SLP analysis.  */
7495       if (! slp_node)
7496 	vect_model_load_cost (stmt_info, ncopies, memory_access_type,
7497 			      NULL, NULL, NULL);
7498       return true;
7499     }
7500 
7501   if (!slp)
7502     gcc_assert (memory_access_type
7503 		== STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7504 
7505   if (dump_enabled_p ())
7506     dump_printf_loc (MSG_NOTE, vect_location,
7507                      "transform load. ncopies = %d\n", ncopies);
7508 
7509   /* Transform.  */
7510 
7511   ensure_base_align (dr);
7512 
7513   if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7514     {
7515       vect_build_gather_load_calls (stmt, gsi, vec_stmt, &gs_info, mask,
7516 				    mask_dt);
7517       return true;
7518     }
7519 
7520   if (memory_access_type == VMAT_ELEMENTWISE
7521       || memory_access_type == VMAT_STRIDED_SLP)
7522     {
7523       gimple_stmt_iterator incr_gsi;
7524       bool insert_after;
7525       gimple *incr;
7526       tree offvar;
7527       tree ivstep;
7528       tree running_off;
7529       vec<constructor_elt, va_gc> *v = NULL;
7530       tree stride_base, stride_step, alias_off;
7531       /* Checked by get_load_store_type.  */
7532       unsigned int const_nunits = nunits.to_constant ();
7533       unsigned HOST_WIDE_INT cst_offset = 0;
7534 
7535       gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
7536       gcc_assert (!nested_in_vect_loop);
7537 
7538       if (grouped_load)
7539 	{
7540 	  first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
7541 	  first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
7542 	}
7543       else
7544 	{
7545 	  first_stmt = stmt;
7546 	  first_dr = dr;
7547 	}
7548       if (slp && grouped_load)
7549 	{
7550 	  group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
7551 	  ref_type = get_group_alias_ptr_type (first_stmt);
7552 	}
7553       else
7554 	{
7555 	  if (grouped_load)
7556 	    cst_offset
7557 	      = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
7558 		 * vect_get_place_in_interleaving_chain (stmt, first_stmt));
7559 	  group_size = 1;
7560 	  ref_type = reference_alias_ptr_type (DR_REF (dr));
7561 	}
7562 
7563       stride_base
7564 	= fold_build_pointer_plus
7565 	    (DR_BASE_ADDRESS (first_dr),
7566 	     size_binop (PLUS_EXPR,
7567 			 convert_to_ptrofftype (DR_OFFSET (first_dr)),
7568 			 convert_to_ptrofftype (DR_INIT (first_dr))));
7569       stride_step = fold_convert (sizetype, DR_STEP (first_dr));
7570 
      /* For a load with a loop-invariant (but non-power-of-2) stride,
	 i.e. not a grouped access, like so:
7573 
7574 	   for (i = 0; i < n; i += stride)
7575 	     ... = array[i];
7576 
7577 	 we generate a new induction variable and new accesses to
7578 	 form a new vector (or vectors, depending on ncopies):
7579 
7580 	   for (j = 0; ; j += VF*stride)
7581 	     tmp1 = array[j];
7582 	     tmp2 = array[j + stride];
7583 	     ...
7584 	     vectemp = {tmp1, tmp2, ...}
7585          */
7586 
7587       ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
7588 			    build_int_cst (TREE_TYPE (stride_step), vf));
7589 
7590       standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7591 
7592       stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
7593       ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
7594       create_iv (stride_base, ivstep, NULL,
7595 		 loop, &incr_gsi, insert_after,
7596 		 &offvar, NULL);
7597       incr = gsi_stmt (incr_gsi);
7598       set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
7599 
7600       stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
7601 
7602       prev_stmt_info = NULL;
7603       running_off = offvar;
7604       alias_off = build_int_cst (ref_type, 0);
7605       int nloads = const_nunits;
7606       int lnel = 1;
7607       tree ltype = TREE_TYPE (vectype);
7608       tree lvectype = vectype;
7609       auto_vec<tree> dr_chain;
7610       if (memory_access_type == VMAT_STRIDED_SLP)
7611 	{
7612 	  if (group_size < const_nunits)
7613 	    {
7614 	      /* First check if vec_init optab supports construction from
7615 		 vector elts directly.  */
7616 	      scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
7617 	      machine_mode vmode;
7618 	      if (mode_for_vector (elmode, group_size).exists (&vmode)
7619 		  && VECTOR_MODE_P (vmode)
7620 		  && targetm.vector_mode_supported_p (vmode)
7621 		  && (convert_optab_handler (vec_init_optab,
7622 					     TYPE_MODE (vectype), vmode)
7623 		      != CODE_FOR_nothing))
7624 		{
7625 		  nloads = const_nunits / group_size;
7626 		  lnel = group_size;
7627 		  ltype = build_vector_type (TREE_TYPE (vectype), group_size);
7628 		}
7629 	      else
7630 		{
7631 		  /* Otherwise avoid emitting a constructor of vector elements
7632 		     by performing the loads using an integer type of the same
7633 		     size, constructing a vector of those and then
7634 		     re-interpreting it as the original vector type.
7635 		     This avoids a huge runtime penalty due to the general
7636 		     inability to perform store forwarding from smaller stores
7637 		     to a larger load.  */
7638 		  unsigned lsize
7639 		    = group_size * TYPE_PRECISION (TREE_TYPE (vectype));
7640 		  elmode = int_mode_for_size (lsize, 0).require ();
7641 		  unsigned int lnunits = const_nunits / group_size;
7642 		  /* If we can't construct such a vector fall back to
7643 		     element loads of the original vector type.  */
7644 		  if (mode_for_vector (elmode, lnunits).exists (&vmode)
7645 		      && VECTOR_MODE_P (vmode)
7646 		      && targetm.vector_mode_supported_p (vmode)
7647 		      && (convert_optab_handler (vec_init_optab, vmode, elmode)
7648 			  != CODE_FOR_nothing))
7649 		    {
7650 		      nloads = lnunits;
7651 		      lnel = group_size;
7652 		      ltype = build_nonstandard_integer_type (lsize, 1);
7653 		      lvectype = build_vector_type (ltype, nloads);
7654 		    }
7655 		}
7656 	    }
7657 	  else
7658 	    {
7659 	      nloads = 1;
7660 	      lnel = const_nunits;
7661 	      ltype = vectype;
7662 	    }
7663 	  ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
7664 	}
      /* If the vectype has just a single element, load it directly as a
	 vector(1) scalar_type.  */
7666       else if (nloads == 1)
7667 	ltype = vectype;
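      /* Whatever the case, NLOADS * LNEL == CONST_NUNITS: each vector is
	 assembled from NLOADS loads of LNEL elements each, every load
	 having type LTYPE, and the result is built in a vector of type
	 LVECTYPE before being punned back to VECTYPE if necessary.  */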
7668 
7669       if (slp)
7670 	{
7671 	  /* For SLP permutation support we need to load the whole group,
7672 	     not only the number of vector stmts the permutation result
7673 	     fits in.  */
7674 	  if (slp_perm)
7675 	    {
7676 	      /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7677 		 variable VF.  */
7678 	      unsigned int const_vf = vf.to_constant ();
7679 	      ncopies = CEIL (group_size * const_vf, const_nunits);
7680 	      dr_chain.create (ncopies);
7681 	    }
7682 	  else
7683 	    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7684 	}
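      /* GROUP_EL counts the scalar elements of the group loaded since the
	 last bump of RUNNING_OFF; ELSZ is the element size in bytes.  */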
7685       unsigned int group_el = 0;
7686       unsigned HOST_WIDE_INT
7687 	elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7688       for (j = 0; j < ncopies; j++)
7689 	{
7690 	  if (nloads > 1)
7691 	    vec_alloc (v, nloads);
7692 	  for (i = 0; i < nloads; i++)
7693 	    {
7694 	      tree this_off = build_int_cst (TREE_TYPE (alias_off),
7695 					     group_el * elsz + cst_offset);
7696 	      tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
7697 	      vect_copy_ref_info (data_ref, DR_REF (first_dr));
7698 	      new_stmt = gimple_build_assign (make_ssa_name (ltype), data_ref);
7699 	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
7700 	      if (nloads > 1)
7701 		CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
7702 					gimple_assign_lhs (new_stmt));
7703 
7704 	      group_el += lnel;
7705 	      if (! slp
7706 		  || group_el == group_size)
7707 		{
7708 		  tree newoff = copy_ssa_name (running_off);
7709 		  gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7710 						      running_off, stride_step);
7711 		  vect_finish_stmt_generation (stmt, incr, gsi);
7712 
7713 		  running_off = newoff;
7714 		  group_el = 0;
7715 		}
7716 	    }
7717 	  if (nloads > 1)
7718 	    {
7719 	      tree vec_inv = build_constructor (lvectype, v);
7720 	      new_temp = vect_init_vector (stmt, vec_inv, lvectype, gsi);
7721 	      new_stmt = SSA_NAME_DEF_STMT (new_temp);
7722 	      if (lvectype != vectype)
7723 		{
7724 		  new_stmt = gimple_build_assign (make_ssa_name (vectype),
7725 						  VIEW_CONVERT_EXPR,
7726 						  build1 (VIEW_CONVERT_EXPR,
7727 							  vectype, new_temp));
7728 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
7729 		}
7730 	    }
7731 
7732 	  if (slp)
7733 	    {
7734 	      if (slp_perm)
7735 		dr_chain.quick_push (gimple_assign_lhs (new_stmt));
7736 	      else
7737 		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7738 	    }
7739 	  else
7740 	    {
7741 	      if (j == 0)
7742 		STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7743 	      else
7744 		STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7745 	      prev_stmt_info = vinfo_for_stmt (new_stmt);
7746 	    }
7747 	}
7748       if (slp_perm)
7749 	{
7750 	  unsigned n_perms;
7751 	  vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7752 					slp_node_instance, false, &n_perms);
7753 	}
7754       return true;
7755     }
7756 
7757   if (memory_access_type == VMAT_GATHER_SCATTER
7758       || (!slp && memory_access_type == VMAT_CONTIGUOUS))
7759     grouped_load = false;
7760 
7761   if (grouped_load)
7762     {
7763       first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
7764       group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
7765       /* For SLP vectorization we directly vectorize a subchain
7766          without permutation.  */
7767       if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
7768 	first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7769       /* For BB vectorization always use the first stmt to base
7770 	 the data ref pointer on.  */
7771       if (bb_vinfo)
7772 	first_stmt_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7773 
7774       /* Check if the chain of loads is already vectorized.  */
7775       if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
7776 	  /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
7777 	     ???  But we can only do so if there is exactly one
7778 	     as we have no way to get at the rest.  Leave the CSE
7779 	     opportunity alone.
7780 	     ???  With the group load eventually participating
7781 	     in multiple different permutations (having multiple
7782 	     slp nodes which refer to the same group) the CSE
7783 	     is even wrong code.  See PR56270.  */
7784 	  && !slp)
7785 	{
7786 	  *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7787 	  return true;
7788 	}
7789       first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
7790       group_gap_adj = 0;
7791 
7792       /* VEC_NUM is the number of vect stmts to be created for this group.  */
7793       if (slp)
7794 	{
7795 	  grouped_load = false;
7796 	  /* For SLP permutation support we need to load the whole group,
7797 	     not only the number of vector stmts the permutation result
7798 	     fits in.  */
7799 	  if (slp_perm)
7800 	    {
7801 	      /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7802 		 variable VF.  */
7803 	      unsigned int const_vf = vf.to_constant ();
7804 	      unsigned int const_nunits = nunits.to_constant ();
7805 	      vec_num = CEIL (group_size * const_vf, const_nunits);
7806 	      group_gap_adj = vf * group_size - nunits * vec_num;
7807 	    }
7808 	  else
7809 	    {
7810 	      vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7811 	      group_gap_adj
7812 		= group_size - SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
7813 	    }
        }
7815       else
7816 	vec_num = group_size;
7817 
7818       ref_type = get_group_alias_ptr_type (first_stmt);
7819     }
7820   else
7821     {
7822       first_stmt = stmt;
7823       first_dr = dr;
7824       group_size = vec_num = 1;
7825       group_gap_adj = 0;
7826       ref_type = reference_alias_ptr_type (DR_REF (first_dr));
7827     }
7828 
7829   alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
7830   gcc_assert (alignment_support_scheme);
7831   vec_loop_masks *loop_masks
7832     = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
7833        ? &LOOP_VINFO_MASKS (loop_vinfo)
7834        : NULL);
7835   /* Targets with store-lane instructions must not require explicit
7836      realignment.  vect_supportable_dr_alignment always returns either
7837      dr_aligned or dr_unaligned_supported for masked operations.  */
7838   gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
7839 	       && !mask
7840 	       && !loop_masks)
7841 	      || alignment_support_scheme == dr_aligned
7842 	      || alignment_support_scheme == dr_unaligned_supported);
7843 
7844   /* In case the vectorization factor (VF) is bigger than the number
7845      of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e., we need to "unroll" the
7847      vector stmt by a factor VF/nunits.  In doing so, we record a pointer
7848      from one copy of the vector stmt to the next, in the field
7849      STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
7850      stages to find the correct vector defs to be used when vectorizing
7851      stmts that use the defs of the current stmt.  The example below
7852      illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
7853      need to create 4 vectorized stmts):
7854 
7855      before vectorization:
7856                                 RELATED_STMT    VEC_STMT
7857         S1:     x = memref      -               -
7858         S2:     z = x + 1       -               -
7859 
7860      step 1: vectorize stmt S1:
7861         We first create the vector stmt VS1_0, and, as usual, record a
7862         pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
7863         Next, we create the vector stmt VS1_1, and record a pointer to
7864         it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
7865         Similarly, for VS1_2 and VS1_3.  This is the resulting chain of
7866         stmts and pointers:
7867                                 RELATED_STMT    VEC_STMT
7868         VS1_0:  vx0 = memref0   VS1_1           -
7869         VS1_1:  vx1 = memref1   VS1_2           -
7870         VS1_2:  vx2 = memref2   VS1_3           -
7871         VS1_3:  vx3 = memref3   -               -
        S1:     x = memref      -               VS1_0
7873         S2:     z = x + 1       -               -
7874 
     See the documentation of vect_get_vec_def_for_stmt_copy for how the
     information we recorded in the RELATED_STMT field is used to vectorize
     stmt S2.  */
7878 
7879   /* In case of interleaving (non-unit grouped access):
7880 
7881      S1:  x2 = &base + 2
7882      S2:  x0 = &base
7883      S3:  x1 = &base + 1
7884      S4:  x3 = &base + 3
7885 
7886      Vectorized loads are created in the order of memory accesses
7887      starting from the access of the first stmt of the chain:
7888 
     VS1: vx0 = &base
     VS2: vx1 = &base + vec_size*1
     VS3: vx2 = &base + vec_size*2
     VS4: vx3 = &base + vec_size*3
7893 
7894      Then permutation statements are generated:
7895 
7896      VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
7897      VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
7898        ...
7899 
7900      And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7901      (the order of the data-refs in the output of vect_permute_load_chain
7902      corresponds to the order of scalar stmts in the interleaving chain - see
7903      the documentation of vect_permute_load_chain()).
7904      The generation of permutation stmts and recording them in
7905      STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
7906 
7907      In case of both multiple types and interleaving, the vector loads and
7908      permutation stmts above are created for every copy.  The result vector
7909      stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
7910      corresponding STMT_VINFO_RELATED_STMT for the next copies.  */
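
  /* As an illustrative sketch (names are made up, not taken from a dump):
     with V4SI vectors and a group of two interleaved loads a[2*i] and
     a[2*i+1], the scheme above becomes

       vx0 = MEM <vector(4) int> [&a];
       vx1 = MEM <vector(4) int> [&a + 16];
       vx5 = VEC_PERM_EXPR <vx0, vx1, { 0, 2, 4, 6 }>;
       vx6 = VEC_PERM_EXPR <vx0, vx1, { 1, 3, 5, 7 }>;

     with vx5 holding the even (a[2*i]) and vx6 the odd (a[2*i+1])
     elements.  */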
7911 
7912   /* If the data reference is aligned (dr_aligned) or potentially unaligned
7913      on a target that supports unaligned accesses (dr_unaligned_supported)
7914      we generate the following code:
7915          p = initial_addr;
7916          indx = 0;
7917          loop {
7918 	   p = p + indx * vectype_size;
7919            vec_dest = *(p);
7920            indx = indx + 1;
7921          }
7922 
7923      Otherwise, the data reference is potentially unaligned on a target that
7924      does not support unaligned accesses (dr_explicit_realign_optimized) -
7925      then generate the following code, in which the data in each iteration is
7926      obtained by two vector loads, one from the previous iteration, and one
7927      from the current iteration:
7928          p1 = initial_addr;
7929          msq_init = *(floor(p1))
7930          p2 = initial_addr + VS - 1;
7931          realignment_token = call target_builtin;
7932          indx = 0;
7933          loop {
7934            p2 = p2 + indx * vectype_size
7935            lsq = *(floor(p2))
7936            vec_dest = realign_load (msq, lsq, realignment_token)
7937            indx = indx + 1;
7938            msq = lsq;
7939          }   */
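
  /* A rough numeric example: with 16-byte vectors and initial_addr equal
     to base + 4, where base is 16-byte aligned, msq covers bytes
     [base, base + 16) and lsq covers [base + 16, base + 32); realign_load
     then extracts the 16 bytes starting at base + 4 from the msq/lsq pair
     (the exact shuffle is target-specific and driven by
     realignment_token).  */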
7940 
7941   /* If the misalignment remains the same throughout the execution of the
7942      loop, we can create the init_addr and permutation mask at the loop
7943      preheader.  Otherwise, it needs to be created inside the loop.
7944      This can only occur when vectorizing memory accesses in the inner-loop
7945      nested within an outer-loop that is being vectorized.  */
7946 
7947   if (nested_in_vect_loop
7948       && !multiple_p (DR_STEP_ALIGNMENT (dr),
7949 		      GET_MODE_SIZE (TYPE_MODE (vectype))))
7950     {
7951       gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
7952       compute_in_loop = true;
7953     }
7954 
7955   if ((alignment_support_scheme == dr_explicit_realign_optimized
7956        || alignment_support_scheme == dr_explicit_realign)
7957       && !compute_in_loop)
7958     {
7959       msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
7960 				    alignment_support_scheme, NULL_TREE,
7961 				    &at_loop);
7962       if (alignment_support_scheme == dr_explicit_realign_optimized)
7963 	{
7964 	  phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
7965 	  byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
7966 				    size_one_node);
7967 	}
7968     }
7969   else
7970     at_loop = loop;
7971 
7972   if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7973     offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
7974 
7975   tree bump;
7976   tree vec_offset = NULL_TREE;
7977   if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7978     {
7979       aggr_type = NULL_TREE;
7980       bump = NULL_TREE;
7981     }
7982   else if (memory_access_type == VMAT_GATHER_SCATTER)
7983     {
7984       aggr_type = elem_type;
7985       vect_get_strided_load_store_ops (stmt, loop_vinfo, &gs_info,
7986 				       &bump, &vec_offset);
7987     }
7988   else
7989     {
7990       if (memory_access_type == VMAT_LOAD_STORE_LANES)
7991 	aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
7992       else
7993 	aggr_type = vectype;
7994       bump = vect_get_data_ptr_increment (dr, aggr_type, memory_access_type);
7995     }
7996 
7997   tree vec_mask = NULL_TREE;
7998   prev_stmt_info = NULL;
7999   poly_uint64 group_elt = 0;
8000   for (j = 0; j < ncopies; j++)
8001     {
8002       /* 1. Create the vector or array pointer update chain.  */
8003       if (j == 0)
8004 	{
8005 	  bool simd_lane_access_p
8006 	    = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
8007 	  if (simd_lane_access_p
8008 	      && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
8009 	      && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
8010 	      && integer_zerop (DR_OFFSET (first_dr))
8011 	      && integer_zerop (DR_INIT (first_dr))
8012 	      && alias_sets_conflict_p (get_alias_set (aggr_type),
8013 					get_alias_set (TREE_TYPE (ref_type)))
8014 	      && (alignment_support_scheme == dr_aligned
8015 		  || alignment_support_scheme == dr_unaligned_supported))
8016 	    {
8017 	      dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
8018 	      dataref_offset = build_int_cst (ref_type, 0);
8019 	      inv_p = false;
8020 	    }
8021 	  else if (first_stmt_for_drptr
8022 		   && first_stmt != first_stmt_for_drptr)
8023 	    {
8024 	      dataref_ptr
8025 		= vect_create_data_ref_ptr (first_stmt_for_drptr, aggr_type,
8026 					    at_loop, offset, &dummy, gsi,
8027 					    &ptr_incr, simd_lane_access_p,
8028 					    &inv_p, byte_offset, bump);
8029 	      /* Adjust the pointer by the difference to first_stmt.  */
8030 	      data_reference_p ptrdr
8031 		= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr));
8032 	      tree diff = fold_convert (sizetype,
8033 					size_binop (MINUS_EXPR,
8034 						    DR_INIT (first_dr),
8035 						    DR_INIT (ptrdr)));
8036 	      dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8037 					     stmt, diff);
8038 	    }
8039 	  else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8040 	    {
8041 	      vect_get_gather_scatter_ops (loop, stmt, &gs_info,
8042 					   &dataref_ptr, &vec_offset);
8043 	      inv_p = false;
8044 	    }
8045 	  else
8046 	    dataref_ptr
8047 	      = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
8048 					  offset, &dummy, gsi, &ptr_incr,
8049 					  simd_lane_access_p, &inv_p,
8050 					  byte_offset, bump);
8051 	  if (mask)
8052 	    vec_mask = vect_get_vec_def_for_operand (mask, stmt,
8053 						     mask_vectype);
8054 	}
8055       else
8056 	{
8057 	  if (dataref_offset)
8058 	    dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
8059 					      bump);
8060 	  else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8061 	    vec_offset = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
8062 							 vec_offset);
8063 	  else
8064 	    dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8065 					   stmt, bump);
8066 	  if (mask)
8067 	    vec_mask = vect_get_vec_def_for_stmt_copy (mask_dt, vec_mask);
8068 	}
8069 
8070       if (grouped_load || slp_perm)
8071 	dr_chain.create (vec_num);
8072 
8073       if (memory_access_type == VMAT_LOAD_STORE_LANES)
8074 	{
8075 	  tree vec_array;
8076 
8077 	  vec_array = create_vector_array (vectype, vec_num);
8078 
8079 	  tree final_mask = NULL_TREE;
8080 	  if (loop_masks)
8081 	    final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
8082 					     vectype, j);
8083 	  if (vec_mask)
8084 	    final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8085 						  vec_mask, gsi);
8086 
8087 	  gcall *call;
8088 	  if (final_mask)
8089 	    {
8090 	      /* Emit:
8091 		   VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
8092 		                                VEC_MASK).  */
8093 	      unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
8094 	      tree alias_ptr = build_int_cst (ref_type, align);
8095 	      call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
8096 						 dataref_ptr, alias_ptr,
8097 						 final_mask);
8098 	    }
8099 	  else
8100 	    {
8101 	      /* Emit:
8102 		   VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]).  */
8103 	      data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8104 	      call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
8105 	    }
8106 	  gimple_call_set_lhs (call, vec_array);
8107 	  gimple_call_set_nothrow (call, true);
8108 	  new_stmt = call;
8109 	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
8110 
8111 	  /* Extract each vector into an SSA_NAME.  */
8112 	  for (i = 0; i < vec_num; i++)
8113 	    {
8114 	      new_temp = read_vector_array (stmt, gsi, scalar_dest,
8115 					    vec_array, i);
8116 	      dr_chain.quick_push (new_temp);
8117 	    }
8118 
8119 	  /* Record the mapping between SSA_NAMEs and statements.  */
8120 	  vect_record_grouped_load_vectors (stmt, dr_chain);
8121 	}
8122       else
8123 	{
8124 	  for (i = 0; i < vec_num; i++)
8125 	    {
8126 	      tree final_mask = NULL_TREE;
8127 	      if (loop_masks
8128 		  && memory_access_type != VMAT_INVARIANT)
8129 		final_mask = vect_get_loop_mask (gsi, loop_masks,
8130 						 vec_num * ncopies,
8131 						 vectype, vec_num * j + i);
8132 	      if (vec_mask)
8133 		final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8134 						      vec_mask, gsi);
8135 
8136 	      if (i > 0)
8137 		dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8138 					       stmt, bump);
8139 
8140 	      /* 2. Create the vector-load in the loop.  */
8141 	      switch (alignment_support_scheme)
8142 		{
8143 		case dr_aligned:
8144 		case dr_unaligned_supported:
8145 		  {
8146 		    unsigned int align, misalign;
8147 
8148 		    if (memory_access_type == VMAT_GATHER_SCATTER)
8149 		      {
8150 			tree scale = size_int (gs_info.scale);
8151 			gcall *call;
8152 			if (loop_masks)
8153 			  call = gimple_build_call_internal
8154 			    (IFN_MASK_GATHER_LOAD, 4, dataref_ptr,
8155 			     vec_offset, scale, final_mask);
8156 			else
8157 			  call = gimple_build_call_internal
8158 			    (IFN_GATHER_LOAD, 3, dataref_ptr,
8159 			     vec_offset, scale);
8160 			gimple_call_set_nothrow (call, true);
8161 			new_stmt = call;
8162 			data_ref = NULL_TREE;
8163 			break;
8164 		      }
8165 
8166 		    align = DR_TARGET_ALIGNMENT (dr);
8167 		    if (alignment_support_scheme == dr_aligned)
8168 		      {
8169 			gcc_assert (aligned_access_p (first_dr));
8170 			misalign = 0;
8171 		      }
8172 		    else if (DR_MISALIGNMENT (first_dr) == -1)
8173 		      {
8174 			align = dr_alignment (vect_dr_behavior (first_dr));
8175 			misalign = 0;
8176 		      }
8177 		    else
8178 		      misalign = DR_MISALIGNMENT (first_dr);
8179 		    if (dataref_offset == NULL_TREE
8180 			&& TREE_CODE (dataref_ptr) == SSA_NAME)
8181 		      set_ptr_info_alignment (get_ptr_info (dataref_ptr),
8182 					      align, misalign);
8183 
8184 		    if (final_mask)
8185 		      {
8186 			align = least_bit_hwi (misalign | align);
8187 			tree ptr = build_int_cst (ref_type, align);
8188 			gcall *call
8189 			  = gimple_build_call_internal (IFN_MASK_LOAD, 3,
8190 							dataref_ptr, ptr,
8191 							final_mask);
8192 			gimple_call_set_nothrow (call, true);
8193 			new_stmt = call;
8194 			data_ref = NULL_TREE;
8195 		      }
8196 		    else
8197 		      {
8198 			data_ref
8199 			  = fold_build2 (MEM_REF, vectype, dataref_ptr,
8200 					 dataref_offset
8201 					 ? dataref_offset
8202 					 : build_int_cst (ref_type, 0));
8203 			if (alignment_support_scheme == dr_aligned)
8204 			  ;
8205 			else if (DR_MISALIGNMENT (first_dr) == -1)
8206 			  TREE_TYPE (data_ref)
8207 			    = build_aligned_type (TREE_TYPE (data_ref),
8208 						  align * BITS_PER_UNIT);
8209 			else
8210 			  TREE_TYPE (data_ref)
8211 			    = build_aligned_type (TREE_TYPE (data_ref),
8212 						  TYPE_ALIGN (elem_type));
8213 		      }
8214 		    break;
8215 		  }
8216 		case dr_explicit_realign:
8217 		  {
8218 		    tree ptr, bump;
8219 
8220 		    tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
8221 
8222 		    if (compute_in_loop)
8223 		      msq = vect_setup_realignment (first_stmt, gsi,
8224 						    &realignment_token,
8225 						    dr_explicit_realign,
8226 						    dataref_ptr, NULL);
8227 
8228 		    if (TREE_CODE (dataref_ptr) == SSA_NAME)
8229 		      ptr = copy_ssa_name (dataref_ptr);
8230 		    else
8231 		      ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
8232 		    unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
8233 		    new_stmt = gimple_build_assign
8234 				 (ptr, BIT_AND_EXPR, dataref_ptr,
8235 				  build_int_cst
8236 				  (TREE_TYPE (dataref_ptr),
8237 				   -(HOST_WIDE_INT) align));
8238 		    vect_finish_stmt_generation (stmt, new_stmt, gsi);
8239 		    data_ref
8240 		      = build2 (MEM_REF, vectype, ptr,
8241 				build_int_cst (ref_type, 0));
8242 		    vect_copy_ref_info (data_ref, DR_REF (first_dr));
8243 		    vec_dest = vect_create_destination_var (scalar_dest,
8244 							    vectype);
8245 		    new_stmt = gimple_build_assign (vec_dest, data_ref);
8246 		    new_temp = make_ssa_name (vec_dest, new_stmt);
8247 		    gimple_assign_set_lhs (new_stmt, new_temp);
8248 		    gimple_set_vdef (new_stmt, gimple_vdef (stmt));
8249 		    gimple_set_vuse (new_stmt, gimple_vuse (stmt));
8250 		    vect_finish_stmt_generation (stmt, new_stmt, gsi);
8251 		    msq = new_temp;
8252 
8253 		    bump = size_binop (MULT_EXPR, vs,
8254 				       TYPE_SIZE_UNIT (elem_type));
8255 		    bump = size_binop (MINUS_EXPR, bump, size_one_node);
8256 		    ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
8257 		    new_stmt = gimple_build_assign
8258 				 (NULL_TREE, BIT_AND_EXPR, ptr,
8259 				  build_int_cst
8260 				  (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
8261 		    ptr = copy_ssa_name (ptr, new_stmt);
8262 		    gimple_assign_set_lhs (new_stmt, ptr);
8263 		    vect_finish_stmt_generation (stmt, new_stmt, gsi);
8264 		    data_ref
8265 		      = build2 (MEM_REF, vectype, ptr,
8266 				build_int_cst (ref_type, 0));
8267 		    break;
8268 		  }
8269 		case dr_explicit_realign_optimized:
8270 		  {
8271 		    if (TREE_CODE (dataref_ptr) == SSA_NAME)
8272 		      new_temp = copy_ssa_name (dataref_ptr);
8273 		    else
8274 		      new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
8275 		    unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
8276 		    new_stmt = gimple_build_assign
8277 		      (new_temp, BIT_AND_EXPR, dataref_ptr,
8278 		       build_int_cst (TREE_TYPE (dataref_ptr),
8279 				     -(HOST_WIDE_INT) align));
8280 		    vect_finish_stmt_generation (stmt, new_stmt, gsi);
8281 		    data_ref
8282 		      = build2 (MEM_REF, vectype, new_temp,
8283 				build_int_cst (ref_type, 0));
8284 		    break;
8285 		  }
8286 		default:
8287 		  gcc_unreachable ();
8288 		}
8289 	      vec_dest = vect_create_destination_var (scalar_dest, vectype);
8290 	      /* DATA_REF is null if we've already built the statement.  */
8291 	      if (data_ref)
8292 		{
8293 		  vect_copy_ref_info (data_ref, DR_REF (first_dr));
8294 		  new_stmt = gimple_build_assign (vec_dest, data_ref);
8295 		}
8296 	      new_temp = make_ssa_name (vec_dest, new_stmt);
8297 	      gimple_set_lhs (new_stmt, new_temp);
8298 	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
8299 
8300 	      /* 3. Handle explicit realignment if necessary/supported.
8301 		 Create in loop:
8302 		   vec_dest = realign_load (msq, lsq, realignment_token)  */
8303 	      if (alignment_support_scheme == dr_explicit_realign_optimized
8304 		  || alignment_support_scheme == dr_explicit_realign)
8305 		{
8306 		  lsq = gimple_assign_lhs (new_stmt);
8307 		  if (!realignment_token)
8308 		    realignment_token = dataref_ptr;
8309 		  vec_dest = vect_create_destination_var (scalar_dest, vectype);
8310 		  new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
8311 						  msq, lsq, realignment_token);
8312 		  new_temp = make_ssa_name (vec_dest, new_stmt);
8313 		  gimple_assign_set_lhs (new_stmt, new_temp);
8314 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
8315 
8316 		  if (alignment_support_scheme == dr_explicit_realign_optimized)
8317 		    {
8318 		      gcc_assert (phi);
8319 		      if (i == vec_num - 1 && j == ncopies - 1)
8320 			add_phi_arg (phi, lsq,
8321 				     loop_latch_edge (containing_loop),
8322 				     UNKNOWN_LOCATION);
8323 		      msq = lsq;
8324 		    }
8325 		}
8326 
8327 	      /* 4. Handle invariant-load.  */
8328 	      if (inv_p && !bb_vinfo)
8329 		{
8330 		  gcc_assert (!grouped_load);
8331 		  /* If we have versioned for aliasing or the loop doesn't
8332 		     have any data dependencies that would preclude this,
8333 		     then we are sure this is a loop invariant load and
8334 		     thus we can insert it on the preheader edge.  */
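                  /* A minimal sketch of the hoisted form for an invariant
                     load x = *p (assuming the checks below succeed):

                       preheader:
                         tem = *p;
                         vec = { tem, tem, ... };
                       loop:
                         ... uses of vec ...  */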
8335 		  if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
8336 		      && !nested_in_vect_loop
8337 		      && hoist_defs_of_uses (stmt, loop))
8338 		    {
8339 		      if (dump_enabled_p ())
8340 			{
8341 			  dump_printf_loc (MSG_NOTE, vect_location,
8342 					   "hoisting out of the vectorized "
8343 					   "loop: ");
8344 			  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8345 			}
8346 		      tree tem = copy_ssa_name (scalar_dest);
8347 		      gsi_insert_on_edge_immediate
8348 			(loop_preheader_edge (loop),
8349 			 gimple_build_assign (tem,
8350 					      unshare_expr
8351 					        (gimple_assign_rhs1 (stmt))));
8352 		      new_temp = vect_init_vector (stmt, tem, vectype, NULL);
8353 		      new_stmt = SSA_NAME_DEF_STMT (new_temp);
8354 		      set_vinfo_for_stmt (new_stmt,
8355 					  new_stmt_vec_info (new_stmt, vinfo));
8356 		    }
8357 		  else
8358 		    {
8359 		      gimple_stmt_iterator gsi2 = *gsi;
8360 		      gsi_next (&gsi2);
8361 		      new_temp = vect_init_vector (stmt, scalar_dest,
8362 						   vectype, &gsi2);
8363 		      new_stmt = SSA_NAME_DEF_STMT (new_temp);
8364 		    }
8365 		}
8366 
8367 	      if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8368 		{
8369 		  tree perm_mask = perm_mask_for_reverse (vectype);
8370 		  new_temp = permute_vec_elements (new_temp, new_temp,
8371 						   perm_mask, stmt, gsi);
8372 		  new_stmt = SSA_NAME_DEF_STMT (new_temp);
8373 		}
8374 
8375 	      /* Collect vector loads and later create their permutation in
8376 		 vect_transform_grouped_load ().  */
8377 	      if (grouped_load || slp_perm)
8378 		dr_chain.quick_push (new_temp);
8379 
8380 	      /* Store vector loads in the corresponding SLP_NODE.  */
8381 	      if (slp && !slp_perm)
8382 		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8383 
	      /* With an SLP permutation we load the gaps as well; without
		 one we need to skip the gaps after we manage to fully load
		 all elements.  group_gap_adj is GROUP_SIZE here.  */
8387 	      group_elt += nunits;
8388 	      if (maybe_ne (group_gap_adj, 0U)
8389 		  && !slp_perm
8390 		  && known_eq (group_elt, group_size - group_gap_adj))
8391 		{
8392 		  poly_wide_int bump_val
8393 		    = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
8394 		       * group_gap_adj);
8395 		  tree bump = wide_int_to_tree (sizetype, bump_val);
8396 		  dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8397 						 stmt, bump);
8398 		  group_elt = 0;
8399 		}
8400 	    }
8401 	  /* Bump the vector pointer to account for a gap or for excess
8402 	     elements loaded for a permuted SLP load.  */
8403 	  if (maybe_ne (group_gap_adj, 0U) && slp_perm)
8404 	    {
8405 	      poly_wide_int bump_val
8406 		= (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
8407 		   * group_gap_adj);
8408 	      tree bump = wide_int_to_tree (sizetype, bump_val);
8409 	      dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8410 					     stmt, bump);
8411 	    }
8412 	}
8413 
8414       if (slp && !slp_perm)
8415 	continue;
8416 
8417       if (slp_perm)
8418         {
8419 	  unsigned n_perms;
8420           if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
8421                                              slp_node_instance, false,
8422 					     &n_perms))
8423             {
8424               dr_chain.release ();
8425               return false;
8426             }
8427         }
8428       else
8429         {
8430           if (grouped_load)
8431   	    {
8432 	      if (memory_access_type != VMAT_LOAD_STORE_LANES)
8433 		vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
8434 	      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
8435 	    }
8436           else
8437 	    {
8438 	      if (j == 0)
8439 	        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8440 	      else
8441 	        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8442 	      prev_stmt_info = vinfo_for_stmt (new_stmt);
8443 	    }
8444         }
8445       dr_chain.release ();
8446     }
8447 
8448   return true;
8449 }
8450 
8451 /* Function vect_is_simple_cond.
8452 
8453    Input:
   VINFO - the vect info of the loop or basic block that is being vectorized.
   COND - Condition that is checked for simple use.
   VECTYPE - the vector type of the COND_EXPR statement (used for invariant
   comparisons).

   Output:
   *COMP_VECTYPE - the vector type for the comparison.
   *DTS - The def types for the arguments of the comparison.

   Returns whether a COND can be vectorized.  Checks whether
   condition operands are supportable using vect_is_simple_use.  */
8463 
8464 static bool
8465 vect_is_simple_cond (tree cond, vec_info *vinfo,
8466 		     tree *comp_vectype, enum vect_def_type *dts,
8467 		     tree vectype)
8468 {
8469   tree lhs, rhs;
8470   tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8471 
8472   /* Mask case.  */
8473   if (TREE_CODE (cond) == SSA_NAME
8474       && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
8475     {
8476       gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (cond);
8477       if (!vect_is_simple_use (cond, vinfo, &lhs_def_stmt,
8478 			       &dts[0], comp_vectype)
8479 	  || !*comp_vectype
8480 	  || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
8481 	return false;
8482       return true;
8483     }
8484 
8485   if (!COMPARISON_CLASS_P (cond))
8486     return false;
8487 
8488   lhs = TREE_OPERAND (cond, 0);
8489   rhs = TREE_OPERAND (cond, 1);
8490 
8491   if (TREE_CODE (lhs) == SSA_NAME)
8492     {
8493       gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
8494       if (!vect_is_simple_use (lhs, vinfo, &lhs_def_stmt, &dts[0], &vectype1))
8495 	return false;
8496     }
8497   else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
8498 	   || TREE_CODE (lhs) == FIXED_CST)
8499     dts[0] = vect_constant_def;
8500   else
8501     return false;
8502 
8503   if (TREE_CODE (rhs) == SSA_NAME)
8504     {
8505       gimple *rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
8506       if (!vect_is_simple_use (rhs, vinfo, &rhs_def_stmt, &dts[1], &vectype2))
8507 	return false;
8508     }
8509   else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
8510 	   || TREE_CODE (rhs) == FIXED_CST)
8511     dts[1] = vect_constant_def;
8512   else
8513     return false;
8514 
8515   if (vectype1 && vectype2
8516       && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
8517 		   TYPE_VECTOR_SUBPARTS (vectype2)))
8518     return false;
8519 
8520   *comp_vectype = vectype1 ? vectype1 : vectype2;
8521   /* Invariant comparison.  */
8522   if (! *comp_vectype && vectype)
8523     {
8524       tree scalar_type = TREE_TYPE (lhs);
8525       /* If we can widen the comparison to match vectype do so.  */
8526       if (INTEGRAL_TYPE_P (scalar_type)
8527 	  && tree_int_cst_lt (TYPE_SIZE (scalar_type),
8528 			      TYPE_SIZE (TREE_TYPE (vectype))))
8529 	scalar_type = build_nonstandard_integer_type
8530 	  (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))),
8531 	   TYPE_UNSIGNED (scalar_type));
8532       *comp_vectype = get_vectype_for_scalar_type (scalar_type);
8533     }
8534 
8535   return true;
8536 }
8537 
8538 /* vectorizable_condition.
8539 
   Check if STMT is a conditional modify expression that can be vectorized.
8541    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
8542    stmt using VEC_COND_EXPR  to replace it, put it in VEC_STMT, and insert it
8543    at GSI.
8544 
   When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
   to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
   the else clause if it is 2).
8548 
8549    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
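
/* As an illustrative sketch (hand-written, not compiler output), a scalar
   statement

     x_1 = a_2 < b_3 ? c_4 : d_5;

   is vectorized roughly as

     vx_1 = VEC_COND_EXPR <va_2 < vb_3, vc_4, vd_5>;

   where, for boolean comparison operands, the comparison may first be
   lowered to the mask bit operations described in the function body.  */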
8550 
8551 bool
8552 vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
8553 			gimple **vec_stmt, tree reduc_def, int reduc_index,
8554 			slp_tree slp_node)
8555 {
8556   tree scalar_dest = NULL_TREE;
8557   tree vec_dest = NULL_TREE;
8558   tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
8559   tree then_clause, else_clause;
8560   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8561   tree comp_vectype = NULL_TREE;
8562   tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
8563   tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
8564   tree vec_compare;
8565   tree new_temp;
8566   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
8567   enum vect_def_type dts[4]
8568     = {vect_unknown_def_type, vect_unknown_def_type,
8569        vect_unknown_def_type, vect_unknown_def_type};
8570   int ndts = 4;
8571   int ncopies;
8572   enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
8573   stmt_vec_info prev_stmt_info = NULL;
8574   int i, j;
8575   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8576   vec<tree> vec_oprnds0 = vNULL;
8577   vec<tree> vec_oprnds1 = vNULL;
8578   vec<tree> vec_oprnds2 = vNULL;
8579   vec<tree> vec_oprnds3 = vNULL;
8580   tree vec_cmp_type;
8581   bool masked = false;
8582 
8583   if (reduc_index && STMT_SLP_TYPE (stmt_info))
8584     return false;
8585 
8586   vect_reduction_type reduction_type
8587     = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info);
8588   if (reduction_type == TREE_CODE_REDUCTION)
8589     {
8590       if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8591 	return false;
8592 
8593       if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8594 	  && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
8595 	       && reduc_def))
8596 	return false;
8597 
8598       /* FORNOW: not yet supported.  */
8599       if (STMT_VINFO_LIVE_P (stmt_info))
8600 	{
8601 	  if (dump_enabled_p ())
8602 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8603 			     "value used after loop.\n");
8604 	  return false;
8605 	}
8606     }
8607 
8608   /* Is vectorizable conditional operation?  */
8609   if (!is_gimple_assign (stmt))
8610     return false;
8611 
8612   code = gimple_assign_rhs_code (stmt);
8613 
8614   if (code != COND_EXPR)
8615     return false;
8616 
8617   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8618   tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8619 
8620   if (slp_node)
8621     ncopies = 1;
8622   else
8623     ncopies = vect_get_num_copies (loop_vinfo, vectype);
8624 
8625   gcc_assert (ncopies >= 1);
8626   if (reduc_index && ncopies > 1)
8627     return false; /* FORNOW */
8628 
8629   cond_expr = gimple_assign_rhs1 (stmt);
8630   then_clause = gimple_assign_rhs2 (stmt);
8631   else_clause = gimple_assign_rhs3 (stmt);
8632 
8633   if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo,
8634 			    &comp_vectype, &dts[0], slp_node ? NULL : vectype)
8635       || !comp_vectype)
8636     return false;
8637 
8638   gimple *def_stmt;
8639   if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &def_stmt, &dts[2],
8640 			   &vectype1))
8641     return false;
8642   if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &def_stmt, &dts[3],
8643 			   &vectype2))
8644     return false;
8645 
8646   if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
8647     return false;
8648 
8649   if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
8650     return false;
8651 
8652   masked = !COMPARISON_CLASS_P (cond_expr);
8653   vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
8654 
8655   if (vec_cmp_type == NULL_TREE)
8656     return false;
8657 
8658   cond_code = TREE_CODE (cond_expr);
8659   if (!masked)
8660     {
8661       cond_expr0 = TREE_OPERAND (cond_expr, 0);
8662       cond_expr1 = TREE_OPERAND (cond_expr, 1);
8663     }
8664 
8665   if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
8666     {
8667       /* Boolean values may have another representation in vectors
8668 	 and therefore we prefer bit operations over comparison for
8669 	 them (which also works for scalar masks).  We store opcodes
8670 	 to use in bitop1 and bitop2.  Statement is vectorized as
8671 	 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
8672 	 depending on bitop1 and bitop2 arity.  */
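      /* For instance, with single-bit booleans a and b this computes
           a >  b  as  a & ~b          a >= b  as  a | ~b
           a <  b  as  b & ~a          a <= b  as  b | ~a
           a != b  as  a ^ b           a == b  as  ~(a ^ b)
         (the EQ case is realized as a ^ b with the THEN/ELSE arms of the
         COND_EXPR swapped rather than by emitting the BIT_NOT).  */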
8673       switch (cond_code)
8674 	{
8675 	case GT_EXPR:
8676 	  bitop1 = BIT_NOT_EXPR;
8677 	  bitop2 = BIT_AND_EXPR;
8678 	  break;
8679 	case GE_EXPR:
8680 	  bitop1 = BIT_NOT_EXPR;
8681 	  bitop2 = BIT_IOR_EXPR;
8682 	  break;
8683 	case LT_EXPR:
8684 	  bitop1 = BIT_NOT_EXPR;
8685 	  bitop2 = BIT_AND_EXPR;
8686 	  std::swap (cond_expr0, cond_expr1);
8687 	  break;
8688 	case LE_EXPR:
8689 	  bitop1 = BIT_NOT_EXPR;
8690 	  bitop2 = BIT_IOR_EXPR;
8691 	  std::swap (cond_expr0, cond_expr1);
8692 	  break;
8693 	case NE_EXPR:
8694 	  bitop1 = BIT_XOR_EXPR;
8695 	  break;
8696 	case EQ_EXPR:
8697 	  bitop1 = BIT_XOR_EXPR;
8698 	  bitop2 = BIT_NOT_EXPR;
8699 	  break;
8700 	default:
8701 	  return false;
8702 	}
8703       cond_code = SSA_NAME;
8704     }
8705 
8706   if (!vec_stmt)
8707     {
8708       STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
8709       if (bitop1 != NOP_EXPR)
8710 	{
8711 	  machine_mode mode = TYPE_MODE (comp_vectype);
8712 	  optab optab;
8713 
8714 	  optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
8715 	  if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8716 	    return false;
8717 
8718 	  if (bitop2 != NOP_EXPR)
8719 	    {
8720 	      optab = optab_for_tree_code (bitop2, comp_vectype,
8721 					   optab_default);
8722 	      if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8723 		return false;
8724 	    }
8725 	}
      if (expand_vec_cond_expr_p (vectype, comp_vectype, cond_code))
8728 	{
8729 	  if (!slp_node)
8730 	    vect_model_simple_cost (stmt_info, ncopies, dts, ndts, NULL, NULL);
8731 	  return true;
8732 	}
8733       return false;
8734     }
8735 
8736   /* Transform.  */
8737 
8738   if (!slp_node)
8739     {
8740       vec_oprnds0.create (1);
8741       vec_oprnds1.create (1);
8742       vec_oprnds2.create (1);
8743       vec_oprnds3.create (1);
8744     }
8745 
8746   /* Handle def.  */
8747   scalar_dest = gimple_assign_lhs (stmt);
8748   if (reduction_type != EXTRACT_LAST_REDUCTION)
8749     vec_dest = vect_create_destination_var (scalar_dest, vectype);
8750 
8751   /* Handle cond expr.  */
8752   for (j = 0; j < ncopies; j++)
8753     {
8754       gimple *new_stmt = NULL;
8755       if (j == 0)
8756 	{
8757           if (slp_node)
8758             {
8759               auto_vec<tree, 4> ops;
8760 	      auto_vec<vec<tree>, 4> vec_defs;
8761 
8762 	      if (masked)
8763 		ops.safe_push (cond_expr);
8764 	      else
8765 		{
8766 		  ops.safe_push (cond_expr0);
8767 		  ops.safe_push (cond_expr1);
8768 		}
8769               ops.safe_push (then_clause);
8770               ops.safe_push (else_clause);
8771               vect_get_slp_defs (ops, slp_node, &vec_defs);
8772 	      vec_oprnds3 = vec_defs.pop ();
8773 	      vec_oprnds2 = vec_defs.pop ();
8774 	      if (!masked)
8775 		vec_oprnds1 = vec_defs.pop ();
8776 	      vec_oprnds0 = vec_defs.pop ();
8777             }
8778           else
8779             {
8780 	      gimple *gtemp;
8781 	      if (masked)
8782 		{
8783 		  vec_cond_lhs
8784 		    = vect_get_vec_def_for_operand (cond_expr, stmt,
8785 						    comp_vectype);
8786 		  vect_is_simple_use (cond_expr, stmt_info->vinfo,
8787 				      &gtemp, &dts[0]);
8788 		}
8789 	      else
8790 		{
8791 		  vec_cond_lhs
8792 		    = vect_get_vec_def_for_operand (cond_expr0,
8793 						    stmt, comp_vectype);
8794 		  vect_is_simple_use (cond_expr0, loop_vinfo, &gtemp, &dts[0]);
8795 
8796 		  vec_cond_rhs
8797 		    = vect_get_vec_def_for_operand (cond_expr1,
8798 						    stmt, comp_vectype);
8799 		  vect_is_simple_use (cond_expr1, loop_vinfo, &gtemp, &dts[1]);
8800 		}
8801 	      if (reduc_index == 1)
8802 		vec_then_clause = reduc_def;
8803 	      else
8804 		{
8805 		  vec_then_clause = vect_get_vec_def_for_operand (then_clause,
8806 								  stmt);
8807 	          vect_is_simple_use (then_clause, loop_vinfo,
8808 				      &gtemp, &dts[2]);
8809 		}
8810 	      if (reduc_index == 2)
8811 		vec_else_clause = reduc_def;
8812 	      else
8813 		{
8814 		  vec_else_clause = vect_get_vec_def_for_operand (else_clause,
8815 								  stmt);
8816 		  vect_is_simple_use (else_clause, loop_vinfo, &gtemp, &dts[3]);
8817 		}
8818 	    }
8819 	}
8820       else
8821 	{
8822 	  vec_cond_lhs
8823 	    = vect_get_vec_def_for_stmt_copy (dts[0],
8824 					      vec_oprnds0.pop ());
8825 	  if (!masked)
8826 	    vec_cond_rhs
8827 	      = vect_get_vec_def_for_stmt_copy (dts[1],
8828 						vec_oprnds1.pop ());
8829 
8830 	  vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
8831 							    vec_oprnds2.pop ());
8832 	  vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
8833 							    vec_oprnds3.pop ());
8834 	}
8835 
8836       if (!slp_node)
8837         {
8838 	  vec_oprnds0.quick_push (vec_cond_lhs);
8839 	  if (!masked)
8840 	    vec_oprnds1.quick_push (vec_cond_rhs);
8841 	  vec_oprnds2.quick_push (vec_then_clause);
8842 	  vec_oprnds3.quick_push (vec_else_clause);
8843 	}
8844 
8845       /* Arguments are ready.  Create the new vector stmt.  */
8846       FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
8847         {
8848           vec_then_clause = vec_oprnds2[i];
8849           vec_else_clause = vec_oprnds3[i];
8850 
8851 	  if (masked)
8852 	    vec_compare = vec_cond_lhs;
8853 	  else
8854 	    {
8855 	      vec_cond_rhs = vec_oprnds1[i];
8856 	      if (bitop1 == NOP_EXPR)
8857 		vec_compare = build2 (cond_code, vec_cmp_type,
8858 				      vec_cond_lhs, vec_cond_rhs);
8859 	      else
8860 		{
8861 		  new_temp = make_ssa_name (vec_cmp_type);
8862 		  if (bitop1 == BIT_NOT_EXPR)
8863 		    new_stmt = gimple_build_assign (new_temp, bitop1,
8864 						    vec_cond_rhs);
8865 		  else
8866 		    new_stmt
8867 		      = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
8868 					     vec_cond_rhs);
8869 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
8870 		  if (bitop2 == NOP_EXPR)
8871 		    vec_compare = new_temp;
8872 		  else if (bitop2 == BIT_NOT_EXPR)
8873 		    {
8874 		      /* Instead of doing ~x ? y : z do x ? z : y.  */
8875 		      vec_compare = new_temp;
8876 		      std::swap (vec_then_clause, vec_else_clause);
8877 		    }
8878 		  else
8879 		    {
8880 		      vec_compare = make_ssa_name (vec_cmp_type);
8881 		      new_stmt
8882 			= gimple_build_assign (vec_compare, bitop2,
8883 					       vec_cond_lhs, new_temp);
8884 		      vect_finish_stmt_generation (stmt, new_stmt, gsi);
8885 		    }
8886 		}
8887 	    }
8888 	  if (reduction_type == EXTRACT_LAST_REDUCTION)
8889 	    {
8890 	      if (!is_gimple_val (vec_compare))
8891 		{
8892 		  tree vec_compare_name = make_ssa_name (vec_cmp_type);
8893 		  new_stmt = gimple_build_assign (vec_compare_name,
8894 						  vec_compare);
8895 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
8896 		  vec_compare = vec_compare_name;
8897 		}
8898 	      gcc_assert (reduc_index == 2);
8899 	      new_stmt = gimple_build_call_internal
8900 		(IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
8901 		 vec_then_clause);
8902 	      gimple_call_set_lhs (new_stmt, scalar_dest);
8903 	      SSA_NAME_DEF_STMT (scalar_dest) = new_stmt;
8904 	      if (stmt == gsi_stmt (*gsi))
8905 		vect_finish_replace_stmt (stmt, new_stmt);
8906 	      else
8907 		{
8908 		  /* In this case we're moving the definition to later in the
8909 		     block.  That doesn't matter because the only uses of the
8910 		     lhs are in phi statements.  */
8911 		  gimple_stmt_iterator old_gsi = gsi_for_stmt (stmt);
8912 		  gsi_remove (&old_gsi, true);
8913 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
8914 		}
8915 	    }
8916 	  else
8917 	    {
8918 	      new_temp = make_ssa_name (vec_dest);
8919 	      new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR,
8920 					      vec_compare, vec_then_clause,
8921 					      vec_else_clause);
8922 	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
8923 	    }
8924           if (slp_node)
8925             SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8926         }
8927 
8928         if (slp_node)
8929           continue;
8930 
8931         if (j == 0)
8932           STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8933         else
8934           STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8935 
8936         prev_stmt_info = vinfo_for_stmt (new_stmt);
8937     }
8938 
8939   vec_oprnds0.release ();
8940   vec_oprnds1.release ();
8941   vec_oprnds2.release ();
8942   vec_oprnds3.release ();
8943 
8944   return true;
8945 }
8946 
8947 /* vectorizable_comparison.
8948 
   Check if STMT is a comparison expression that can be vectorized.
8950    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
8951    comparison, put it in VEC_STMT, and insert it at GSI.
8952 
8953    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
8954 
8955 static bool
8956 vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
8957 			 gimple **vec_stmt, tree reduc_def,
8958 			 slp_tree slp_node)
8959 {
8960   tree lhs, rhs1, rhs2;
8961   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8962   tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8963   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8964   tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
8965   tree new_temp;
8966   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
8967   enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
8968   int ndts = 2;
8969   poly_uint64 nunits;
8970   int ncopies;
8971   enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
8972   stmt_vec_info prev_stmt_info = NULL;
8973   int i, j;
8974   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8975   vec<tree> vec_oprnds0 = vNULL;
8976   vec<tree> vec_oprnds1 = vNULL;
8977   gimple *def_stmt;
8978   tree mask_type;
8979   tree mask;
8980 
8981   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8982     return false;
8983 
8984   if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
8985     return false;
8986 
8987   mask_type = vectype;
8988   nunits = TYPE_VECTOR_SUBPARTS (vectype);
8989 
8990   if (slp_node)
8991     ncopies = 1;
8992   else
8993     ncopies = vect_get_num_copies (loop_vinfo, vectype);
8994 
8995   gcc_assert (ncopies >= 1);
8996   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8997       && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
8998 	   && reduc_def))
8999     return false;
9000 
9001   if (STMT_VINFO_LIVE_P (stmt_info))
9002     {
9003       if (dump_enabled_p ())
9004 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9005 			 "value used after loop.\n");
9006       return false;
9007     }
9008 
9009   if (!is_gimple_assign (stmt))
9010     return false;
9011 
9012   code = gimple_assign_rhs_code (stmt);
9013 
9014   if (TREE_CODE_CLASS (code) != tcc_comparison)
9015     return false;
9016 
9017   rhs1 = gimple_assign_rhs1 (stmt);
9018   rhs2 = gimple_assign_rhs2 (stmt);
9019 
9020   if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &def_stmt,
9021 			   &dts[0], &vectype1))
9022     return false;
9023 
9024   if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &def_stmt,
9025 			   &dts[1], &vectype2))
9026     return false;
9027 
9028   if (vectype1 && vectype2
9029       && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
9030 		   TYPE_VECTOR_SUBPARTS (vectype2)))
9031     return false;
9032 
9033   vectype = vectype1 ? vectype1 : vectype2;
9034 
9035   /* Invariant comparison.  */
9036   if (!vectype)
9037     {
9038       vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
9039       if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
9040 	return false;
9041     }
9042   else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
9043     return false;
9044 
9045   /* Can't compare mask and non-mask types.  */
9046   if (vectype1 && vectype2
9047       && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
9048     return false;
9049 
9050   /* Boolean values may have another representation in vectors
9051      and therefore we prefer bit operations over comparison for
9052      them (which also works for scalar masks).  We store opcodes
9053      to use in bitop1 and bitop2.  Statement is vectorized as
9054        BITOP2 (rhs1 BITOP1 rhs2) or
9055        rhs1 BITOP2 (BITOP1 rhs2)
9056      depending on bitop1 and bitop2 arity.  */
9057   if (VECTOR_BOOLEAN_TYPE_P (vectype))
9058     {
9059       if (code == GT_EXPR)
9060 	{
9061 	  bitop1 = BIT_NOT_EXPR;
9062 	  bitop2 = BIT_AND_EXPR;
9063 	}
9064       else if (code == GE_EXPR)
9065 	{
9066 	  bitop1 = BIT_NOT_EXPR;
9067 	  bitop2 = BIT_IOR_EXPR;
9068 	}
9069       else if (code == LT_EXPR)
9070 	{
9071 	  bitop1 = BIT_NOT_EXPR;
9072 	  bitop2 = BIT_AND_EXPR;
9073 	  std::swap (rhs1, rhs2);
9074 	  std::swap (dts[0], dts[1]);
9075 	}
9076       else if (code == LE_EXPR)
9077 	{
9078 	  bitop1 = BIT_NOT_EXPR;
9079 	  bitop2 = BIT_IOR_EXPR;
9080 	  std::swap (rhs1, rhs2);
9081 	  std::swap (dts[0], dts[1]);
9082 	}
9083       else
9084 	{
9085 	  bitop1 = BIT_XOR_EXPR;
9086 	  if (code == EQ_EXPR)
9087 	    bitop2 = BIT_NOT_EXPR;
9088 	}
9089     }
9090 
9091   if (!vec_stmt)
9092     {
9093       STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
9094       if (!slp_node)
9095 	vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
9096 				dts, ndts, NULL, NULL);
9097       if (bitop1 == NOP_EXPR)
9098 	return expand_vec_cmp_expr_p (vectype, mask_type, code);
9099       else
9100 	{
9101 	  machine_mode mode = TYPE_MODE (vectype);
9102 	  optab optab;
9103 
9104 	  optab = optab_for_tree_code (bitop1, vectype, optab_default);
9105 	  if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9106 	    return false;
9107 
9108 	  if (bitop2 != NOP_EXPR)
9109 	    {
9110 	      optab = optab_for_tree_code (bitop2, vectype, optab_default);
9111 	      if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9112 		return false;
9113 	    }
9114 	  return true;
9115 	}
9116     }
9117 
9118   /* Transform.  */
9119   if (!slp_node)
9120     {
9121       vec_oprnds0.create (1);
9122       vec_oprnds1.create (1);
9123     }
9124 
9125   /* Handle def.  */
9126   lhs = gimple_assign_lhs (stmt);
9127   mask = vect_create_destination_var (lhs, mask_type);
9128 
9129   /* Handle cmp expr.  */
9130   for (j = 0; j < ncopies; j++)
9131     {
9132       gassign *new_stmt = NULL;
9133       if (j == 0)
9134 	{
9135 	  if (slp_node)
9136 	    {
9137 	      auto_vec<tree, 2> ops;
9138 	      auto_vec<vec<tree>, 2> vec_defs;
9139 
9140 	      ops.safe_push (rhs1);
9141 	      ops.safe_push (rhs2);
9142 	      vect_get_slp_defs (ops, slp_node, &vec_defs);
9143 	      vec_oprnds1 = vec_defs.pop ();
9144 	      vec_oprnds0 = vec_defs.pop ();
9145 	    }
9146 	  else
9147 	    {
9148 	      vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt, vectype);
9149 	      vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt, vectype);
9150 	    }
9151 	}
9152       else
9153 	{
9154 	  vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0],
9155 						     vec_oprnds0.pop ());
9156 	  vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1],
9157 						     vec_oprnds1.pop ());
9158 	}
9159 
9160       if (!slp_node)
9161 	{
9162 	  vec_oprnds0.quick_push (vec_rhs1);
9163 	  vec_oprnds1.quick_push (vec_rhs2);
9164 	}
9165 
9166       /* Arguments are ready.  Create the new vector stmt.  */
9167       FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
9168 	{
9169 	  vec_rhs2 = vec_oprnds1[i];
9170 
9171 	  new_temp = make_ssa_name (mask);
9172 	  if (bitop1 == NOP_EXPR)
9173 	    {
9174 	      new_stmt = gimple_build_assign (new_temp, code,
9175 					      vec_rhs1, vec_rhs2);
9176 	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
9177 	    }
9178 	  else
9179 	    {
9180 	      if (bitop1 == BIT_NOT_EXPR)
9181 		new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
9182 	      else
9183 		new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
9184 						vec_rhs2);
9185 	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
9186 	      if (bitop2 != NOP_EXPR)
9187 		{
9188 		  tree res = make_ssa_name (mask);
9189 		  if (bitop2 == BIT_NOT_EXPR)
9190 		    new_stmt = gimple_build_assign (res, bitop2, new_temp);
9191 		  else
9192 		    new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
9193 						    new_temp);
9194 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
9195 		}
9196 	    }
9197 	  if (slp_node)
9198 	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9199 	}
9200 
9201       if (slp_node)
9202 	continue;
9203 
9204       if (j == 0)
9205 	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
9206       else
9207 	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
9208 
9209       prev_stmt_info = vinfo_for_stmt (new_stmt);
9210     }
9211 
9212   vec_oprnds0.release ();
9213   vec_oprnds1.release ();
9214 
9215   return true;
9216 }
9217 
9218 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
9219    can handle all live statements in the node.  Otherwise return true
9220    if STMT is not live or if vectorizable_live_operation can handle it.
9221    GSI and VEC_STMT are as for vectorizable_live_operation.  */
9222 
9223 static bool
9224 can_vectorize_live_stmts (gimple *stmt, gimple_stmt_iterator *gsi,
9225 			  slp_tree slp_node, gimple **vec_stmt)
9226 {
9227   if (slp_node)
9228     {
9229       gimple *slp_stmt;
9230       unsigned int i;
9231       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt)
9232 	{
9233 	  stmt_vec_info slp_stmt_info = vinfo_for_stmt (slp_stmt);
9234 	  if (STMT_VINFO_LIVE_P (slp_stmt_info)
9235 	      && !vectorizable_live_operation (slp_stmt, gsi, slp_node, i,
9236 					       vec_stmt))
9237 	    return false;
9238 	}
9239     }
9240   else if (STMT_VINFO_LIVE_P (vinfo_for_stmt (stmt))
9241 	   && !vectorizable_live_operation (stmt, gsi, slp_node, -1, vec_stmt))
9242     return false;
9243 
9244   return true;
9245 }
9246 
9247 /* Make sure the statement is vectorizable.  */
9248 
9249 bool
9250 vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node,
9251 		   slp_instance node_instance)
9252 {
9253   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9254   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
9255   enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
9256   bool ok;
9257   gimple *pattern_stmt;
9258   gimple_seq pattern_def_seq;
9259 
9260   if (dump_enabled_p ())
9261     {
9262       dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
9263       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
9264     }
9265 
9266   if (gimple_has_volatile_ops (stmt))
9267     {
9268       if (dump_enabled_p ())
9269         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9270                          "not vectorized: stmt has volatile operands\n");
9271 
9272       return false;
9273     }
9274 
9275   /* Skip stmts that do not need to be vectorized. In loops this is expected
9276      to include:
9277      - the COND_EXPR which is the loop exit condition
9278      - any LABEL_EXPRs in the loop
9279      - computations that are used only for array indexing or loop control.
9280      In basic blocks we only analyze statements that are a part of some SLP
9281      instance, therefore, all the statements are relevant.
9282 
     The pattern statement needs to be analyzed instead of the original
     statement if the original statement is not relevant.  Otherwise, we
     analyze both statements.  In basic blocks we are called from some SLP
     instance traversal, so don't analyze pattern stmts here; the pattern
     stmts will already be part of the SLP instance.  */
9288 
9289   pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
9290   if (!STMT_VINFO_RELEVANT_P (stmt_info)
9291       && !STMT_VINFO_LIVE_P (stmt_info))
9292     {
9293       if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9294           && pattern_stmt
9295           && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
9296               || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
9297         {
9298           /* Analyze PATTERN_STMT instead of the original stmt.  */
9299           stmt = pattern_stmt;
9300           stmt_info = vinfo_for_stmt (pattern_stmt);
9301           if (dump_enabled_p ())
9302             {
9303               dump_printf_loc (MSG_NOTE, vect_location,
9304                                "==> examining pattern statement: ");
9305               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
9306             }
9307         }
9308       else
9309         {
9310           if (dump_enabled_p ())
9311             dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
9312 
9313           return true;
9314         }
9315     }
9316   else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9317 	   && node == NULL
9318            && pattern_stmt
9319            && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
9320                || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
9321     {
9322       /* Analyze PATTERN_STMT too.  */
9323       if (dump_enabled_p ())
9324         {
9325           dump_printf_loc (MSG_NOTE, vect_location,
9326                            "==> examining pattern statement: ");
9327           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
9328         }
9329 
9330       if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node,
9331 			      node_instance))
9332         return false;
9333    }
9334 
9335   if (is_pattern_stmt_p (stmt_info)
9336       && node == NULL
9337       && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
9338     {
9339       gimple_stmt_iterator si;
9340 
9341       for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
9342 	{
9343 	  gimple *pattern_def_stmt = gsi_stmt (si);
9344 	  if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
9345 	      || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
9346 	    {
9347 	      /* Analyze def stmt of STMT if it's a pattern stmt.  */
9348 	      if (dump_enabled_p ())
9349 		{
9350 		  dump_printf_loc (MSG_NOTE, vect_location,
9351                                    "==> examining pattern def statement: ");
9352 		  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
9353 		}
9354 
9355 	      if (!vect_analyze_stmt (pattern_def_stmt,
9356 				      need_to_vectorize, node, node_instance))
9357 		return false;
9358 	    }
9359 	}
9360     }
9361 
9362   switch (STMT_VINFO_DEF_TYPE (stmt_info))
9363     {
9364       case vect_internal_def:
9365         break;
9366 
9367       case vect_reduction_def:
9368       case vect_nested_cycle:
9369          gcc_assert (!bb_vinfo
9370 		     && (relevance == vect_used_in_outer
9371 			 || relevance == vect_used_in_outer_by_reduction
9372 			 || relevance == vect_used_by_reduction
9373 			 || relevance == vect_unused_in_scope
9374 			 || relevance == vect_used_only_live));
9375          break;
9376 
9377       case vect_induction_def:
9378 	gcc_assert (!bb_vinfo);
9379 	break;
9380 
9381       case vect_constant_def:
9382       case vect_external_def:
9383       case vect_unknown_def_type:
9384       default:
9385         gcc_unreachable ();
9386     }
9387 
9388   if (STMT_VINFO_RELEVANT_P (stmt_info))
9389     {
9390       gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
9391       gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
9392 		  || (is_gimple_call (stmt)
9393 		      && gimple_call_lhs (stmt) == NULL_TREE));
9394       *need_to_vectorize = true;
9395     }
9396 
9397   if (PURE_SLP_STMT (stmt_info) && !node)
9398     {
9399       dump_printf_loc (MSG_NOTE, vect_location,
9400 		       "handled only by SLP analysis\n");
9401       return true;
9402     }
9403 
9404   ok = true;
9405   if (!bb_vinfo
9406       && (STMT_VINFO_RELEVANT_P (stmt_info)
9407 	  || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
9408     ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
9409 	  || vectorizable_conversion (stmt, NULL, NULL, node)
9410 	  || vectorizable_shift (stmt, NULL, NULL, node)
9411 	  || vectorizable_operation (stmt, NULL, NULL, node)
9412 	  || vectorizable_assignment (stmt, NULL, NULL, node)
9413 	  || vectorizable_load (stmt, NULL, NULL, node, NULL)
9414 	  || vectorizable_call (stmt, NULL, NULL, node)
9415 	  || vectorizable_store (stmt, NULL, NULL, node)
9416 	  || vectorizable_reduction (stmt, NULL, NULL, node, node_instance)
9417 	  || vectorizable_induction (stmt, NULL, NULL, node)
9418 	  || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
9419 	  || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
9420   else
9421     {
9422       if (bb_vinfo)
9423 	ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
9424 	      || vectorizable_conversion (stmt, NULL, NULL, node)
9425 	      || vectorizable_shift (stmt, NULL, NULL, node)
9426 	      || vectorizable_operation (stmt, NULL, NULL, node)
9427 	      || vectorizable_assignment (stmt, NULL, NULL, node)
9428 	      || vectorizable_load (stmt, NULL, NULL, node, NULL)
9429 	      || vectorizable_call (stmt, NULL, NULL, node)
9430 	      || vectorizable_store (stmt, NULL, NULL, node)
9431 	      || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
9432 	      || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
9433     }
9434 
9435   if (!ok)
9436     {
9437       if (dump_enabled_p ())
9438         {
9439           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9440                            "not vectorized: relevant stmt not ");
9441           dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
9442           dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
9443         }
9444 
9445       return false;
9446     }
9447 
9448   if (bb_vinfo)
9449     return true;
9450 
  /* Stmts that are (also) "live" (i.e. used outside the loop)
     need extra handling, except for vectorizable reductions.  */
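  /* For example, in

       for (i = 0; i < n; i++)
         last = a[i];
       ... = last;

     the definition of LAST inside the loop is also used after the loop,
     so it is "live" and needs the extra handling checked for here.  */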
9453   if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
9454       && !can_vectorize_live_stmts (stmt, NULL, node, NULL))
9455     {
9456       if (dump_enabled_p ())
9457         {
9458           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9459                            "not vectorized: live stmt not supported: ");
9460           dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
9461         }
9462 
9463        return false;
9464     }
9465 
9466   return true;
9467 }
9468 
9469 
9470 /* Function vect_transform_stmt.
9471 
9472    Create a vectorized stmt to replace STMT, and insert it at BSI.  */
9473 
9474 bool
9475 vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
9476 		     bool *grouped_store, slp_tree slp_node,
9477                      slp_instance slp_node_instance)
9478 {
9479   bool is_store = false;
9480   gimple *vec_stmt = NULL;
9481   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9482   bool done;
9483 
9484   gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
9485   gimple *old_vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
9486 
9487   bool nested_p = (STMT_VINFO_LOOP_VINFO (stmt_info)
9488 		   && nested_in_vect_loop_p
9489 		        (LOOP_VINFO_LOOP (STMT_VINFO_LOOP_VINFO (stmt_info)),
9490 			 stmt));
9491 
9492   switch (STMT_VINFO_TYPE (stmt_info))
9493     {
9494     case type_demotion_vec_info_type:
9495     case type_promotion_vec_info_type:
9496     case type_conversion_vec_info_type:
9497       done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
9498       gcc_assert (done);
9499       break;
9500 
9501     case induc_vec_info_type:
9502       done = vectorizable_induction (stmt, gsi, &vec_stmt, slp_node);
9503       gcc_assert (done);
9504       break;
9505 
9506     case shift_vec_info_type:
9507       done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
9508       gcc_assert (done);
9509       break;
9510 
9511     case op_vec_info_type:
9512       done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
9513       gcc_assert (done);
9514       break;
9515 
9516     case assignment_vec_info_type:
9517       done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
9518       gcc_assert (done);
9519       break;
9520 
9521     case load_vec_info_type:
9522       done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
9523                                 slp_node_instance);
9524       gcc_assert (done);
9525       break;
9526 
9527     case store_vec_info_type:
9528       done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
9529       gcc_assert (done);
9530       if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
9531 	{
          /* In case of interleaving, the whole chain is vectorized when the
             last store in the chain is reached.  Store stmts before the last
             one are skipped, and their vec_stmt_info must not be freed
             in the meantime.  */
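          /* For example, for a group of two interleaved stores

               a[2*i] = x;
               a[2*i+1] = y;

             the first store is skipped when it is reached, and the whole
             group is vectorized once the last store in the group is seen.  */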
9536 	  *grouped_store = true;
9537 	  stmt_vec_info group_info
9538 	    = vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info));
9539 	  if (GROUP_STORE_COUNT (group_info) == GROUP_SIZE (group_info))
9540 	    is_store = true;
9541 	}
9542       else
9543 	is_store = true;
9544       break;
9545 
9546     case condition_vec_info_type:
9547       done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
9548       gcc_assert (done);
9549       break;
9550 
9551     case comparison_vec_info_type:
9552       done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node);
9553       gcc_assert (done);
9554       break;
9555 
9556     case call_vec_info_type:
9557       done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
9558       stmt = gsi_stmt (*gsi);
9559       break;
9560 
9561     case call_simd_clone_vec_info_type:
9562       done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
9563       stmt = gsi_stmt (*gsi);
9564       break;
9565 
9566     case reduc_vec_info_type:
9567       done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node,
9568 				     slp_node_instance);
9569       gcc_assert (done);
9570       break;
9571 
9572     default:
9573       if (!STMT_VINFO_LIVE_P (stmt_info))
9574 	{
9575 	  if (dump_enabled_p ())
9576 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9577                              "stmt not supported.\n");
9578 	  gcc_unreachable ();
9579 	}
9580     }
9581 
9582   /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
9583      This would break hybrid SLP vectorization.  */
9584   if (slp_node)
9585     gcc_assert (!vec_stmt
9586 		&& STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt);
9587 
9588   /* Handle inner-loop stmts whose DEF is used in the loop-nest that
9589      is being vectorized, but outside the immediately enclosing loop.  */
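  /* For example, in a nest such as

       for (i = 0; i < n; i++)          <-- outer-loop
         {
           for (j = 0; j < m; j++)      <-- inner-loop
             s = ...;
           ... = s;                     <-- S is used in the outer loop
         }

     the vectorized definition of S is recorded on the relevant inner-loop
     exit phi so that outer-loop statements that use S can find it.  */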
9590   if (vec_stmt
9591       && nested_p
9592       && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
9593       && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
9594           || STMT_VINFO_RELEVANT (stmt_info) ==
9595                                            vect_used_in_outer_by_reduction))
9596     {
9597       struct loop *innerloop = LOOP_VINFO_LOOP (
9598                                 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
9599       imm_use_iterator imm_iter;
9600       use_operand_p use_p;
9601       tree scalar_dest;
9602       gimple *exit_phi;
9603 
9604       if (dump_enabled_p ())
9605         dump_printf_loc (MSG_NOTE, vect_location,
9606                          "Record the vdef for outer-loop vectorization.\n");
9607 
      /* Find the relevant loop-exit phi-node, and record the vec_stmt there
         (to be used when vectorizing outer-loop stmts that use the DEF of
         STMT).  */
9611       if (gimple_code (stmt) == GIMPLE_PHI)
9612         scalar_dest = PHI_RESULT (stmt);
9613       else
9614         scalar_dest = gimple_get_lhs (stmt);
9615 
9616       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
9617        {
9618          if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
9619            {
9620              exit_phi = USE_STMT (use_p);
9621              STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
9622            }
9623        }
9624     }
9625 
9626   /* Handle stmts whose DEF is used outside the loop-nest that is
9627      being vectorized.  */
9628   if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
9629     {
9630       done = can_vectorize_live_stmts (stmt, gsi, slp_node, &vec_stmt);
9631       gcc_assert (done);
9632     }
9633 
9634   if (vec_stmt)
9635     STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
9636 
9637   return is_store;
9638 }
9639 
9640 
9641 /* Remove a group of stores (for SLP or interleaving), free their
9642    stmt_vec_info.  */
9643 
9644 void
9645 vect_remove_stores (gimple *first_stmt)
9646 {
9647   gimple *next = first_stmt;
9648   gimple *tmp;
9649   gimple_stmt_iterator next_si;
9650 
9651   while (next)
9652     {
9653       stmt_vec_info stmt_info = vinfo_for_stmt (next);
9654 
9655       tmp = GROUP_NEXT_ELEMENT (stmt_info);
9656       if (is_pattern_stmt_p (stmt_info))
9657 	next = STMT_VINFO_RELATED_STMT (stmt_info);
9658       /* Free the attached stmt_vec_info and remove the stmt.  */
9659       next_si = gsi_for_stmt (next);
9660       unlink_stmt_vdef (next);
9661       gsi_remove (&next_si, true);
9662       release_defs (next);
9663       free_stmt_vec_info (next);
9664       next = tmp;
9665     }
9666 }
9667 
9668 
9669 /* Function new_stmt_vec_info.
9670 
9671    Create and initialize a new stmt_vec_info struct for STMT.  */
9672 
9673 stmt_vec_info
9674 new_stmt_vec_info (gimple *stmt, vec_info *vinfo)
9675 {
9676   stmt_vec_info res;
9677   res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
9678 
9679   STMT_VINFO_TYPE (res) = undef_vec_info_type;
9680   STMT_VINFO_STMT (res) = stmt;
9681   res->vinfo = vinfo;
9682   STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
9683   STMT_VINFO_LIVE_P (res) = false;
9684   STMT_VINFO_VECTYPE (res) = NULL;
9685   STMT_VINFO_VEC_STMT (res) = NULL;
9686   STMT_VINFO_VECTORIZABLE (res) = true;
9687   STMT_VINFO_IN_PATTERN_P (res) = false;
9688   STMT_VINFO_RELATED_STMT (res) = NULL;
9689   STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
9690   STMT_VINFO_DATA_REF (res) = NULL;
9691   STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;
9692   STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res) = ERROR_MARK;
9693 
9694   if (gimple_code (stmt) == GIMPLE_PHI
9695       && is_loop_header_bb_p (gimple_bb (stmt)))
9696     STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
9697   else
9698     STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
9699 
9700   STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
9701   STMT_SLP_TYPE (res) = loop_vect;
9702   STMT_VINFO_NUM_SLP_USES (res) = 0;
9703 
9704   GROUP_FIRST_ELEMENT (res) = NULL;
9705   GROUP_NEXT_ELEMENT (res) = NULL;
9706   GROUP_SIZE (res) = 0;
9707   GROUP_STORE_COUNT (res) = 0;
9708   GROUP_GAP (res) = 0;
9709   GROUP_SAME_DR_STMT (res) = NULL;
9710 
9711   return res;
9712 }
9713 
9714 
/* Create the global vector used to hold stmt_vec_info structs.  */
9716 
9717 void
9718 init_stmt_vec_info_vec (void)
9719 {
9720   gcc_assert (!stmt_vec_info_vec.exists ());
9721   stmt_vec_info_vec.create (50);
9722 }
9723 
9724 
/* Free the global vector of stmt_vec_info structs, freeing the
   individual stmt_vec_infos as well.  */
9726 
9727 void
9728 free_stmt_vec_info_vec (void)
9729 {
9730   unsigned int i;
9731   stmt_vec_info info;
9732   FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
9733     if (info != NULL)
9734       free_stmt_vec_info (STMT_VINFO_STMT (info));
9735   gcc_assert (stmt_vec_info_vec.exists ());
9736   stmt_vec_info_vec.release ();
9737 }
9738 
9739 
9740 /* Free stmt vectorization related info.  */
9741 
9742 void
9743 free_stmt_vec_info (gimple *stmt)
9744 {
9745   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9746 
9747   if (!stmt_info)
9748     return;
9749 
  /* Check if this statement has a related "pattern stmt"
     (introduced by the vectorizer during the pattern recognition
     pass).  Free the pattern's stmt_vec_info and the stmt_vec_infos
     of its pattern def sequence too.  */
9754   if (STMT_VINFO_IN_PATTERN_P (stmt_info))
9755     {
9756       stmt_vec_info patt_info
9757 	= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
9758       if (patt_info)
9759 	{
9760 	  gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
9761 	  gimple *patt_stmt = STMT_VINFO_STMT (patt_info);
9762 	  gimple_set_bb (patt_stmt, NULL);
9763 	  tree lhs = gimple_get_lhs (patt_stmt);
9764 	  if (lhs && TREE_CODE (lhs) == SSA_NAME)
9765 	    release_ssa_name (lhs);
9766 	  if (seq)
9767 	    {
9768 	      gimple_stmt_iterator si;
9769 	      for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
9770 		{
9771 		  gimple *seq_stmt = gsi_stmt (si);
9772 		  gimple_set_bb (seq_stmt, NULL);
9773 		  lhs = gimple_get_lhs (seq_stmt);
9774 		  if (lhs && TREE_CODE (lhs) == SSA_NAME)
9775 		    release_ssa_name (lhs);
9776 		  free_stmt_vec_info (seq_stmt);
9777 		}
9778 	    }
9779 	  free_stmt_vec_info (patt_stmt);
9780 	}
9781     }
9782 
9783   STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
9784   STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
9785   set_vinfo_for_stmt (stmt, NULL);
9786   free (stmt_info);
9787 }
9788 
9789 
9790 /* Function get_vectype_for_scalar_type_and_size.
9791 
9792    Returns the vector type corresponding to SCALAR_TYPE  and SIZE as supported
9793    by the target.  */
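/* For example, with SCALAR_TYPE == int and SIZE == 16 this returns a
   "vector(4) int" type on a target that supports a 16-byte V4SImode
   vector mode, while SIZE == 0 makes the target's preferred SIMD mode
   for SImode determine the result.  The exact type is target-dependent.  */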
9794 
9795 tree
9796 get_vectype_for_scalar_type_and_size (tree scalar_type, poly_uint64 size)
9797 {
9798   tree orig_scalar_type = scalar_type;
9799   scalar_mode inner_mode;
9800   machine_mode simd_mode;
9801   poly_uint64 nunits;
9802   tree vectype;
9803 
9804   if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
9805       && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
9806     return NULL_TREE;
9807 
9808   unsigned int nbytes = GET_MODE_SIZE (inner_mode);
9809 
9810   /* For vector types of elements whose mode precision doesn't
     match their type's precision we use an element type of mode
9812      precision.  The vectorization routines will have to make sure
9813      they support the proper result truncation/extension.
9814      We also make sure to build vector types with INTEGER_TYPE
9815      component type only.  */
9816   if (INTEGRAL_TYPE_P (scalar_type)
9817       && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
9818 	  || TREE_CODE (scalar_type) != INTEGER_TYPE))
9819     scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
9820 						  TYPE_UNSIGNED (scalar_type));
9821 
9822   /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
9823      When the component mode passes the above test simply use a type
9824      corresponding to that mode.  The theory is that any use that
9825      would cause problems with this will disable vectorization anyway.  */
9826   else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
9827 	   && !INTEGRAL_TYPE_P (scalar_type))
9828     scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
9829 
9830   /* We can't build a vector type of elements with alignment bigger than
9831      their size.  */
9832   else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
9833     scalar_type = lang_hooks.types.type_for_mode (inner_mode,
9834 						  TYPE_UNSIGNED (scalar_type));
9835 
  /* If we fell back to using the mode, fail if there was
     no scalar type for it.  */
9838   if (scalar_type == NULL_TREE)
9839     return NULL_TREE;
9840 
  /* If no size was supplied use the mode the target prefers.  Otherwise
     look up a vector mode of the specified size.  */
9843   if (known_eq (size, 0U))
9844     simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
9845   else if (!multiple_p (size, nbytes, &nunits)
9846 	   || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
9847     return NULL_TREE;
9848   /* NOTE: nunits == 1 is allowed to support single element vector types.  */
9849   if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits))
9850     return NULL_TREE;
9851 
9852   vectype = build_vector_type (scalar_type, nunits);
9853 
9854   if (!VECTOR_MODE_P (TYPE_MODE (vectype))
9855       && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
9856     return NULL_TREE;
9857 
9858   /* Re-attach the address-space qualifier if we canonicalized the scalar
9859      type.  */
9860   if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
9861     return build_qualified_type
9862 	     (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
9863 
9864   return vectype;
9865 }
9866 
9867 poly_uint64 current_vector_size;
9868 
9869 /* Function get_vectype_for_scalar_type.
9870 
9871    Returns the vector type corresponding to SCALAR_TYPE as supported
9872    by the target.  */
9873 
9874 tree
9875 get_vectype_for_scalar_type (tree scalar_type)
9876 {
9877   tree vectype;
9878   vectype = get_vectype_for_scalar_type_and_size (scalar_type,
9879 						  current_vector_size);
9880   if (vectype
9881       && known_eq (current_vector_size, 0U))
9882     current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
9883   return vectype;
9884 }
9885 
9886 /* Function get_mask_type_for_scalar_type.
9887 
   Returns the mask type corresponding to the result of a comparison
   of vectors of the specified SCALAR_TYPE, as supported by the target.  */
9890 
9891 tree
9892 get_mask_type_for_scalar_type (tree scalar_type)
9893 {
9894   tree vectype = get_vectype_for_scalar_type (scalar_type);
9895 
9896   if (!vectype)
9897     return NULL;
9898 
9899   return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
9900 				  current_vector_size);
9901 }
9902 
9903 /* Function get_same_sized_vectype
9904 
   Returns a vector type corresponding to SCALAR_TYPE with the same size
   as VECTOR_TYPE, if supported by the target.  */
9907 
9908 tree
9909 get_same_sized_vectype (tree scalar_type, tree vector_type)
9910 {
9911   if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
9912     return build_same_sized_truth_vector_type (vector_type);
9913 
9914   return get_vectype_for_scalar_type_and_size
9915 	   (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
9916 }
9917 
9918 /* Function vect_is_simple_use.
9919 
9920    Input:
9921    VINFO - the vect info of the loop or basic block that is being vectorized.
9922    OPERAND - operand in the loop or bb.
9923    Output:
9924    DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
9925    DT - the type of definition
9926 
9927    Returns whether a stmt with OPERAND can be vectorized.
9928    For loops, supportable operands are constants, loop invariants, and operands
9929    that are defined by the current iteration of the loop.  Unsupportable
9930    operands are those that are defined by a previous iteration of the loop (as
9931    is the case in reduction/induction computations).
9932    For basic blocks, supportable operands are constants and bb invariants.
9933    For now, operands defined outside the basic block are not supported.  */
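/* For example, in

     for (i = 0; i < n; i++)
       x[i] = a * y[i] + 1;

   the loaded value of Y[I] is defined in the current iteration
   (vect_internal_def), A is a loop invariant (vect_external_def) and the
   constant 1 is a vect_constant_def; all three are supportable operands.  */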
9934 
9935 bool
9936 vect_is_simple_use (tree operand, vec_info *vinfo,
9937                     gimple **def_stmt, enum vect_def_type *dt)
9938 {
9939   *def_stmt = NULL;
9940   *dt = vect_unknown_def_type;
9941 
9942   if (dump_enabled_p ())
9943     {
9944       dump_printf_loc (MSG_NOTE, vect_location,
9945                        "vect_is_simple_use: operand ");
9946       dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
9947       dump_printf (MSG_NOTE, "\n");
9948     }
9949 
9950   if (CONSTANT_CLASS_P (operand))
9951     {
9952       *dt = vect_constant_def;
9953       return true;
9954     }
9955 
9956   if (is_gimple_min_invariant (operand))
9957     {
9958       *dt = vect_external_def;
9959       return true;
9960     }
9961 
9962   if (TREE_CODE (operand) != SSA_NAME)
9963     {
9964       if (dump_enabled_p ())
9965 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9966 			 "not ssa-name.\n");
9967       return false;
9968     }
9969 
9970   if (SSA_NAME_IS_DEFAULT_DEF (operand))
9971     {
9972       *dt = vect_external_def;
9973       return true;
9974     }
9975 
9976   *def_stmt = SSA_NAME_DEF_STMT (operand);
9977   if (dump_enabled_p ())
9978     {
9979       dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
9980       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
9981     }
9982 
9983   if (! vect_stmt_in_region_p (vinfo, *def_stmt))
9984     *dt = vect_external_def;
9985   else
9986     {
9987       stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
9988       *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
9989     }
9990 
9991   if (dump_enabled_p ())
9992     {
9993       dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
9994       switch (*dt)
9995 	{
9996 	case vect_uninitialized_def:
9997 	  dump_printf (MSG_NOTE, "uninitialized\n");
9998 	  break;
9999 	case vect_constant_def:
10000 	  dump_printf (MSG_NOTE, "constant\n");
10001 	  break;
10002 	case vect_external_def:
10003 	  dump_printf (MSG_NOTE, "external\n");
10004 	  break;
10005 	case vect_internal_def:
10006 	  dump_printf (MSG_NOTE, "internal\n");
10007 	  break;
10008 	case vect_induction_def:
10009 	  dump_printf (MSG_NOTE, "induction\n");
10010 	  break;
10011 	case vect_reduction_def:
10012 	  dump_printf (MSG_NOTE, "reduction\n");
10013 	  break;
10014 	case vect_double_reduction_def:
10015 	  dump_printf (MSG_NOTE, "double reduction\n");
10016 	  break;
10017 	case vect_nested_cycle:
10018 	  dump_printf (MSG_NOTE, "nested cycle\n");
10019 	  break;
10020 	case vect_unknown_def_type:
10021 	  dump_printf (MSG_NOTE, "unknown\n");
10022 	  break;
10023 	}
10024     }
10025 
10026   if (*dt == vect_unknown_def_type)
10027     {
10028       if (dump_enabled_p ())
10029         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10030                          "Unsupported pattern.\n");
10031       return false;
10032     }
10033 
10034   switch (gimple_code (*def_stmt))
10035     {
10036     case GIMPLE_PHI:
10037     case GIMPLE_ASSIGN:
10038     case GIMPLE_CALL:
10039       break;
10040     default:
10041       if (dump_enabled_p ())
10042         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10043                          "unsupported defining stmt:\n");
10044       return false;
10045     }
10046 
10047   return true;
10048 }
10049 
10050 /* Function vect_is_simple_use.
10051 
10052    Same as vect_is_simple_use but also determines the vector operand
10053    type of OPERAND and stores it to *VECTYPE.  If the definition of
10054    OPERAND is vect_uninitialized_def, vect_constant_def or
   vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
   is responsible for computing the best suited vector type for the
   scalar operand.  */
10058 
10059 bool
10060 vect_is_simple_use (tree operand, vec_info *vinfo,
10061 		    gimple **def_stmt, enum vect_def_type *dt, tree *vectype)
10062 {
10063   if (!vect_is_simple_use (operand, vinfo, def_stmt, dt))
10064     return false;
10065 
10066   /* Now get a vector type if the def is internal, otherwise supply
10067      NULL_TREE and leave it up to the caller to figure out a proper
10068      type for the use stmt.  */
10069   if (*dt == vect_internal_def
10070       || *dt == vect_induction_def
10071       || *dt == vect_reduction_def
10072       || *dt == vect_double_reduction_def
10073       || *dt == vect_nested_cycle)
10074     {
10075       stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
10076 
10077       if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10078           && !STMT_VINFO_RELEVANT (stmt_info)
10079           && !STMT_VINFO_LIVE_P (stmt_info))
10080 	stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
10081 
10082       *vectype = STMT_VINFO_VECTYPE (stmt_info);
10083       gcc_assert (*vectype != NULL_TREE);
10084     }
10085   else if (*dt == vect_uninitialized_def
10086 	   || *dt == vect_constant_def
10087 	   || *dt == vect_external_def)
10088     *vectype = NULL_TREE;
10089   else
10090     gcc_unreachable ();
10091 
10092   return true;
10093 }
10094 
10095 
10096 /* Function supportable_widening_operation
10097 
10098    Check whether an operation represented by the code CODE is a
10099    widening operation that is supported by the target platform in
10100    vector form (i.e., when operating on arguments of type VECTYPE_IN
10101    producing a result of type VECTYPE_OUT).
10102 
10103    Widening operations we currently support are NOP (CONVERT), FLOAT
10104    and WIDEN_MULT.  This function checks if these operations are supported
10105    by the target platform either directly (via vector tree-codes), or via
10106    target builtins.
10107 
10108    Output:
10109    - CODE1 and CODE2 are codes of vector operations to be used when
10110    vectorizing the operation, if available.
10111    - MULTI_STEP_CVT determines the number of required intermediate steps in
10112    case of multi-step conversion (like char->short->int - in that case
10113    MULTI_STEP_CVT will be 1).
10114    - INTERM_TYPES contains the intermediate type required to perform the
10115    widening operation (short in the above example).  */
10116 
10117 bool
10118 supportable_widening_operation (enum tree_code code, gimple *stmt,
10119 				tree vectype_out, tree vectype_in,
10120                                 enum tree_code *code1, enum tree_code *code2,
10121                                 int *multi_step_cvt,
10122                                 vec<tree> *interm_types)
10123 {
10124   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
10125   loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
10126   struct loop *vect_loop = NULL;
10127   machine_mode vec_mode;
10128   enum insn_code icode1, icode2;
10129   optab optab1, optab2;
10130   tree vectype = vectype_in;
10131   tree wide_vectype = vectype_out;
10132   enum tree_code c1, c2;
10133   int i;
10134   tree prev_type, intermediate_type;
10135   machine_mode intermediate_mode, prev_mode;
10136   optab optab3, optab4;
10137 
10138   *multi_step_cvt = 0;
10139   if (loop_info)
10140     vect_loop = LOOP_VINFO_LOOP (loop_info);
10141 
10142   switch (code)
10143     {
10144     case WIDEN_MULT_EXPR:
10145       /* The result of a vectorized widening operation usually requires
10146 	 two vectors (because the widened results do not fit into one vector).
10147 	 The generated vector results would normally be expected to be
10148 	 generated in the same order as in the original scalar computation,
10149 	 i.e. if 8 results are generated in each vector iteration, they are
10150 	 to be organized as follows:
10151 		vect1: [res1,res2,res3,res4],
10152 		vect2: [res5,res6,res7,res8].
10153 
10154 	 However, in the special case that the result of the widening
10155 	 operation is used in a reduction computation only, the order doesn't
10156 	 matter (because when vectorizing a reduction we change the order of
10157 	 the computation).  Some targets can take advantage of this and
10158 	 generate more efficient code.  For example, targets like Altivec,
10159 	 that support widen_mult using a sequence of {mult_even,mult_odd}
10160 	 generate the following vectors:
10161 		vect1: [res1,res3,res5,res7],
10162 		vect2: [res2,res4,res6,res8].
10163 
10164 	 When vectorizing outer-loops, we execute the inner-loop sequentially
10165 	 (each vectorized inner-loop iteration contributes to VF outer-loop
	 iterations in parallel).  We therefore don't allow changing the
	 order of the computation in the inner-loop during outer-loop
	 vectorization.  */
10169       /* TODO: Another case in which order doesn't *really* matter is when we
10170 	 widen and then contract again, e.g. (short)((int)x * y >> 8).
10171 	 Normally, pack_trunc performs an even/odd permute, whereas the
10172 	 repack from an even/odd expansion would be an interleave, which
10173 	 would be significantly simpler for e.g. AVX2.  */
10174       /* In any case, in order to avoid duplicating the code below, recurse
10175 	 on VEC_WIDEN_MULT_EVEN_EXPR.  If it succeeds, all the return values
10176 	 are properly set up for the caller.  If we fail, we'll continue with
10177 	 a VEC_WIDEN_MULT_LO/HI_EXPR check.  */
10178       if (vect_loop
10179 	  && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
10180 	  && !nested_in_vect_loop_p (vect_loop, stmt)
10181 	  && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
10182 					     stmt, vectype_out, vectype_in,
10183 					     code1, code2, multi_step_cvt,
10184 					     interm_types))
10185         {
          /* Elements in a vector with vect_used_by_reduction property cannot
             be reordered if the use chain with this property does not have the
             same operation.  One such example is s += a * b, where elements
             in a and b cannot be reordered.  Here we check if the vector defined
             by STMT is only directly used in the reduction statement.  */
10191           tree lhs = gimple_assign_lhs (stmt);
10192           use_operand_p dummy;
10193           gimple *use_stmt;
10194           stmt_vec_info use_stmt_info = NULL;
10195           if (single_imm_use (lhs, &dummy, &use_stmt)
10196               && (use_stmt_info = vinfo_for_stmt (use_stmt))
10197               && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
10198             return true;
10199         }
10200       c1 = VEC_WIDEN_MULT_LO_EXPR;
10201       c2 = VEC_WIDEN_MULT_HI_EXPR;
10202       break;
10203 
10204     case DOT_PROD_EXPR:
10205       c1 = DOT_PROD_EXPR;
10206       c2 = DOT_PROD_EXPR;
10207       break;
10208 
10209     case SAD_EXPR:
10210       c1 = SAD_EXPR;
10211       c2 = SAD_EXPR;
10212       break;
10213 
10214     case VEC_WIDEN_MULT_EVEN_EXPR:
10215       /* Support the recursion induced just above.  */
10216       c1 = VEC_WIDEN_MULT_EVEN_EXPR;
10217       c2 = VEC_WIDEN_MULT_ODD_EXPR;
10218       break;
10219 
10220     case WIDEN_LSHIFT_EXPR:
10221       c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
10222       c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
10223       break;
10224 
10225     CASE_CONVERT:
10226       c1 = VEC_UNPACK_LO_EXPR;
10227       c2 = VEC_UNPACK_HI_EXPR;
10228       break;
10229 
10230     case FLOAT_EXPR:
10231       c1 = VEC_UNPACK_FLOAT_LO_EXPR;
10232       c2 = VEC_UNPACK_FLOAT_HI_EXPR;
10233       break;
10234 
10235     case FIX_TRUNC_EXPR:
10236       /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
10237 	 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
10238 	 computing the operation.  */
10239       return false;
10240 
10241     default:
10242       gcc_unreachable ();
10243     }
10244 
10245   if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
10246     std::swap (c1, c2);
10247 
10248   if (code == FIX_TRUNC_EXPR)
10249     {
      /* The signedness is determined from the output operand.  */
10251       optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
10252       optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
10253     }
10254   else
10255     {
10256       optab1 = optab_for_tree_code (c1, vectype, optab_default);
10257       optab2 = optab_for_tree_code (c2, vectype, optab_default);
10258     }
10259 
10260   if (!optab1 || !optab2)
10261     return false;
10262 
10263   vec_mode = TYPE_MODE (vectype);
10264   if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
10265        || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
10266     return false;
10267 
10268   *code1 = c1;
10269   *code2 = c2;
10270 
10271   if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
10272       && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
      /* For scalar masks we may have different boolean
	 vector types having the same QImode.  Thus we
	 add an additional check on the number of elements.  */
10276     return (!VECTOR_BOOLEAN_TYPE_P (vectype)
10277 	    || known_eq (TYPE_VECTOR_SUBPARTS (vectype),
10278 			 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));
10279 
10280   /* Check if it's a multi-step conversion that can be done using intermediate
10281      types.  */
10282 
10283   prev_type = vectype;
10284   prev_mode = vec_mode;
10285 
10286   if (!CONVERT_EXPR_CODE_P (code))
10287     return false;
10288 
  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
     not.  */
10293   interm_types->create (MAX_INTERM_CVT_STEPS);
10294   for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
10295     {
10296       intermediate_mode = insn_data[icode1].operand[0].mode;
10297       if (VECTOR_BOOLEAN_TYPE_P (prev_type))
10298 	{
10299 	  intermediate_type = vect_halve_mask_nunits (prev_type);
10300 	  if (intermediate_mode != TYPE_MODE (intermediate_type))
10301 	    return false;
10302 	}
10303       else
10304 	intermediate_type
10305 	  = lang_hooks.types.type_for_mode (intermediate_mode,
10306 					    TYPE_UNSIGNED (prev_type));
10307 
10308       optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
10309       optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
10310 
10311       if (!optab3 || !optab4
10312           || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
10313 	  || insn_data[icode1].operand[0].mode != intermediate_mode
10314 	  || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
10315 	  || insn_data[icode2].operand[0].mode != intermediate_mode
10316 	  || ((icode1 = optab_handler (optab3, intermediate_mode))
10317 	      == CODE_FOR_nothing)
10318 	  || ((icode2 = optab_handler (optab4, intermediate_mode))
10319 	      == CODE_FOR_nothing))
10320 	break;
10321 
10322       interm_types->quick_push (intermediate_type);
10323       (*multi_step_cvt)++;
10324 
10325       if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
10326 	  && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
10327 	return (!VECTOR_BOOLEAN_TYPE_P (vectype)
10328 		|| known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
10329 			     TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));
10330 
10331       prev_type = intermediate_type;
10332       prev_mode = intermediate_mode;
10333     }
10334 
10335   interm_types->release ();
10336   return false;
10337 }
10338 
10339 
10340 /* Function supportable_narrowing_operation
10341 
10342    Check whether an operation represented by the code CODE is a
10343    narrowing operation that is supported by the target platform in
10344    vector form (i.e., when operating on arguments of type VECTYPE_IN
10345    and producing a result of type VECTYPE_OUT).
10346 
10347    Narrowing operations we currently support are NOP (CONVERT) and
10348    FIX_TRUNC.  This function checks if these operations are supported by
10349    the target platform directly via vector tree-codes.
10350 
10351    Output:
10352    - CODE1 is the code of a vector operation to be used when
10353    vectorizing the operation, if available.
10354    - MULTI_STEP_CVT determines the number of required intermediate steps in
10355    case of multi-step conversion (like int->short->char - in that case
10356    MULTI_STEP_CVT will be 1).
10357    - INTERM_TYPES contains the intermediate type required to perform the
10358    narrowing operation (short in the above example).   */
10359 
10360 bool
10361 supportable_narrowing_operation (enum tree_code code,
10362 				 tree vectype_out, tree vectype_in,
10363 				 enum tree_code *code1, int *multi_step_cvt,
10364                                  vec<tree> *interm_types)
10365 {
10366   machine_mode vec_mode;
10367   enum insn_code icode1;
10368   optab optab1, interm_optab;
10369   tree vectype = vectype_in;
10370   tree narrow_vectype = vectype_out;
10371   enum tree_code c1;
10372   tree intermediate_type, prev_type;
10373   machine_mode intermediate_mode, prev_mode;
10374   int i;
10375   bool uns;
10376 
10377   *multi_step_cvt = 0;
10378   switch (code)
10379     {
10380     CASE_CONVERT:
10381       c1 = VEC_PACK_TRUNC_EXPR;
10382       break;
10383 
10384     case FIX_TRUNC_EXPR:
10385       c1 = VEC_PACK_FIX_TRUNC_EXPR;
10386       break;
10387 
10388     case FLOAT_EXPR:
10389       /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
10390 	 tree code and optabs used for computing the operation.  */
10391       return false;
10392 
10393     default:
10394       gcc_unreachable ();
10395     }
10396 
10397   if (code == FIX_TRUNC_EXPR)
    /* The signedness is determined from the output operand.  */
10399     optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
10400   else
10401     optab1 = optab_for_tree_code (c1, vectype, optab_default);
10402 
10403   if (!optab1)
10404     return false;
10405 
10406   vec_mode = TYPE_MODE (vectype);
10407   if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
10408     return false;
10409 
10410   *code1 = c1;
10411 
10412   if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
    /* For scalar masks we may have different boolean
       vector types having the same QImode.  Thus we
       add an additional check on the number of elements.  */
10416     return (!VECTOR_BOOLEAN_TYPE_P (vectype)
10417 	    || known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
10418 			 TYPE_VECTOR_SUBPARTS (narrow_vectype)));
10419 
10420   /* Check if it's a multi-step conversion that can be done using intermediate
10421      types.  */
10422   prev_mode = vec_mode;
10423   prev_type = vectype;
10424   if (code == FIX_TRUNC_EXPR)
10425     uns = TYPE_UNSIGNED (vectype_out);
10426   else
10427     uns = TYPE_UNSIGNED (vectype);
10428 
10429   /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
10430      conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
10431      costly than signed.  */
10432   if (code == FIX_TRUNC_EXPR && uns)
10433     {
10434       enum insn_code icode2;
10435 
10436       intermediate_type
10437 	= lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
10438       interm_optab
10439 	= optab_for_tree_code (c1, intermediate_type, optab_default);
10440       if (interm_optab != unknown_optab
10441 	  && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
10442 	  && insn_data[icode1].operand[0].mode
10443 	     == insn_data[icode2].operand[0].mode)
10444 	{
10445 	  uns = false;
10446 	  optab1 = interm_optab;
10447 	  icode1 = icode2;
10448 	}
10449     }
10450 
  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the narrowing sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
10454   interm_types->create (MAX_INTERM_CVT_STEPS);
10455   for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
10456     {
10457       intermediate_mode = insn_data[icode1].operand[0].mode;
10458       if (VECTOR_BOOLEAN_TYPE_P (prev_type))
10459 	{
10460 	  intermediate_type = vect_double_mask_nunits (prev_type);
10461 	  if (intermediate_mode != TYPE_MODE (intermediate_type))
10462 	    return false;
10463 	}
10464       else
10465 	intermediate_type
10466 	  = lang_hooks.types.type_for_mode (intermediate_mode, uns);
10467       interm_optab
10468 	= optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
10469 			       optab_default);
10470       if (!interm_optab
10471 	  || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
10472 	  || insn_data[icode1].operand[0].mode != intermediate_mode
10473 	  || ((icode1 = optab_handler (interm_optab, intermediate_mode))
10474 	      == CODE_FOR_nothing))
10475 	break;
10476 
10477       interm_types->quick_push (intermediate_type);
10478       (*multi_step_cvt)++;
10479 
10480       if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
10481 	return (!VECTOR_BOOLEAN_TYPE_P (vectype)
10482 		|| known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
10483 			     TYPE_VECTOR_SUBPARTS (narrow_vectype)));
10484 
10485       prev_mode = intermediate_mode;
10486       prev_type = intermediate_type;
10487       optab1 = interm_optab;
10488     }
10489 
10490   interm_types->release ();
10491   return false;
10492 }
10493 
10494 /* Generate and return a statement that sets vector mask MASK such that
10495    MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I.  */
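/* For example, with a 4-element MASK, START_INDEX == 0 and END_INDEX == 3,
   the generated IFN_WHILE_ULT call sets MASK to { true, true, true, false },
   i.e. only the first three lanes are active.  */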
10496 
10497 gcall *
10498 vect_gen_while (tree mask, tree start_index, tree end_index)
10499 {
10500   tree cmp_type = TREE_TYPE (start_index);
10501   tree mask_type = TREE_TYPE (mask);
10502   gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
10503 						       cmp_type, mask_type,
10504 						       OPTIMIZE_FOR_SPEED));
10505   gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
10506 					    start_index, end_index,
10507 					    build_zero_cst (mask_type));
10508   gimple_call_set_lhs (call, mask);
10509   return call;
10510 }
10511 
10512 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
10513    J + START_INDEX < END_INDEX for all J <= I.  Add the statements to SEQ.  */
10514 
10515 tree
10516 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
10517 		    tree end_index)
10518 {
10519   tree tmp = make_ssa_name (mask_type);
10520   gcall *call = vect_gen_while (tmp, start_index, end_index);
10521   gimple_seq_add_stmt (seq, call);
10522   return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
10523 }
10524