1 /* Statement Analysis and Transformation for Vectorization
2    Copyright (C) 2003-2018 Free Software Foundation, Inc.
3    Contributed by Dorit Naishlos <dorit@il.ibm.com>
4    and Ira Rosen <irar@il.ibm.com>
5 
6 This file is part of GCC.
7 
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12 
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
16 for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3.  If not see
20 <http://www.gnu.org/licenses/>.  */
21 
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h"		/* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "tree-ssa-loop.h"
47 #include "tree-scalar-evolution.h"
48 #include "tree-vectorizer.h"
49 #include "builtins.h"
50 #include "internal-fn.h"
51 #include "tree-vector-builder.h"
52 #include "vec-perm-indices.h"
53 #include "tree-ssa-loop-niter.h"
54 #include "gimple-fold.h"
55 
56 /* For lang_hooks.types.type_for_mode.  */
57 #include "langhooks.h"
58 
59 /* Return the vectorized type for the given statement.  */
60 
61 tree
62 stmt_vectype (struct _stmt_vec_info *stmt_info)
63 {
64   return STMT_VINFO_VECTYPE (stmt_info);
65 }
66 
67 /* Return TRUE iff the given statement is in an inner loop relative to
68    the loop being vectorized.  */
69 bool
70 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
71 {
72   gimple *stmt = STMT_VINFO_STMT (stmt_info);
73   basic_block bb = gimple_bb (stmt);
74   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop;
76 
77   if (!loop_vinfo)
78     return false;
79 
80   loop = LOOP_VINFO_LOOP (loop_vinfo);
81 
82   return (bb->loop_father == loop->inner);
83 }
84 
85 /* Record the cost of a statement, either by directly informing the
86    target model or by saving it in a vector for later processing.
87    Return a preliminary estimate of the statement's cost.  */
88 
89 unsigned
90 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
91 		  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
92 		  int misalign, enum vect_cost_model_location where)
93 {
94   if ((kind == vector_load || kind == unaligned_load)
95       && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
96     kind = vector_gather_load;
97   if ((kind == vector_store || kind == unaligned_store)
98       && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
99     kind = vector_scatter_store;
100   if (body_cost_vec)
101     {
102       tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
103       stmt_info_for_cost si = { count, kind,
104 			        stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
105 				misalign };
106       body_cost_vec->safe_push (si);
107       return (unsigned)
108 	(builtin_vectorization_cost (kind, vectype, misalign) * count);
109     }
110   else
111     return add_stmt_cost (stmt_info->vinfo->target_cost_data,
112 			  count, kind, stmt_info, misalign, where);
113 }
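
/* For example, a caller that wants to account for two unaligned vector
   loads in the loop body might do (a sketch; INSIDE_COST, BODY_COST_VEC,
   STMT_INFO and DR are the caller's local variables):

     inside_cost += record_stmt_cost (body_cost_vec, 2, unaligned_load,
				      stmt_info, DR_MISALIGNMENT (dr),
				      vect_body);

   With a non-NULL BODY_COST_VEC the cost is only queued here; the entries
   are passed to the target hook via add_stmt_cost once the containing
   analysis decides to keep them.  */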
114 
115 /* Return a variable of type ELEM_TYPE[NELEMS].  */
116 
117 static tree
118 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
119 {
120   return create_tmp_var (build_array_type_nelts (elem_type, nelems),
121 			 "vect_array");
122 }
123 
124 /* ARRAY is an array of vectors created by create_vector_array.
125    Return an SSA_NAME for the vector in index N.  The reference
126    is part of the vectorization of STMT and the vector is associated
127    with scalar destination SCALAR_DEST.  */
128 
129 static tree
130 read_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
131 		   tree array, unsigned HOST_WIDE_INT n)
132 {
133   tree vect_type, vect, vect_name, array_ref;
134   gimple *new_stmt;
135 
136   gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
137   vect_type = TREE_TYPE (TREE_TYPE (array));
138   vect = vect_create_destination_var (scalar_dest, vect_type);
139   array_ref = build4 (ARRAY_REF, vect_type, array,
140 		      build_int_cst (size_type_node, n),
141 		      NULL_TREE, NULL_TREE);
142 
143   new_stmt = gimple_build_assign (vect, array_ref);
144   vect_name = make_ssa_name (vect, new_stmt);
145   gimple_assign_set_lhs (new_stmt, vect_name);
146   vect_finish_stmt_generation (stmt, new_stmt, gsi);
147 
148   return vect_name;
149 }
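
/* For example, with N == 2 the statement generated by read_vector_array
   is roughly (SSA names are illustrative):

     vect_x.7_23 = vect_array[2];

   where vect_x.7_23 is a fresh SSA name of the array's element type
   (a vector type).  */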
150 
151 /* ARRAY is an array of vectors created by create_vector_array.
152    Emit code to store SSA_NAME VECT in index N of the array.
153    The store is part of the vectorization of STMT.  */
154 
155 static void
156 write_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree vect,
157 		    tree array, unsigned HOST_WIDE_INT n)
158 {
159   tree array_ref;
160   gimple *new_stmt;
161 
162   array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
163 		      build_int_cst (size_type_node, n),
164 		      NULL_TREE, NULL_TREE);
165 
166   new_stmt = gimple_build_assign (array_ref, vect);
167   vect_finish_stmt_generation (stmt, new_stmt, gsi);
168 }
169 
/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR, using ALIAS_PTR_TYPE as the type of the (zero) offset and
   hence as the source of the reference's alias information.  The memory
   reference replaces the original references of the data reference
   (and its group).  */
173 
174 static tree
175 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
176 {
177   tree mem_ref;
178 
179   mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
180   /* Arrays have the same alignment as their type.  */
181   set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
182   return mem_ref;
183 }
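
/* A sketch of how create_array_ref is typically used in this file
   (local names are illustrative): the array type is paired with an alias
   pointer type taken from the original scalar reference, e.g.

     tree ref_type = reference_alias_ptr_type (DR_REF (first_dr));
     tree data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);

   and DATA_REF then serves as the source of a load-lanes or the
   destination of a store-lanes statement.  */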
184 
185 /* Utility functions used by vect_mark_stmts_to_be_vectorized.  */
186 
187 /* Function vect_mark_relevant.
188 
189    Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */
190 
191 static void
192 vect_mark_relevant (vec<gimple *> *worklist, gimple *stmt,
193 		    enum vect_relevant relevant, bool live_p)
194 {
195   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
196   enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
197   bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
198   gimple *pattern_stmt;
199 
200   if (dump_enabled_p ())
201     {
202       dump_printf_loc (MSG_NOTE, vect_location,
203 		       "mark relevant %d, live %d: ", relevant, live_p);
204       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
205     }
206 
  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern; in such cases the
     stmt itself should be marked.  */
211   if (STMT_VINFO_IN_PATTERN_P (stmt_info))
212     {
213       /* This is the last stmt in a sequence that was detected as a
214 	 pattern that can potentially be vectorized.  Don't mark the stmt
215 	 as relevant/live because it's not going to be vectorized.
216 	 Instead mark the pattern-stmt that replaces it.  */
217 
218       pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
219 
220       if (dump_enabled_p ())
221 	dump_printf_loc (MSG_NOTE, vect_location,
222 			 "last stmt in pattern. don't mark"
223 			 " relevant/live.\n");
224       stmt_info = vinfo_for_stmt (pattern_stmt);
225       gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
226       save_relevant = STMT_VINFO_RELEVANT (stmt_info);
227       save_live_p = STMT_VINFO_LIVE_P (stmt_info);
228       stmt = pattern_stmt;
229     }
230 
231   STMT_VINFO_LIVE_P (stmt_info) |= live_p;
232   if (relevant > STMT_VINFO_RELEVANT (stmt_info))
233     STMT_VINFO_RELEVANT (stmt_info) = relevant;
234 
235   if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
236       && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
237     {
238       if (dump_enabled_p ())
239         dump_printf_loc (MSG_NOTE, vect_location,
240                          "already marked relevant/live.\n");
241       return;
242     }
243 
244   worklist->safe_push (stmt);
245 }
246 
247 
248 /* Function is_simple_and_all_uses_invariant
249 
250    Return true if STMT is simple and all uses of it are invariant.  */
251 
252 bool
253 is_simple_and_all_uses_invariant (gimple *stmt, loop_vec_info loop_vinfo)
254 {
255   tree op;
256   gimple *def_stmt;
257   ssa_op_iter iter;
258 
259   if (!is_gimple_assign (stmt))
260     return false;
261 
262   FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
263     {
264       enum vect_def_type dt = vect_uninitialized_def;
265 
266       if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt))
267 	{
268 	  if (dump_enabled_p ())
269 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
270 			     "use not simple.\n");
271 	  return false;
272 	}
273 
274       if (dt != vect_external_def && dt != vect_constant_def)
275 	return false;
276     }
277   return true;
278 }
279 
280 /* Function vect_stmt_relevant_p.
281 
282    Return true if STMT in loop that is represented by LOOP_VINFO is
283    "relevant for vectorization".
284 
285    A stmt is considered "relevant for vectorization" if:
286    - it has uses outside the loop.
287    - it has vdefs (it alters memory).
   - it is a control stmt in the loop (other than the loop exit condition).
289 
290    CHECKME: what other side effects would the vectorizer allow?  */
291 
292 static bool
293 vect_stmt_relevant_p (gimple *stmt, loop_vec_info loop_vinfo,
294 		      enum vect_relevant *relevant, bool *live_p)
295 {
296   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
297   ssa_op_iter op_iter;
298   imm_use_iterator imm_iter;
299   use_operand_p use_p;
300   def_operand_p def_p;
301 
302   *relevant = vect_unused_in_scope;
303   *live_p = false;
304 
305   /* cond stmt other than loop exit cond.  */
306   if (is_ctrl_stmt (stmt)
307       && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
308          != loop_exit_ctrl_vec_info_type)
309     *relevant = vect_used_in_scope;
310 
311   /* changing memory.  */
312   if (gimple_code (stmt) != GIMPLE_PHI)
313     if (gimple_vdef (stmt)
314 	&& !gimple_clobber_p (stmt))
315       {
316 	if (dump_enabled_p ())
317 	  dump_printf_loc (MSG_NOTE, vect_location,
318                            "vec_stmt_relevant_p: stmt has vdefs.\n");
319 	*relevant = vect_used_in_scope;
320       }
321 
322   /* uses outside the loop.  */
323   FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
324     {
325       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
326 	{
327 	  basic_block bb = gimple_bb (USE_STMT (use_p));
328 	  if (!flow_bb_inside_loop_p (loop, bb))
329 	    {
330 	      if (dump_enabled_p ())
331 		dump_printf_loc (MSG_NOTE, vect_location,
332                                  "vec_stmt_relevant_p: used out of loop.\n");
333 
334 	      if (is_gimple_debug (USE_STMT (use_p)))
335 		continue;
336 
	      /* We expect all such uses to be in the loop exit phis
		 (because of loop-closed SSA form).  */
339 	      gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
340 	      gcc_assert (bb == single_exit (loop)->dest);
341 
342               *live_p = true;
343 	    }
344 	}
345     }
346 
347   if (*live_p && *relevant == vect_unused_in_scope
348       && !is_simple_and_all_uses_invariant (stmt, loop_vinfo))
349     {
350       if (dump_enabled_p ())
351 	dump_printf_loc (MSG_NOTE, vect_location,
352 			 "vec_stmt_relevant_p: stmt live but not relevant.\n");
353       *relevant = vect_used_only_live;
354     }
355 
356   return (*live_p || *relevant);
357 }
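
/* For instance, given

     for (i = 0; i < n; i++)
       {
	 a[i] = b[i] + 1;
	 s += b[i];
       }
     ... = s;

   the store to a[i] is relevant because it alters memory (it has a vdef),
   and the statement computing s is live because its result is used,
   through the loop exit phi, outside the loop.  */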
358 
359 
360 /* Function exist_non_indexing_operands_for_use_p
361 
362    USE is one of the uses attached to STMT.  Check if USE is
363    used in STMT for anything other than indexing an array.  */
364 
365 static bool
366 exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
367 {
368   tree operand;
369   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
370 
371   /* USE corresponds to some operand in STMT.  If there is no data
372      reference in STMT, then any operand that corresponds to USE
373      is not indexing an array.  */
374   if (!STMT_VINFO_DATA_REF (stmt_info))
375     return true;
376 
  /* STMT has a data_ref.  FORNOW this means that it is one of
378      the following forms:
379      -1- ARRAY_REF = var
380      -2- var = ARRAY_REF
381      (This should have been verified in analyze_data_refs).
382 
383      'var' in the second case corresponds to a def, not a use,
384      so USE cannot correspond to any operands that are not used
385      for array indexing.
386 
387      Therefore, all we need to check is if STMT falls into the
388      first case, and whether var corresponds to USE.  */
389 
390   if (!gimple_assign_copy_p (stmt))
391     {
392       if (is_gimple_call (stmt)
393 	  && gimple_call_internal_p (stmt))
394 	{
395 	  internal_fn ifn = gimple_call_internal_fn (stmt);
396 	  int mask_index = internal_fn_mask_index (ifn);
397 	  if (mask_index >= 0
398 	      && use == gimple_call_arg (stmt, mask_index))
399 	    return true;
400 	  int stored_value_index = internal_fn_stored_value_index (ifn);
401 	  if (stored_value_index >= 0
402 	      && use == gimple_call_arg (stmt, stored_value_index))
403 	    return true;
404 	  if (internal_gather_scatter_fn_p (ifn)
405 	      && use == gimple_call_arg (stmt, 1))
406 	    return true;
407 	}
408       return false;
409     }
410 
411   if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
412     return false;
413   operand = gimple_assign_rhs1 (stmt);
414   if (TREE_CODE (operand) != SSA_NAME)
415     return false;
416 
417   if (operand == use)
418     return true;
419 
420   return false;
421 }
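
/* For example, in the statement a[i_14] = x_5 the use of x_5 is a
   non-indexing operand (it is the stored value), whereas i_14 appears
   only inside the array reference and thus has no non-indexing use in
   the statement.  */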
422 
423 
424 /*
425    Function process_use.
426 
427    Inputs:
428    - a USE in STMT in a loop represented by LOOP_VINFO
429    - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
430      that defined USE.  This is done by calling mark_relevant and passing it
431      the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
432    - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
433      be performed.
434 
435    Outputs:
436    Generally, LIVE_P and RELEVANT are used to define the liveness and
437    relevance info of the DEF_STMT of this USE:
438        STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
439        STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
440    Exceptions:
441    - case 1: If USE is used only for address computations (e.g. array indexing),
442    which does not need to be directly vectorized, then the liveness/relevance
443    of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
   skip DEF_STMT because it has already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
   be modified accordingly.
448 
449    Return true if everything is as expected. Return false otherwise.  */
450 
451 static bool
452 process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo,
453 	     enum vect_relevant relevant, vec<gimple *> *worklist,
454 	     bool force)
455 {
456   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
457   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
458   stmt_vec_info dstmt_vinfo;
459   basic_block bb, def_bb;
460   gimple *def_stmt;
461   enum vect_def_type dt;
462 
463   /* case 1: we are only interested in uses that need to be vectorized.  Uses
464      that are used for address computation are not considered relevant.  */
465   if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
466      return true;
467 
468   if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &dt))
469     {
470       if (dump_enabled_p ())
471         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
472                          "not vectorized: unsupported use in stmt.\n");
473       return false;
474     }
475 
476   if (!def_stmt || gimple_nop_p (def_stmt))
477     return true;
478 
479   def_bb = gimple_bb (def_stmt);
480   if (!flow_bb_inside_loop_p (loop, def_bb))
481     {
482       if (dump_enabled_p ())
483 	dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
484       return true;
485     }
486 
487   /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
488      DEF_STMT must have already been processed, because this should be the
489      only way that STMT, which is a reduction-phi, was put in the worklist,
490      as there should be no other uses for DEF_STMT in the loop.  So we just
491      check that everything is as expected, and we are done.  */
492   dstmt_vinfo = vinfo_for_stmt (def_stmt);
493   bb = gimple_bb (stmt);
494   if (gimple_code (stmt) == GIMPLE_PHI
495       && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
496       && gimple_code (def_stmt) != GIMPLE_PHI
497       && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
498       && bb->loop_father == def_bb->loop_father)
499     {
500       if (dump_enabled_p ())
501 	dump_printf_loc (MSG_NOTE, vect_location,
502                          "reduc-stmt defining reduc-phi in the same nest.\n");
503       if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
504 	dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
505       gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
506       gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
507 		  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
508       return true;
509     }
510 
511   /* case 3a: outer-loop stmt defining an inner-loop stmt:
512 	outer-loop-header-bb:
513 		d = def_stmt
514 	inner-loop:
515 		stmt # use (d)
516 	outer-loop-tail-bb:
517 		...		  */
518   if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
519     {
520       if (dump_enabled_p ())
521 	dump_printf_loc (MSG_NOTE, vect_location,
522                          "outer-loop def-stmt defining inner-loop stmt.\n");
523 
524       switch (relevant)
525 	{
526 	case vect_unused_in_scope:
527 	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
528 		      vect_used_in_scope : vect_unused_in_scope;
529 	  break;
530 
531 	case vect_used_in_outer_by_reduction:
532           gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
533 	  relevant = vect_used_by_reduction;
534 	  break;
535 
536 	case vect_used_in_outer:
537           gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
538 	  relevant = vect_used_in_scope;
539 	  break;
540 
541 	case vect_used_in_scope:
542 	  break;
543 
544 	default:
545 	  gcc_unreachable ();
546 	}
547     }
548 
549   /* case 3b: inner-loop stmt defining an outer-loop stmt:
550 	outer-loop-header-bb:
551 		...
552 	inner-loop:
553 		d = def_stmt
554 	outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
555 		stmt # use (d)		*/
556   else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
557     {
558       if (dump_enabled_p ())
559 	dump_printf_loc (MSG_NOTE, vect_location,
560                          "inner-loop def-stmt defining outer-loop stmt.\n");
561 
562       switch (relevant)
563         {
564         case vect_unused_in_scope:
565           relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
566             || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
567                       vect_used_in_outer_by_reduction : vect_unused_in_scope;
568           break;
569 
570         case vect_used_by_reduction:
571 	case vect_used_only_live:
572           relevant = vect_used_in_outer_by_reduction;
573           break;
574 
575         case vect_used_in_scope:
576           relevant = vect_used_in_outer;
577           break;
578 
579         default:
580           gcc_unreachable ();
581         }
582     }
583   /* We are also not interested in uses on loop PHI backedges that are
584      inductions.  Otherwise we'll needlessly vectorize the IV increment
585      and cause hybrid SLP for SLP inductions.  Unless the PHI is live
586      of course.  */
587   else if (gimple_code (stmt) == GIMPLE_PHI
588 	   && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
589 	   && ! STMT_VINFO_LIVE_P (stmt_vinfo)
590 	   && (PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (bb->loop_father))
591 	       == use))
592     {
593       if (dump_enabled_p ())
594 	dump_printf_loc (MSG_NOTE, vect_location,
595                          "induction value on backedge.\n");
596       return true;
597     }
598 
599 
600   vect_mark_relevant (worklist, def_stmt, relevant, false);
601   return true;
602 }
603 
604 
605 /* Function vect_mark_stmts_to_be_vectorized.
606 
607    Not all stmts in the loop need to be vectorized. For example:
608 
609      for i...
610        for j...
611    1.    T0 = i + j
612    2.	 T1 = a[T0]
613 
614    3.    j = j + 1
615 
   Stmts 1 and 3 do not need to be vectorized, because loop control and
617    addressing of vectorized data-refs are handled differently.
618 
619    This pass detects such stmts.  */
620 
621 bool
622 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
623 {
624   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
625   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
626   unsigned int nbbs = loop->num_nodes;
627   gimple_stmt_iterator si;
628   gimple *stmt;
629   unsigned int i;
630   stmt_vec_info stmt_vinfo;
631   basic_block bb;
632   gimple *phi;
633   bool live_p;
634   enum vect_relevant relevant;
635 
636   if (dump_enabled_p ())
637     dump_printf_loc (MSG_NOTE, vect_location,
638                      "=== vect_mark_stmts_to_be_vectorized ===\n");
639 
640   auto_vec<gimple *, 64> worklist;
641 
642   /* 1. Init worklist.  */
643   for (i = 0; i < nbbs; i++)
644     {
645       bb = bbs[i];
646       for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
647 	{
648 	  phi = gsi_stmt (si);
649 	  if (dump_enabled_p ())
650 	    {
651 	      dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
652 	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
653 	    }
654 
655 	  if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
656 	    vect_mark_relevant (&worklist, phi, relevant, live_p);
657 	}
658       for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
659 	{
660 	  stmt = gsi_stmt (si);
661 	  if (dump_enabled_p ())
662 	    {
663 	      dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
664 	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
665 	    }
666 
667 	  if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
668 	    vect_mark_relevant (&worklist, stmt, relevant, live_p);
669 	}
670     }
671 
672   /* 2. Process_worklist */
673   while (worklist.length () > 0)
674     {
675       use_operand_p use_p;
676       ssa_op_iter iter;
677 
678       stmt = worklist.pop ();
679       if (dump_enabled_p ())
680 	{
681           dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
682           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
683 	}
684 
685       /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
686 	 (DEF_STMT) as relevant/irrelevant according to the relevance property
687 	 of STMT.  */
688       stmt_vinfo = vinfo_for_stmt (stmt);
689       relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
690 
691       /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
692 	 propagated as is to the DEF_STMTs of its USEs.
693 
694 	 One exception is when STMT has been identified as defining a reduction
695 	 variable; in this case we set the relevance to vect_used_by_reduction.
696 	 This is because we distinguish between two kinds of relevant stmts -
697 	 those that are used by a reduction computation, and those that are
698 	 (also) used by a regular computation.  This allows us later on to
699 	 identify stmts that are used solely by a reduction, and therefore the
700 	 order of the results that they produce does not have to be kept.  */
701 
702       switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
703         {
704           case vect_reduction_def:
705 	    gcc_assert (relevant != vect_unused_in_scope);
706 	    if (relevant != vect_unused_in_scope
707 		&& relevant != vect_used_in_scope
708 		&& relevant != vect_used_by_reduction
709 		&& relevant != vect_used_only_live)
710 	      {
711 		if (dump_enabled_p ())
712 		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
713 				   "unsupported use of reduction.\n");
714 		return false;
715 	      }
716 	    break;
717 
718           case vect_nested_cycle:
719 	    if (relevant != vect_unused_in_scope
720 		&& relevant != vect_used_in_outer_by_reduction
721 		&& relevant != vect_used_in_outer)
722               {
723                 if (dump_enabled_p ())
724                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
725                                    "unsupported use of nested cycle.\n");
726 
727                 return false;
728               }
729             break;
730 
731           case vect_double_reduction_def:
732 	    if (relevant != vect_unused_in_scope
733 		&& relevant != vect_used_by_reduction
734 		&& relevant != vect_used_only_live)
735               {
736                 if (dump_enabled_p ())
737                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
738                                    "unsupported use of double reduction.\n");
739 
740                 return false;
741               }
742             break;
743 
744           default:
745             break;
746         }
747 
748       if (is_pattern_stmt_p (stmt_vinfo))
749         {
750           /* Pattern statements are not inserted into the code, so
751              FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
752              have to scan the RHS or function arguments instead.  */
753           if (is_gimple_assign (stmt))
754             {
755 	      enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
756 	      tree op = gimple_assign_rhs1 (stmt);
757 
758 	      i = 1;
759 	      if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
760 		{
761 		  if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
762 				    relevant, &worklist, false)
763 		      || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
764 				       relevant, &worklist, false))
765 		    return false;
766 		  i = 2;
767 		}
768 	      for (; i < gimple_num_ops (stmt); i++)
769                 {
770 		  op = gimple_op (stmt, i);
771                   if (TREE_CODE (op) == SSA_NAME
772 		      && !process_use (stmt, op, loop_vinfo, relevant,
773 				       &worklist, false))
774                     return false;
775                  }
776             }
777           else if (is_gimple_call (stmt))
778             {
779               for (i = 0; i < gimple_call_num_args (stmt); i++)
780                 {
781                   tree arg = gimple_call_arg (stmt, i);
782 		  if (!process_use (stmt, arg, loop_vinfo, relevant,
783 				    &worklist, false))
784                     return false;
785                 }
786             }
787         }
788       else
789         FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
790           {
791             tree op = USE_FROM_PTR (use_p);
792 	    if (!process_use (stmt, op, loop_vinfo, relevant,
793 			      &worklist, false))
794               return false;
795           }
796 
797       if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
798 	{
799 	  gather_scatter_info gs_info;
800 	  if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info))
801 	    gcc_unreachable ();
802 	  if (!process_use (stmt, gs_info.offset, loop_vinfo, relevant,
803 			    &worklist, true))
804 	    return false;
805 	}
806     } /* while worklist */
807 
808   return true;
809 }
810 
811 
812 /* Function vect_model_simple_cost.
813 
814    Models cost for simple operations, i.e. those that only emit ncopies of a
815    single op.  Right now, this does not account for multiple insns that could
816    be generated for the single vector op.  We will handle that shortly.  */
817 
818 void
819 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
820 			enum vect_def_type *dt,
821 			int ndts,
822 			stmt_vector_for_cost *prologue_cost_vec,
823 			stmt_vector_for_cost *body_cost_vec)
824 {
825   int i;
826   int inside_cost = 0, prologue_cost = 0;
827 
828   /* The SLP costs were already calculated during SLP tree build.  */
829   gcc_assert (!PURE_SLP_STMT (stmt_info));
830 
  /* Cost the "broadcast" of a scalar operand into a vector operand.
832      Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
833      cost model.  */
834   for (i = 0; i < ndts; i++)
835     if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
836       prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
837 					 stmt_info, 0, vect_prologue);
838 
839   /* Pass the inside-of-loop statements to the target-specific cost model.  */
840   inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
841 				  stmt_info, 0, vect_body);
842 
843   if (dump_enabled_p ())
844     dump_printf_loc (MSG_NOTE, vect_location,
845                      "vect_model_simple_cost: inside_cost = %d, "
846                      "prologue_cost = %d .\n", inside_cost, prologue_cost);
847 }
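
/* For example, for a statement with two operands, one of which is defined
   outside the loop, and NCOPIES == 2, the code above records one
   scalar_to_vec cost in the prologue (for broadcasting the invariant
   operand) and two vector_stmt costs in the loop body.  */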
848 
849 
850 /* Model cost for type demotion and promotion operations.  PWR is normally
851    zero for single-step promotions and demotions.  It will be one if
852    two-step promotion/demotion is required, and so on.  Each additional
853    step doubles the number of instructions required.  */
854 
855 static void
856 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
857 				    enum vect_def_type *dt, int pwr)
858 {
859   int i, tmp;
860   int inside_cost = 0, prologue_cost = 0;
861   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
862   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
863   void *target_cost_data;
864 
865   /* The SLP costs were already calculated during SLP tree build.  */
866   gcc_assert (!PURE_SLP_STMT (stmt_info));
867 
868   if (loop_vinfo)
869     target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
870   else
871     target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
872 
873   for (i = 0; i < pwr + 1; i++)
874     {
875       tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
876 	(i + 1) : i;
877       inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
878 				    vec_promote_demote, stmt_info, 0,
879 				    vect_body);
880     }
881 
  /* FORNOW: Assuming a maximum of 2 args per stmt.  */
883   for (i = 0; i < 2; i++)
884     if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
885       prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
886 				      stmt_info, 0, vect_prologue);
887 
888   if (dump_enabled_p ())
889     dump_printf_loc (MSG_NOTE, vect_location,
890                      "vect_model_promotion_demotion_cost: inside_cost = %d, "
891                      "prologue_cost = %d .\n", inside_cost, prologue_cost);
892 }
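
/* For example, a two-step promotion (PWR == 1) is costed as
   vect_pow2 (1) + vect_pow2 (2) == 2 + 4 vec_promote_demote operations,
   since each promotion step doubles the number of statements needed,
   whereas the corresponding two-step demotion is costed as
   vect_pow2 (0) + vect_pow2 (1) == 1 + 2 such operations.  */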
893 
894 /* Function vect_model_store_cost
895 
896    Models cost for stores.  In the case of grouped accesses, one access
897    has the overhead of the grouped access attributed to it.  */
898 
899 void
900 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
901 		       vect_memory_access_type memory_access_type,
902 		       vec_load_store_type vls_type, slp_tree slp_node,
903 		       stmt_vector_for_cost *prologue_cost_vec,
904 		       stmt_vector_for_cost *body_cost_vec)
905 {
906   unsigned int inside_cost = 0, prologue_cost = 0;
907   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
908   gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
909   bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
910 
911   if (vls_type == VLS_STORE_INVARIANT)
912     prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
913 				       stmt_info, 0, vect_prologue);
914 
915   /* Grouped stores update all elements in the group at once,
916      so we want the DR for the first statement.  */
917   if (!slp_node && grouped_access_p)
918     {
919       first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
920       dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
921     }
922 
923   /* True if we should include any once-per-group costs as well as
924      the cost of the statement itself.  For SLP we only get called
925      once per group anyhow.  */
926   bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
927 
928   /* We assume that the cost of a single store-lanes instruction is
929      equivalent to the cost of GROUP_SIZE separate stores.  If a grouped
930      access is instead being provided by a permute-and-store operation,
931      include the cost of the permutes.  */
932   if (first_stmt_p
933       && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
934     {
      /* Uses high and low interleave or shuffle operations for each
936 	 needed permute.  */
937       int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
938       int nstmts = ncopies * ceil_log2 (group_size) * group_size;
939       inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
940 				      stmt_info, 0, vect_body);
941 
942       if (dump_enabled_p ())
943         dump_printf_loc (MSG_NOTE, vect_location,
944                          "vect_model_store_cost: strided group_size = %d .\n",
945                          group_size);
946     }
947 
948   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
949   /* Costs of the stores.  */
950   if (memory_access_type == VMAT_ELEMENTWISE
951       || memory_access_type == VMAT_GATHER_SCATTER)
952     {
953       /* N scalar stores plus extracting the elements.  */
954       unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
955       inside_cost += record_stmt_cost (body_cost_vec,
956 				       ncopies * assumed_nunits,
957 				       scalar_store, stmt_info, 0, vect_body);
958     }
959   else
960     vect_get_store_cost (dr, ncopies, &inside_cost, body_cost_vec);
961 
962   if (memory_access_type == VMAT_ELEMENTWISE
963       || memory_access_type == VMAT_STRIDED_SLP)
964     {
      /* Extracting each scalar element to be stored from its vector.  */
966       unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
967       inside_cost += record_stmt_cost (body_cost_vec,
968 				       ncopies * assumed_nunits,
969 				       vec_to_scalar, stmt_info, 0, vect_body);
970     }
971 
972   if (dump_enabled_p ())
973     dump_printf_loc (MSG_NOTE, vect_location,
974                      "vect_model_store_cost: inside_cost = %d, "
975                      "prologue_cost = %d .\n", inside_cost, prologue_cost);
976 }
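
/* As an illustration of the permute cost above: for an interleaved store
   group of size 4 (GROUP_SIZE == 4) and NCOPIES == 1, the
   VMAT_CONTIGUOUS_PERMUTE path counts

     nstmts = 1 * ceil_log2 (4) * 4 == 8

   vec_perm statements in the loop body, on top of the costs of the
   vector stores themselves.  */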
977 
978 
979 /* Calculate cost of DR's memory access.  */
980 void
981 vect_get_store_cost (struct data_reference *dr, int ncopies,
982 		     unsigned int *inside_cost,
983 		     stmt_vector_for_cost *body_cost_vec)
984 {
985   int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
986   gimple *stmt = DR_STMT (dr);
987   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
988 
989   switch (alignment_support_scheme)
990     {
991     case dr_aligned:
992       {
993 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
994 					  vector_store, stmt_info, 0,
995 					  vect_body);
996 
997         if (dump_enabled_p ())
998           dump_printf_loc (MSG_NOTE, vect_location,
999                            "vect_model_store_cost: aligned.\n");
1000         break;
1001       }
1002 
1003     case dr_unaligned_supported:
1004       {
1005         /* Here, we assign an additional cost for the unaligned store.  */
1006 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1007 					  unaligned_store, stmt_info,
1008 					  DR_MISALIGNMENT (dr), vect_body);
1009         if (dump_enabled_p ())
1010           dump_printf_loc (MSG_NOTE, vect_location,
1011                            "vect_model_store_cost: unaligned supported by "
1012                            "hardware.\n");
1013         break;
1014       }
1015 
1016     case dr_unaligned_unsupported:
1017       {
1018         *inside_cost = VECT_MAX_COST;
1019 
1020         if (dump_enabled_p ())
1021           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1022                            "vect_model_store_cost: unsupported access.\n");
1023         break;
1024       }
1025 
1026     default:
1027       gcc_unreachable ();
1028     }
1029 }
1030 
1031 
1032 /* Function vect_model_load_cost
1033 
1034    Models cost for loads.  In the case of grouped accesses, one access has
1035    the overhead of the grouped access attributed to it.  Since unaligned
1036    accesses are supported for loads, we also account for the costs of the
1037    access scheme chosen.  */
1038 
1039 void
1040 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1041 		      vect_memory_access_type memory_access_type,
1042 		      slp_tree slp_node,
1043 		      stmt_vector_for_cost *prologue_cost_vec,
1044 		      stmt_vector_for_cost *body_cost_vec)
1045 {
1046   gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
1047   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1048   unsigned int inside_cost = 0, prologue_cost = 0;
1049   bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1050 
1051   /* Grouped loads read all elements in the group at once,
1052      so we want the DR for the first statement.  */
1053   if (!slp_node && grouped_access_p)
1054     {
1055       first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1056       dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1057     }
1058 
1059   /* True if we should include any once-per-group costs as well as
1060      the cost of the statement itself.  For SLP we only get called
1061      once per group anyhow.  */
1062   bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
1063 
1064   /* We assume that the cost of a single load-lanes instruction is
1065      equivalent to the cost of GROUP_SIZE separate loads.  If a grouped
1066      access is instead being provided by a load-and-permute operation,
1067      include the cost of the permutes.  */
1068   if (first_stmt_p
1069       && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1070     {
      /* Uses even and odd extract operations or shuffle operations
1072 	 for each needed permute.  */
1073       int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
1074       int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1075       inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1076 				      stmt_info, 0, vect_body);
1077 
1078       if (dump_enabled_p ())
1079         dump_printf_loc (MSG_NOTE, vect_location,
1080                          "vect_model_load_cost: strided group_size = %d .\n",
1081                          group_size);
1082     }
1083 
1084   /* The loads themselves.  */
1085   if (memory_access_type == VMAT_ELEMENTWISE
1086       || memory_access_type == VMAT_GATHER_SCATTER)
1087     {
1088       /* N scalar loads plus gathering them into a vector.  */
1089       tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1090       unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1091       inside_cost += record_stmt_cost (body_cost_vec,
1092 				       ncopies * assumed_nunits,
1093 				       scalar_load, stmt_info, 0, vect_body);
1094     }
1095   else
1096     vect_get_load_cost (dr, ncopies, first_stmt_p,
1097 			&inside_cost, &prologue_cost,
1098 			prologue_cost_vec, body_cost_vec, true);
1099   if (memory_access_type == VMAT_ELEMENTWISE
1100       || memory_access_type == VMAT_STRIDED_SLP)
1101     inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1102 				     stmt_info, 0, vect_body);
1103 
1104   if (dump_enabled_p ())
1105     dump_printf_loc (MSG_NOTE, vect_location,
1106                      "vect_model_load_cost: inside_cost = %d, "
1107                      "prologue_cost = %d .\n", inside_cost, prologue_cost);
1108 }
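
/* Likewise for loads: the VMAT_CONTIGUOUS_PERMUTE path above counts
   ncopies * ceil_log2 (group_size) * group_size vec_perm statements,
   while the VMAT_ELEMENTWISE and VMAT_GATHER_SCATTER paths count one
   scalar_load per vector element, plus one vec_construct per copy for
   VMAT_ELEMENTWISE and VMAT_STRIDED_SLP.  */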
1109 
1110 
1111 /* Calculate cost of DR's memory access.  */
1112 void
1113 vect_get_load_cost (struct data_reference *dr, int ncopies,
1114 		    bool add_realign_cost, unsigned int *inside_cost,
1115 		    unsigned int *prologue_cost,
1116 		    stmt_vector_for_cost *prologue_cost_vec,
1117 		    stmt_vector_for_cost *body_cost_vec,
1118 		    bool record_prologue_costs)
1119 {
1120   int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1121   gimple *stmt = DR_STMT (dr);
1122   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1123 
1124   switch (alignment_support_scheme)
1125     {
1126     case dr_aligned:
1127       {
1128 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1129 					  stmt_info, 0, vect_body);
1130 
1131         if (dump_enabled_p ())
1132           dump_printf_loc (MSG_NOTE, vect_location,
1133                            "vect_model_load_cost: aligned.\n");
1134 
1135         break;
1136       }
1137     case dr_unaligned_supported:
1138       {
1139         /* Here, we assign an additional cost for the unaligned load.  */
1140 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1141 					  unaligned_load, stmt_info,
1142 					  DR_MISALIGNMENT (dr), vect_body);
1143 
1144         if (dump_enabled_p ())
1145           dump_printf_loc (MSG_NOTE, vect_location,
1146                            "vect_model_load_cost: unaligned supported by "
1147                            "hardware.\n");
1148 
1149         break;
1150       }
1151     case dr_explicit_realign:
1152       {
1153 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1154 					  vector_load, stmt_info, 0, vect_body);
1155 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1156 					  vec_perm, stmt_info, 0, vect_body);
1157 
1158         /* FIXME: If the misalignment remains fixed across the iterations of
1159            the containing loop, the following cost should be added to the
1160            prologue costs.  */
1161         if (targetm.vectorize.builtin_mask_for_load)
1162 	  *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1163 					    stmt_info, 0, vect_body);
1164 
1165         if (dump_enabled_p ())
1166           dump_printf_loc (MSG_NOTE, vect_location,
1167                            "vect_model_load_cost: explicit realign\n");
1168 
1169         break;
1170       }
1171     case dr_explicit_realign_optimized:
1172       {
1173         if (dump_enabled_p ())
1174           dump_printf_loc (MSG_NOTE, vect_location,
1175                            "vect_model_load_cost: unaligned software "
1176                            "pipelined.\n");
1177 
1178         /* Unaligned software pipeline has a load of an address, an initial
1179            load, and possibly a mask operation to "prime" the loop.  However,
1180            if this is an access in a group of loads, which provide grouped
1181            access, then the above cost should only be considered for one
1182            access in the group.  Inside the loop, there is a load op
1183            and a realignment op.  */
1184 
1185         if (add_realign_cost && record_prologue_costs)
1186           {
1187 	    *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1188 						vector_stmt, stmt_info,
1189 						0, vect_prologue);
1190             if (targetm.vectorize.builtin_mask_for_load)
1191 	      *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1192 						  vector_stmt, stmt_info,
1193 						  0, vect_prologue);
1194           }
1195 
1196 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1197 					  stmt_info, 0, vect_body);
1198 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1199 					  stmt_info, 0, vect_body);
1200 
1201         if (dump_enabled_p ())
1202           dump_printf_loc (MSG_NOTE, vect_location,
1203                            "vect_model_load_cost: explicit realign optimized"
1204                            "\n");
1205 
1206         break;
1207       }
1208 
1209     case dr_unaligned_unsupported:
1210       {
1211         *inside_cost = VECT_MAX_COST;
1212 
1213         if (dump_enabled_p ())
1214           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1215                            "vect_model_load_cost: unsupported access.\n");
1216         break;
1217       }
1218 
1219     default:
1220       gcc_unreachable ();
1221     }
1222 }
1223 
1224 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1225    the loop preheader for the vectorized stmt STMT.  */
1226 
1227 static void
1228 vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
1229 {
1230   if (gsi)
1231     vect_finish_stmt_generation (stmt, new_stmt, gsi);
1232   else
1233     {
1234       stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1235       loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1236 
1237       if (loop_vinfo)
1238         {
1239           struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1240 	  basic_block new_bb;
1241 	  edge pe;
1242 
1243           if (nested_in_vect_loop_p (loop, stmt))
1244             loop = loop->inner;
1245 
1246 	  pe = loop_preheader_edge (loop);
1247           new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1248           gcc_assert (!new_bb);
1249 	}
1250       else
1251        {
1252           bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1253           basic_block bb;
1254           gimple_stmt_iterator gsi_bb_start;
1255 
1256           gcc_assert (bb_vinfo);
1257           bb = BB_VINFO_BB (bb_vinfo);
1258           gsi_bb_start = gsi_after_labels (bb);
1259           gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1260        }
1261     }
1262 
1263   if (dump_enabled_p ())
1264     {
1265       dump_printf_loc (MSG_NOTE, vect_location,
1266                        "created new init_stmt: ");
1267       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1268     }
1269 }
1270 
1271 /* Function vect_init_vector.
1272 
1273    Insert a new stmt (INIT_STMT) that initializes a new variable of type
1274    TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
1275    vector type a vector with all elements equal to VAL is created first.
   Place the initialization at GSI if it is not NULL.  Otherwise, place the
1277    initialization at the loop preheader.
1278    Return the DEF of INIT_STMT.
1279    It will be used in the vectorization of STMT.  */
1280 
1281 tree
1282 vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1283 {
1284   gimple *init_stmt;
1285   tree new_temp;
1286 
  /* We abuse this function to push something to an SSA name with the
     initial value VAL.  */
1288   if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1289     {
1290       gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1291       if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1292 	{
1293 	  /* Scalar boolean value should be transformed into
1294 	     all zeros or all ones value before building a vector.  */
1295 	  if (VECTOR_BOOLEAN_TYPE_P (type))
1296 	    {
1297 	      tree true_val = build_all_ones_cst (TREE_TYPE (type));
1298 	      tree false_val = build_zero_cst (TREE_TYPE (type));
1299 
1300 	      if (CONSTANT_CLASS_P (val))
1301 		val = integer_zerop (val) ? false_val : true_val;
1302 	      else
1303 		{
1304 		  new_temp = make_ssa_name (TREE_TYPE (type));
1305 		  init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1306 						   val, true_val, false_val);
1307 		  vect_init_vector_1 (stmt, init_stmt, gsi);
1308 		  val = new_temp;
1309 		}
1310 	    }
1311 	  else if (CONSTANT_CLASS_P (val))
1312 	    val = fold_convert (TREE_TYPE (type), val);
1313 	  else
1314 	    {
1315 	      new_temp = make_ssa_name (TREE_TYPE (type));
1316 	      if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1317 		init_stmt = gimple_build_assign (new_temp,
1318 						 fold_build1 (VIEW_CONVERT_EXPR,
1319 							      TREE_TYPE (type),
1320 							      val));
1321 	      else
1322 		init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
1323 	      vect_init_vector_1 (stmt, init_stmt, gsi);
1324 	      val = new_temp;
1325 	    }
1326 	}
1327       val = build_vector_from_val (type, val);
1328     }
1329 
1330   new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1331   init_stmt = gimple_build_assign  (new_temp, val);
1332   vect_init_vector_1 (stmt, init_stmt, gsi);
1333   return new_temp;
1334 }
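
/* For example, calling vect_init_vector with VAL == 5, TYPE being a
   "vector(4) int" type and GSI == NULL emits in the loop preheader
   (the SSA name is illustrative):

     cst__42 = { 5, 5, 5, 5 };

   and returns cst__42.  */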
1335 
1336 /* Function vect_get_vec_def_for_operand_1.
1337 
1338    For a defining stmt DEF_STMT of a scalar stmt, return a vector def with type
1339    DT that will be used in the vectorized stmt.  */
1340 
1341 tree
1342 vect_get_vec_def_for_operand_1 (gimple *def_stmt, enum vect_def_type dt)
1343 {
1344   tree vec_oprnd;
1345   gimple *vec_stmt;
1346   stmt_vec_info def_stmt_info = NULL;
1347 
1348   switch (dt)
1349     {
1350     /* operand is a constant or a loop invariant.  */
1351     case vect_constant_def:
1352     case vect_external_def:
1353       /* Code should use vect_get_vec_def_for_operand.  */
1354       gcc_unreachable ();
1355 
1356     /* operand is defined inside the loop.  */
1357     case vect_internal_def:
1358       {
1359         /* Get the def from the vectorized stmt.  */
1360         def_stmt_info = vinfo_for_stmt (def_stmt);
1361 
1362         vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1363         /* Get vectorized pattern statement.  */
1364         if (!vec_stmt
1365             && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1366             && !STMT_VINFO_RELEVANT (def_stmt_info))
1367           vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1368                        STMT_VINFO_RELATED_STMT (def_stmt_info)));
1369         gcc_assert (vec_stmt);
1370 	if (gimple_code (vec_stmt) == GIMPLE_PHI)
1371 	  vec_oprnd = PHI_RESULT (vec_stmt);
1372 	else if (is_gimple_call (vec_stmt))
1373 	  vec_oprnd = gimple_call_lhs (vec_stmt);
1374 	else
1375 	  vec_oprnd = gimple_assign_lhs (vec_stmt);
1376         return vec_oprnd;
1377       }
1378 
1379     /* operand is defined by a loop header phi.  */
1380     case vect_reduction_def:
1381     case vect_double_reduction_def:
1382     case vect_nested_cycle:
1383     case vect_induction_def:
1384       {
1385 	gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1386 
1387         /* Get the def from the vectorized stmt.  */
1388         def_stmt_info = vinfo_for_stmt (def_stmt);
1389         vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1390 	if (gimple_code (vec_stmt) == GIMPLE_PHI)
1391 	  vec_oprnd = PHI_RESULT (vec_stmt);
1392 	else
1393 	  vec_oprnd = gimple_get_lhs (vec_stmt);
1394         return vec_oprnd;
1395       }
1396 
1397     default:
1398       gcc_unreachable ();
1399     }
1400 }
1401 
1402 
1403 /* Function vect_get_vec_def_for_operand.
1404 
1405    OP is an operand in STMT.  This function returns a (vector) def that will be
1406    used in the vectorized stmt for STMT.
1407 
1408    In the case that OP is an SSA_NAME which is defined in the loop, then
1409    STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1410 
1411    In case OP is an invariant or constant, a new stmt that creates a vector def
1412    needs to be introduced.  VECTYPE may be used to specify a required type for
   the vector invariant.  */
1414 
1415 tree
1416 vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype)
1417 {
1418   gimple *def_stmt;
1419   enum vect_def_type dt;
1420   bool is_simple_use;
1421   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1422   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1423 
1424   if (dump_enabled_p ())
1425     {
1426       dump_printf_loc (MSG_NOTE, vect_location,
1427                        "vect_get_vec_def_for_operand: ");
1428       dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1429       dump_printf (MSG_NOTE, "\n");
1430     }
1431 
1432   is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt);
1433   gcc_assert (is_simple_use);
1434   if (def_stmt && dump_enabled_p ())
1435     {
1436       dump_printf_loc (MSG_NOTE, vect_location, "  def_stmt =  ");
1437       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1438     }
1439 
1440   if (dt == vect_constant_def || dt == vect_external_def)
1441     {
1442       tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1443       tree vector_type;
1444 
1445       if (vectype)
1446 	vector_type = vectype;
1447       else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1448 	       && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1449 	vector_type = build_same_sized_truth_vector_type (stmt_vectype);
1450       else
1451 	vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1452 
1453       gcc_assert (vector_type);
1454       return vect_init_vector (stmt, op, vector_type, NULL);
1455     }
1456   else
1457     return vect_get_vec_def_for_operand_1 (def_stmt, dt);
1458 }
1459 
1460 
1461 /* Function vect_get_vec_def_for_stmt_copy
1462 
1463    Return a vector-def for an operand.  This function is used when the
1464    vectorized stmt to be created (by the caller to this function) is a "copy"
1465    created in case the vectorized result cannot fit in one vector, and several
1466    copies of the vector-stmt are required.  In this case the vector-def is
1467    retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1468    of the stmt that defines VEC_OPRND.
1469    DT is the type of the vector def VEC_OPRND.
1470 
1471    Context:
1472         In case the vectorization factor (VF) is bigger than the number
1473    of elements that can fit in a vectype (nunits), we have to generate
1474    more than one vector stmt to vectorize the scalar stmt.  This situation
1475    arises when there are multiple data-types operated upon in the loop; the
1476    smallest data-type determines the VF, and as a result, when vectorizing
1477    stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1478    vector stmt (each computing a vector of 'nunits' results, and together
1479    computing 'VF' results in each iteration).  This function is called when
1480    vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1481    which VF=16 and nunits=4, so the number of copies required is 4):
1482 
1483    scalar stmt:         vectorized into:        STMT_VINFO_RELATED_STMT
1484 
1485    S1: x = load         VS1.0:  vx.0 = memref0      VS1.1
1486                         VS1.1:  vx.1 = memref1      VS1.2
1487                         VS1.2:  vx.2 = memref2      VS1.3
1488                         VS1.3:  vx.3 = memref3
1489 
1490    S2: z = x + ...      VSnew.0:  vz0 = vx.0 + ...  VSnew.1
1491                         VSnew.1:  vz1 = vx.1 + ...  VSnew.2
1492                         VSnew.2:  vz2 = vx.2 + ...  VSnew.3
1493                         VSnew.3:  vz3 = vx.3 + ...
1494 
1495    The vectorization of S1 is explained in vectorizable_load.
1496    The vectorization of S2:
1497         To create the first vector-stmt out of the 4 copies - VSnew.0 -
1498    the function 'vect_get_vec_def_for_operand' is called to
1499    get the relevant vector-def for each operand of S2.  For operand x it
1500    returns  the vector-def 'vx.0'.
1501 
1502         To create the remaining copies of the vector-stmt (VSnew.j), this
1503    function is called to get the relevant vector-def for each operand.  It is
1504    obtained from the respective VS1.j stmt, which is recorded in the
1505    STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1506 
1507         For example, to obtain the vector-def 'vx.1' in order to create the
1508    vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
   Given 'vx.0' we obtain the stmt that defines it ('VS1.0'); from the
1510    STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1511    and return its def ('vx.1').
1512    Overall, to create the above sequence this function will be called 3 times:
1513         vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1514         vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1515         vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */
1516 
1517 tree
1518 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1519 {
1520   gimple *vec_stmt_for_operand;
1521   stmt_vec_info def_stmt_info;
1522 
1523   /* Do nothing; can reuse same def.  */
  if (dt == vect_external_def || dt == vect_constant_def)
1525     return vec_oprnd;
1526 
1527   vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1528   def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1529   gcc_assert (def_stmt_info);
1530   vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1531   gcc_assert (vec_stmt_for_operand);
1532   if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1533     vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1534   else
1535     vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1536   return vec_oprnd;
1537 }
1538 
1539 
1540 /* Get vectorized definitions for the operands to create a copy of an original
1541    stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */
1542 
1543 void
1544 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1545 				 vec<tree> *vec_oprnds0,
1546 				 vec<tree> *vec_oprnds1)
1547 {
1548   tree vec_oprnd = vec_oprnds0->pop ();
1549 
1550   vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1551   vec_oprnds0->quick_push (vec_oprnd);
1552 
1553   if (vec_oprnds1 && vec_oprnds1->length ())
1554     {
1555       vec_oprnd = vec_oprnds1->pop ();
1556       vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1557       vec_oprnds1->quick_push (vec_oprnd);
1558     }
1559 }
1560 
1561 
1562 /* Get vectorized definitions for OP0 and OP1.  */
1563 
1564 void
1565 vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
1566 		   vec<tree> *vec_oprnds0,
1567 		   vec<tree> *vec_oprnds1,
1568 		   slp_tree slp_node)
1569 {
1570   if (slp_node)
1571     {
1572       int nops = (op1 == NULL_TREE) ? 1 : 2;
1573       auto_vec<tree> ops (nops);
1574       auto_vec<vec<tree> > vec_defs (nops);
1575 
1576       ops.quick_push (op0);
1577       if (op1)
1578         ops.quick_push (op1);
1579 
1580       vect_get_slp_defs (ops, slp_node, &vec_defs);
1581 
1582       *vec_oprnds0 = vec_defs[0];
1583       if (op1)
1584 	*vec_oprnds1 = vec_defs[1];
1585     }
1586   else
1587     {
1588       tree vec_oprnd;
1589 
1590       vec_oprnds0->create (1);
1591       vec_oprnd = vect_get_vec_def_for_operand (op0, stmt);
1592       vec_oprnds0->quick_push (vec_oprnd);
1593 
1594       if (op1)
1595 	{
1596 	  vec_oprnds1->create (1);
1597 	  vec_oprnd = vect_get_vec_def_for_operand (op1, stmt);
1598 	  vec_oprnds1->quick_push (vec_oprnd);
1599 	}
1600     }
1601 }
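
/* An illustration of the two paths above (SSA names are purely
   illustrative): for a scalar statement

     S: z_5 = x_3 + y_4

   vectorized in a loop without SLP, OP0 = x_3 and OP1 = y_4 each get a
   single vector def from vect_get_vec_def_for_operand, so *VEC_OPRNDS0
   becomes { vect_x.4 } and *VEC_OPRNDS1 becomes { vect_y.5 }.  With SLP
   the defs come from vect_get_slp_defs instead, and each returned
   vector holds one def per vector statement needed for the node.  */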
1602 
1603 /* Helper function called by vect_finish_replace_stmt and
   vect_finish_stmt_generation.  Set the location of the new
   statement, create a stmt_vec_info for it, and add the statement to
   STMT's EH region if it can throw.  */
1606 
1607 static void
1608 vect_finish_stmt_generation_1 (gimple *stmt, gimple *vec_stmt)
1609 {
1610   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1611   vec_info *vinfo = stmt_info->vinfo;
1612 
1613   set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, vinfo));
1614 
1615   if (dump_enabled_p ())
1616     {
1617       dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1618       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1619     }
1620 
1621   gimple_set_location (vec_stmt, gimple_location (stmt));
1622 
1623   /* While EH edges will generally prevent vectorization, stmt might
1624      e.g. be in a must-not-throw region.  Ensure newly created stmts
1625      that could throw are part of the same region.  */
1626   int lp_nr = lookup_stmt_eh_lp (stmt);
1627   if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1628     add_stmt_to_eh_lp (vec_stmt, lp_nr);
1629 }
1630 
1631 /* Replace the scalar statement STMT with a new vector statement VEC_STMT,
1632    which sets the same scalar result as STMT did.  */
1633 
1634 void
1635 vect_finish_replace_stmt (gimple *stmt, gimple *vec_stmt)
1636 {
1637   gcc_assert (gimple_get_lhs (stmt) == gimple_get_lhs (vec_stmt));
1638 
1639   gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1640   gsi_replace (&gsi, vec_stmt, false);
1641 
1642   vect_finish_stmt_generation_1 (stmt, vec_stmt);
1643 }
1644 
1645 /* Function vect_finish_stmt_generation.
1646 
   Insert the vectorized statement VEC_STMT, which implements scalar
   statement STMT, before the insertion point GSI.  */
1648 
1649 void
1650 vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
1651 			     gimple_stmt_iterator *gsi)
1652 {
1653   gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1654 
1655   if (!gsi_end_p (*gsi)
1656       && gimple_has_mem_ops (vec_stmt))
1657     {
1658       gimple *at_stmt = gsi_stmt (*gsi);
1659       tree vuse = gimple_vuse (at_stmt);
1660       if (vuse && TREE_CODE (vuse) == SSA_NAME)
1661 	{
1662 	  tree vdef = gimple_vdef (at_stmt);
1663 	  gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1664 	  /* If we have an SSA vuse and insert a store, update virtual
1665 	     SSA form to avoid triggering the renamer.  Do so only
1666 	     if we can easily see all uses - which is what almost always
1667 	     happens with the way vectorized stmts are inserted.  */
1668 	  if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1669 	      && ((is_gimple_assign (vec_stmt)
1670 		   && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1671 		  || (is_gimple_call (vec_stmt)
1672 		      && !(gimple_call_flags (vec_stmt)
1673 			   & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1674 	    {
1675 	      tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1676 	      gimple_set_vdef (vec_stmt, new_vdef);
1677 	      SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1678 	    }
1679 	}
1680     }
1681   gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1682   vect_finish_stmt_generation_1 (stmt, vec_stmt);
1683 }
1684 
1685 /* We want to vectorize a call to combined function CFN with function
1686    decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1687    as the types of all inputs.  Check whether this is possible using
1688    an internal function, returning its code if so or IFN_LAST if not.  */
1689 
1690 static internal_fn
1691 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1692 				tree vectype_out, tree vectype_in)
1693 {
1694   internal_fn ifn;
1695   if (internal_fn_p (cfn))
1696     ifn = as_internal_fn (cfn);
1697   else
1698     ifn = associated_internal_fn (fndecl);
1699   if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1700     {
1701       const direct_internal_fn_info &info = direct_internal_fn (ifn);
1702       if (info.vectorizable)
1703 	{
1704 	  tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1705 	  tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1706 	  if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1707 					      OPTIMIZE_FOR_SPEED))
1708 	    return ifn;
1709 	}
1710     }
1711   return IFN_LAST;
1712 }
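
/* For example (purely illustrative): a call to __builtin_sqrt has no
   internal function code of its own, so IFN is taken from
   associated_internal_fn (FNDECL) and becomes IFN_SQRT.  IFN_SQRT is a
   directly-mapped internal function, so with VECTYPE_OUT == VECTYPE_IN
   == V2DF the call is vectorizable iff direct_internal_fn_supported_p
   reports that the target provides a V2DF square-root pattern.  */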
1713 
1714 
1715 static tree permute_vec_elements (tree, tree, tree, gimple *,
1716 				  gimple_stmt_iterator *);
1717 
1718 /* Check whether a load or store statement in the loop described by
1719    LOOP_VINFO is possible in a fully-masked loop.  This is testing
1720    whether the vectorizer pass has the appropriate support, as well as
1721    whether the target does.
1722 
1723    VLS_TYPE says whether the statement is a load or store and VECTYPE
1724    is the type of the vector being loaded or stored.  MEMORY_ACCESS_TYPE
1725    says how the load or store is going to be implemented and GROUP_SIZE
1726    is the number of load or store statements in the containing group.
1727    If the access is a gather load or scatter store, GS_INFO describes
1728    its arguments.
1729 
1730    Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
1731    supported, otherwise record the required mask types.  */
1732 
1733 static void
1734 check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
1735 			  vec_load_store_type vls_type, int group_size,
1736 			  vect_memory_access_type memory_access_type,
1737 			  gather_scatter_info *gs_info)
1738 {
1739   /* Invariant loads need no special support.  */
1740   if (memory_access_type == VMAT_INVARIANT)
1741     return;
1742 
1743   vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1744   machine_mode vecmode = TYPE_MODE (vectype);
1745   bool is_load = (vls_type == VLS_LOAD);
1746   if (memory_access_type == VMAT_LOAD_STORE_LANES)
1747     {
1748       if (is_load
1749 	  ? !vect_load_lanes_supported (vectype, group_size, true)
1750 	  : !vect_store_lanes_supported (vectype, group_size, true))
1751 	{
1752 	  if (dump_enabled_p ())
1753 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1754 			     "can't use a fully-masked loop because the"
1755 			     " target doesn't have an appropriate masked"
1756 			     " load/store-lanes instruction.\n");
1757 	  LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1758 	  return;
1759 	}
1760       unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1761       vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
1762       return;
1763     }
1764 
1765   if (memory_access_type == VMAT_GATHER_SCATTER)
1766     {
1767       internal_fn ifn = (is_load
1768 			 ? IFN_MASK_GATHER_LOAD
1769 			 : IFN_MASK_SCATTER_STORE);
1770       tree offset_type = TREE_TYPE (gs_info->offset);
1771       if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
1772 						   gs_info->memory_type,
1773 						   TYPE_SIGN (offset_type),
1774 						   gs_info->scale))
1775 	{
1776 	  if (dump_enabled_p ())
1777 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1778 			     "can't use a fully-masked loop because the"
1779 			     " target doesn't have an appropriate masked"
1780 			     " gather load or scatter store instruction.\n");
1781 	  LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1782 	  return;
1783 	}
1784       unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1785       vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
1786       return;
1787     }
1788 
1789   if (memory_access_type != VMAT_CONTIGUOUS
1790       && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1791     {
1792       /* Element X of the data must come from iteration i * VF + X of the
1793 	 scalar loop.  We need more work to support other mappings.  */
1794       if (dump_enabled_p ())
1795 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1796 			 "can't use a fully-masked loop because an access"
1797 			 " isn't contiguous.\n");
1798       LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1799       return;
1800     }
1801 
1802   machine_mode mask_mode;
1803   if (!(targetm.vectorize.get_mask_mode
1804 	(GET_MODE_NUNITS (vecmode),
1805 	 GET_MODE_SIZE (vecmode)).exists (&mask_mode))
1806       || !can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1807     {
1808       if (dump_enabled_p ())
1809 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1810 			 "can't use a fully-masked loop because the target"
1811 			 " doesn't have the appropriate masked load or"
1812 			 " store.\n");
1813       LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1814       return;
1815     }
1816   /* We might load more scalars than we need for permuting SLP loads.
1817      We checked in get_group_load_store_type that the extra elements
1818      don't leak into a new vector.  */
1819   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1820   poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1821   unsigned int nvectors;
1822   if (can_div_away_from_zero_p (group_size * vf, nunits, &nvectors))
1823     vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype);
1824   else
1825     gcc_unreachable ();
1826 }
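
/* A worked example of the final calculation above (numbers are
   illustrative): with GROUP_SIZE = 3, a vectorization factor of 4 and
   4-element vectors, each iteration of the vector loop accesses
   3 * 4 = 12 scalars, i.e. 3 vectors, so 3 masks of type VECTYPE are
   recorded.  With 8-element vectors instead, 12 / 8 rounds away from
   zero to 2, reflecting the fact that the second vector is only
   partially used.  */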
1827 
1828 /* Return the mask input to a masked load or store.  VEC_MASK is the vectorized
1829    form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1830    that needs to be applied to all loads and stores in a vectorized loop.
1831    Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1832 
1833    MASK_TYPE is the type of both masks.  If new statements are needed,
1834    insert them before GSI.  */
1835 
1836 static tree
1837 prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
1838 			 gimple_stmt_iterator *gsi)
1839 {
1840   gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1841   if (!loop_mask)
1842     return vec_mask;
1843 
1844   gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1845   tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1846   gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1847 					  vec_mask, loop_mask);
1848   gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1849   return and_res;
1850 }
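
/* For example (SSA names illustrative), with a nonnull LOOP_MASK this
   emits a single statement before GSI:

     vec_mask_and_4 = vec_mask_2 & loop_mask_3;

   and returns vec_mask_and_4, so that an element is accessed only if
   both the scalar condition and the loop mask select it.  */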
1851 
1852 /* Determine whether we can use a gather load or scatter store to vectorize
1853    strided load or store STMT by truncating the current offset to a smaller
1854    width.  We need to be able to construct an offset vector:
1855 
1856      { 0, X, X*2, X*3, ... }
1857 
1858    without loss of precision, where X is STMT's DR_STEP.
1859 
1860    Return true if this is possible, describing the gather load or scatter
1861    store in GS_INFO.  MASKED_P is true if the load or store is conditional.  */
1862 
1863 static bool
1864 vect_truncate_gather_scatter_offset (gimple *stmt, loop_vec_info loop_vinfo,
1865 				     bool masked_p,
1866 				     gather_scatter_info *gs_info)
1867 {
1868   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1869   data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1870   tree step = DR_STEP (dr);
1871   if (TREE_CODE (step) != INTEGER_CST)
1872     {
1873       /* ??? Perhaps we could use range information here?  */
1874       if (dump_enabled_p ())
1875 	dump_printf_loc (MSG_NOTE, vect_location,
1876 			 "cannot truncate variable step.\n");
1877       return false;
1878     }
1879 
1880   /* Get the number of bits in an element.  */
1881   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1882   scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
1883   unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
1884 
  /* Set COUNT to one less than the upper limit on the number of
     elements, starting with the maximum vectorization factor.  */
1887   unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
1888 
1889   /* Try lowering COUNT to the number of scalar latch iterations.  */
1890   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1891   widest_int max_iters;
1892   if (max_loop_iterations (loop, &max_iters)
1893       && max_iters < count)
1894     count = max_iters.to_shwi ();
1895 
1896   /* Try scales of 1 and the element size.  */
1897   int scales[] = { 1, vect_get_scalar_dr_size (dr) };
1898   bool overflow_p = false;
1899   for (int i = 0; i < 2; ++i)
1900     {
1901       int scale = scales[i];
1902       widest_int factor;
1903       if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
1904 	continue;
1905 
      /* See whether we can calculate COUNT * STEP / SCALE
	 in ELEMENT_BITS bits.  */
1908       widest_int range = wi::mul (count, factor, SIGNED, &overflow_p);
1909       if (overflow_p)
1910 	continue;
1911       signop sign = range >= 0 ? UNSIGNED : SIGNED;
1912       if (wi::min_precision (range, sign) > element_bits)
1913 	{
1914 	  overflow_p = true;
1915 	  continue;
1916 	}
1917 
1918       /* See whether the target supports the operation.  */
1919       tree memory_type = TREE_TYPE (DR_REF (dr));
1920       if (!vect_gather_scatter_fn_p (DR_IS_READ (dr), masked_p, vectype,
1921 				     memory_type, element_bits, sign, scale,
1922 				     &gs_info->ifn, &gs_info->element_type))
1923 	continue;
1924 
1925       tree offset_type = build_nonstandard_integer_type (element_bits,
1926 							 sign == UNSIGNED);
1927 
1928       gs_info->decl = NULL_TREE;
1929       /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
1930 	 but we don't need to store that here.  */
1931       gs_info->base = NULL_TREE;
1932       gs_info->offset = fold_convert (offset_type, step);
1933       gs_info->offset_dt = vect_constant_def;
1934       gs_info->offset_vectype = NULL_TREE;
1935       gs_info->scale = scale;
1936       gs_info->memory_type = memory_type;
1937       return true;
1938     }
1939 
1940   if (overflow_p && dump_enabled_p ())
1941     dump_printf_loc (MSG_NOTE, vect_location,
1942 		     "truncating gather/scatter offset to %d bits"
1943 		     " might change its value.\n", element_bits);
1944 
1945   return false;
1946 }
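
/* A worked example (numbers illustrative): for 4-byte elements with
   DR_STEP = 40 and a maximum vectorization factor of 16, COUNT is at
   most 15.  Trying SCALE = 4 gives FACTOR = 10, so the largest offset
   needed within one vector access is 150, which easily fits in the
   32-bit offset type built above.  The access can then be implemented
   as a gather or scatter with offsets { 0, 10, 20, ... } and scale 4,
   provided the target supports such an operation.  */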
1947 
1948 /* Return true if we can use gather/scatter internal functions to
1949    vectorize STMT, which is a grouped or strided load or store.
1950    MASKED_P is true if load or store is conditional.  When returning
1951    true, fill in GS_INFO with the information required to perform the
1952    operation.  */
1953 
1954 static bool
1955 vect_use_strided_gather_scatters_p (gimple *stmt, loop_vec_info loop_vinfo,
1956 				    bool masked_p,
1957 				    gather_scatter_info *gs_info)
1958 {
1959   if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info)
1960       || gs_info->decl)
1961     return vect_truncate_gather_scatter_offset (stmt, loop_vinfo,
1962 						masked_p, gs_info);
1963 
1964   scalar_mode element_mode = SCALAR_TYPE_MODE (gs_info->element_type);
1965   unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
1966   tree offset_type = TREE_TYPE (gs_info->offset);
1967   unsigned int offset_bits = TYPE_PRECISION (offset_type);
1968 
1969   /* Enforced by vect_check_gather_scatter.  */
1970   gcc_assert (element_bits >= offset_bits);
1971 
1972   /* If the elements are wider than the offset, convert the offset to the
1973      same width, without changing its sign.  */
1974   if (element_bits > offset_bits)
1975     {
1976       bool unsigned_p = TYPE_UNSIGNED (offset_type);
1977       offset_type = build_nonstandard_integer_type (element_bits, unsigned_p);
1978       gs_info->offset = fold_convert (offset_type, gs_info->offset);
1979     }
1980 
1981   if (dump_enabled_p ())
1982     dump_printf_loc (MSG_NOTE, vect_location,
1983 		     "using gather/scatter for strided/grouped access,"
1984 		     " scale = %d\n", gs_info->scale);
1985 
1986   return true;
1987 }
1988 
1989 /* STMT is a non-strided load or store, meaning that it accesses
1990    elements with a known constant step.  Return -1 if that step
1991    is negative, 0 if it is zero, and 1 if it is greater than zero.  */
1992 
1993 static int
1994 compare_step_with_zero (gimple *stmt)
1995 {
1996   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1997   data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1998   return tree_int_cst_compare (vect_dr_behavior (dr)->step,
1999 			       size_zero_node);
2000 }
2001 
2002 /* If the target supports a permute mask that reverses the elements in
2003    a vector of type VECTYPE, return that mask, otherwise return null.  */
2004 
2005 static tree
2006 perm_mask_for_reverse (tree vectype)
2007 {
2008   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2009 
2010   /* The encoding has a single stepped pattern.  */
2011   vec_perm_builder sel (nunits, 1, 3);
2012   for (int i = 0; i < 3; ++i)
2013     sel.quick_push (nunits - 1 - i);
2014 
2015   vec_perm_indices indices (sel, 1, nunits);
2016   if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
2017     return NULL_TREE;
2018   return vect_gen_perm_mask_checked (vectype, indices);
2019 }
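
/* For example, for V4SI this returns the selector { 3, 2, 1, 0 }.
   Only the leading elements { 3, 2, 1 } are pushed explicitly; the
   single stepped pattern continues in steps of -1, so the same
   encoding also describes the reversal of variable-length vectors.  */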
2020 
2021 /* STMT is either a masked or unconditional store.  Return the value
2022    being stored.  */
2023 
2024 tree
2025 vect_get_store_rhs (gimple *stmt)
2026 {
2027   if (gassign *assign = dyn_cast <gassign *> (stmt))
2028     {
2029       gcc_assert (gimple_assign_single_p (assign));
2030       return gimple_assign_rhs1 (assign);
2031     }
2032   if (gcall *call = dyn_cast <gcall *> (stmt))
2033     {
2034       internal_fn ifn = gimple_call_internal_fn (call);
2035       int index = internal_fn_stored_value_index (ifn);
2036       gcc_assert (index >= 0);
2037       return gimple_call_arg (stmt, index);
2038     }
2039   gcc_unreachable ();
2040 }
2041 
2042 /* A subroutine of get_load_store_type, with a subset of the same
2043    arguments.  Handle the case where STMT is part of a grouped load
2044    or store.
2045 
2046    For stores, the statements in the group are all consecutive
2047    and there is no gap at the end.  For loads, the statements in the
2048    group might not be consecutive; there can be gaps between statements
2049    as well as at the end.  */
2050 
2051 static bool
2052 get_group_load_store_type (gimple *stmt, tree vectype, bool slp,
2053 			   bool masked_p, vec_load_store_type vls_type,
2054 			   vect_memory_access_type *memory_access_type,
2055 			   gather_scatter_info *gs_info)
2056 {
2057   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2058   vec_info *vinfo = stmt_info->vinfo;
2059   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2060   struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2061   gimple *first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
2062   data_reference *first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
2063   unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
2064   bool single_element_p = (stmt == first_stmt
2065 			   && !GROUP_NEXT_ELEMENT (stmt_info));
2066   unsigned HOST_WIDE_INT gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
2067   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2068 
2069   /* True if the vectorized statements would access beyond the last
2070      statement in the group.  */
2071   bool overrun_p = false;
2072 
2073   /* True if we can cope with such overrun by peeling for gaps, so that
2074      there is at least one final scalar iteration after the vector loop.  */
2075   bool can_overrun_p = (!masked_p
2076 			&& vls_type == VLS_LOAD
2077 			&& loop_vinfo
2078 			&& !loop->inner);
2079 
2080   /* There can only be a gap at the end of the group if the stride is
2081      known at compile time.  */
2082   gcc_assert (!STMT_VINFO_STRIDED_P (stmt_info) || gap == 0);
2083 
2084   /* Stores can't yet have gaps.  */
2085   gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
2086 
2087   if (slp)
2088     {
2089       if (STMT_VINFO_STRIDED_P (stmt_info))
2090 	{
2091 	  /* Try to use consecutive accesses of GROUP_SIZE elements,
2092 	     separated by the stride, until we have a complete vector.
2093 	     Fall back to scalar accesses if that isn't possible.  */
2094 	  if (multiple_p (nunits, group_size))
2095 	    *memory_access_type = VMAT_STRIDED_SLP;
2096 	  else
2097 	    *memory_access_type = VMAT_ELEMENTWISE;
2098 	}
2099       else
2100 	{
2101 	  overrun_p = loop_vinfo && gap != 0;
2102 	  if (overrun_p && vls_type != VLS_LOAD)
2103 	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Grouped store with gaps requires"
				 " non-consecutive accesses\n");
2107 	      return false;
2108 	    }
2109 	  /* An overrun is fine if the trailing elements are smaller
2110 	     than the alignment boundary B.  Every vector access will
2111 	     be a multiple of B and so we are guaranteed to access a
2112 	     non-gap element in the same B-sized block.  */
2113 	  if (overrun_p
2114 	      && gap < (vect_known_alignment_in_bytes (first_dr)
2115 			/ vect_get_scalar_dr_size (first_dr)))
2116 	    overrun_p = false;
2117 	  if (overrun_p && !can_overrun_p)
2118 	    {
2119 	      if (dump_enabled_p ())
2120 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2121 				 "Peeling for outer loop is not supported\n");
2122 	      return false;
2123 	    }
2124 	  *memory_access_type = VMAT_CONTIGUOUS;
2125 	}
2126     }
2127   else
2128     {
2129       /* We can always handle this case using elementwise accesses,
2130 	 but see if something more efficient is available.  */
2131       *memory_access_type = VMAT_ELEMENTWISE;
2132 
2133       /* If there is a gap at the end of the group then these optimizations
2134 	 would access excess elements in the last iteration.  */
2135       bool would_overrun_p = (gap != 0);
2136       /* An overrun is fine if the trailing elements are smaller than the
2137 	 alignment boundary B.  Every vector access will be a multiple of B
2138 	 and so we are guaranteed to access a non-gap element in the
2139 	 same B-sized block.  */
2140       if (would_overrun_p
2141 	  && !masked_p
2142 	  && gap < (vect_known_alignment_in_bytes (first_dr)
2143 		    / vect_get_scalar_dr_size (first_dr)))
2144 	would_overrun_p = false;
2145 
2146       if (!STMT_VINFO_STRIDED_P (stmt_info)
2147 	  && (can_overrun_p || !would_overrun_p)
2148 	  && compare_step_with_zero (stmt) > 0)
2149 	{
2150 	  /* First cope with the degenerate case of a single-element
2151 	     vector.  */
2152 	  if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2153 	    *memory_access_type = VMAT_CONTIGUOUS;
2154 
2155 	  /* Otherwise try using LOAD/STORE_LANES.  */
2156 	  if (*memory_access_type == VMAT_ELEMENTWISE
2157 	      && (vls_type == VLS_LOAD
2158 		  ? vect_load_lanes_supported (vectype, group_size, masked_p)
2159 		  : vect_store_lanes_supported (vectype, group_size,
2160 						masked_p)))
2161 	    {
2162 	      *memory_access_type = VMAT_LOAD_STORE_LANES;
2163 	      overrun_p = would_overrun_p;
2164 	    }
2165 
2166 	  /* If that fails, try using permuting loads.  */
2167 	  if (*memory_access_type == VMAT_ELEMENTWISE
2168 	      && (vls_type == VLS_LOAD
2169 		  ? vect_grouped_load_supported (vectype, single_element_p,
2170 						 group_size)
2171 		  : vect_grouped_store_supported (vectype, group_size)))
2172 	    {
2173 	      *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2174 	      overrun_p = would_overrun_p;
2175 	    }
2176 	}
2177 
      /* As a last resort, try using a gather load or scatter store.
2179 
2180 	 ??? Although the code can handle all group sizes correctly,
2181 	 it probably isn't a win to use separate strided accesses based
2182 	 on nearby locations.  Or, even if it's a win over scalar code,
2183 	 it might not be a win over vectorizing at a lower VF, if that
2184 	 allows us to use contiguous accesses.  */
2185       if (*memory_access_type == VMAT_ELEMENTWISE
2186 	  && single_element_p
2187 	  && loop_vinfo
2188 	  && vect_use_strided_gather_scatters_p (stmt, loop_vinfo,
2189 						 masked_p, gs_info))
2190 	*memory_access_type = VMAT_GATHER_SCATTER;
2191     }
2192 
2193   if (vls_type != VLS_LOAD && first_stmt == stmt)
2194     {
2195       /* STMT is the leader of the group. Check the operands of all the
2196 	 stmts of the group.  */
2197       gimple *next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
2198       while (next_stmt)
2199 	{
2200 	  tree op = vect_get_store_rhs (next_stmt);
2201 	  gimple *def_stmt;
2202 	  enum vect_def_type dt;
2203 	  if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
2204 	    {
2205 	      if (dump_enabled_p ())
2206 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2207 				 "use not simple.\n");
2208 	      return false;
2209 	    }
2210 	  next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
2211 	}
2212     }
2213 
2214   if (overrun_p)
2215     {
2216       gcc_assert (can_overrun_p);
2217       if (dump_enabled_p ())
2218 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2219 			 "Data access with gaps requires scalar "
2220 			 "epilogue loop\n");
2221       LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2222     }
2223 
2224   return true;
2225 }
2226 
2227 /* A subroutine of get_load_store_type, with a subset of the same
2228    arguments.  Handle the case where STMT is a load or store that
2229    accesses consecutive elements with a negative step.  */
2230 
2231 static vect_memory_access_type
2232 get_negative_load_store_type (gimple *stmt, tree vectype,
2233 			      vec_load_store_type vls_type,
2234 			      unsigned int ncopies)
2235 {
2236   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2237   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2238   dr_alignment_support alignment_support_scheme;
2239 
2240   if (ncopies > 1)
2241     {
2242       if (dump_enabled_p ())
2243 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2244 			 "multiple types with negative step.\n");
2245       return VMAT_ELEMENTWISE;
2246     }
2247 
2248   alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
2249   if (alignment_support_scheme != dr_aligned
2250       && alignment_support_scheme != dr_unaligned_supported)
2251     {
2252       if (dump_enabled_p ())
2253 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2254 			 "negative step but alignment required.\n");
2255       return VMAT_ELEMENTWISE;
2256     }
2257 
2258   if (vls_type == VLS_STORE_INVARIANT)
2259     {
2260       if (dump_enabled_p ())
2261 	dump_printf_loc (MSG_NOTE, vect_location,
2262 			 "negative step with invariant source;"
2263 			 " no permute needed.\n");
2264       return VMAT_CONTIGUOUS_DOWN;
2265     }
2266 
2267   if (!perm_mask_for_reverse (vectype))
2268     {
2269       if (dump_enabled_p ())
2270 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2271 			 "negative step and reversing not supported.\n");
2272       return VMAT_ELEMENTWISE;
2273     }
2274 
2275   return VMAT_CONTIGUOUS_REVERSE;
2276 }
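
/* As an illustration, in a loop such as

     for (i = 0; i < n; ++i)
       a[i] = b[n - 1 - i];

   the load from b has a negative step.  With NCOPIES == 1, supportable
   alignment and a target that can reverse a VECTYPE vector, the load
   is classified VMAT_CONTIGUOUS_REVERSE: a contiguous vector is loaded
   and then reversed with a VEC_PERM_EXPR.  A store of an invariant
   value would instead get VMAT_CONTIGUOUS_DOWN, since no permutation
   is needed.  */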
2277 
2278 /* Analyze load or store statement STMT of type VLS_TYPE.  Return true
2279    if there is a memory access type that the vectorized form can use,
2280    storing it in *MEMORY_ACCESS_TYPE if so.  If we decide to use gathers
2281    or scatters, fill in GS_INFO accordingly.
2282 
2283    SLP says whether we're performing SLP rather than loop vectorization.
2284    MASKED_P is true if the statement is conditional on a vectorized mask.
2285    VECTYPE is the vector type that the vectorized statements will use.
2286    NCOPIES is the number of vector statements that will be needed.  */
2287 
2288 static bool
2289 get_load_store_type (gimple *stmt, tree vectype, bool slp, bool masked_p,
2290 		     vec_load_store_type vls_type, unsigned int ncopies,
2291 		     vect_memory_access_type *memory_access_type,
2292 		     gather_scatter_info *gs_info)
2293 {
2294   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2295   vec_info *vinfo = stmt_info->vinfo;
2296   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2297   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2298   if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2299     {
2300       *memory_access_type = VMAT_GATHER_SCATTER;
2301       gimple *def_stmt;
2302       if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info))
2303 	gcc_unreachable ();
2304       else if (!vect_is_simple_use (gs_info->offset, vinfo, &def_stmt,
2305 				    &gs_info->offset_dt,
2306 				    &gs_info->offset_vectype))
2307 	{
2308 	  if (dump_enabled_p ())
2309 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2310 			     "%s index use not simple.\n",
2311 			     vls_type == VLS_LOAD ? "gather" : "scatter");
2312 	  return false;
2313 	}
2314     }
2315   else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2316     {
2317       if (!get_group_load_store_type (stmt, vectype, slp, masked_p, vls_type,
2318 				      memory_access_type, gs_info))
2319 	return false;
2320     }
2321   else if (STMT_VINFO_STRIDED_P (stmt_info))
2322     {
2323       gcc_assert (!slp);
2324       if (loop_vinfo
2325 	  && vect_use_strided_gather_scatters_p (stmt, loop_vinfo,
2326 						 masked_p, gs_info))
2327 	*memory_access_type = VMAT_GATHER_SCATTER;
2328       else
2329 	*memory_access_type = VMAT_ELEMENTWISE;
2330     }
2331   else
2332     {
2333       int cmp = compare_step_with_zero (stmt);
2334       if (cmp < 0)
2335 	*memory_access_type = get_negative_load_store_type
2336 	  (stmt, vectype, vls_type, ncopies);
2337       else if (cmp == 0)
2338 	{
2339 	  gcc_assert (vls_type == VLS_LOAD);
2340 	  *memory_access_type = VMAT_INVARIANT;
2341 	}
2342       else
2343 	*memory_access_type = VMAT_CONTIGUOUS;
2344     }
2345 
2346   if ((*memory_access_type == VMAT_ELEMENTWISE
2347        || *memory_access_type == VMAT_STRIDED_SLP)
2348       && !nunits.is_constant ())
2349     {
2350       if (dump_enabled_p ())
2351 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2352 			 "Not using elementwise accesses due to variable "
2353 			 "vectorization factor.\n");
2354       return false;
2355     }
2356 
2357   /* FIXME: At the moment the cost model seems to underestimate the
2358      cost of using elementwise accesses.  This check preserves the
2359      traditional behavior until that can be fixed.  */
2360   if (*memory_access_type == VMAT_ELEMENTWISE
2361       && !STMT_VINFO_STRIDED_P (stmt_info)
2362       && !(stmt == GROUP_FIRST_ELEMENT (stmt_info)
2363 	   && !GROUP_NEXT_ELEMENT (stmt_info)
2364 	   && !pow2p_hwi (GROUP_SIZE (stmt_info))))
2365     {
2366       if (dump_enabled_p ())
2367 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2368 			 "not falling back to elementwise accesses\n");
2369       return false;
2370     }
2371   return true;
2372 }
2373 
2374 /* Return true if boolean argument MASK is suitable for vectorizing
2375    conditional load or store STMT.  When returning true, store the type
2376    of the definition in *MASK_DT_OUT and the type of the vectorized mask
2377    in *MASK_VECTYPE_OUT.  */
2378 
2379 static bool
2380 vect_check_load_store_mask (gimple *stmt, tree mask,
2381 			    vect_def_type *mask_dt_out,
2382 			    tree *mask_vectype_out)
2383 {
2384   if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2385     {
2386       if (dump_enabled_p ())
2387 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2388 			 "mask argument is not a boolean.\n");
2389       return false;
2390     }
2391 
2392   if (TREE_CODE (mask) != SSA_NAME)
2393     {
2394       if (dump_enabled_p ())
2395 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2396 			 "mask argument is not an SSA name.\n");
2397       return false;
2398     }
2399 
2400   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2401   gimple *def_stmt;
2402   enum vect_def_type mask_dt;
2403   tree mask_vectype;
2404   if (!vect_is_simple_use (mask, stmt_info->vinfo, &def_stmt, &mask_dt,
2405 			   &mask_vectype))
2406     {
2407       if (dump_enabled_p ())
2408 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2409 			 "mask use not simple.\n");
2410       return false;
2411     }
2412 
2413   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2414   if (!mask_vectype)
2415     mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
2416 
2417   if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2418     {
2419       if (dump_enabled_p ())
2420 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2421 			 "could not find an appropriate vector mask type.\n");
2422       return false;
2423     }
2424 
2425   if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2426 		TYPE_VECTOR_SUBPARTS (vectype)))
2427     {
2428       if (dump_enabled_p ())
2429 	{
2430 	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2431 			   "vector mask type ");
2432 	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, mask_vectype);
2433 	  dump_printf (MSG_MISSED_OPTIMIZATION,
2434 		       " does not match vector data type ");
2435 	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, vectype);
2436 	  dump_printf (MSG_MISSED_OPTIMIZATION, ".\n");
2437 	}
2438       return false;
2439     }
2440 
2441   *mask_dt_out = mask_dt;
2442   *mask_vectype_out = mask_vectype;
2443   return true;
2444 }
2445 
2446 /* Return true if stored value RHS is suitable for vectorizing store
2447    statement STMT.  When returning true, store the type of the
2448    definition in *RHS_DT_OUT, the type of the vectorized store value in
2449    *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT.  */
2450 
2451 static bool
2452 vect_check_store_rhs (gimple *stmt, tree rhs, vect_def_type *rhs_dt_out,
2453 		      tree *rhs_vectype_out, vec_load_store_type *vls_type_out)
2454 {
2455   /* In the case this is a store from a constant make sure
2456      native_encode_expr can handle it.  */
2457   if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2458     {
2459       if (dump_enabled_p ())
2460 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2461 			 "cannot encode constant as a byte sequence.\n");
2462       return false;
2463     }
2464 
2465   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2466   gimple *def_stmt;
2467   enum vect_def_type rhs_dt;
2468   tree rhs_vectype;
2469   if (!vect_is_simple_use (rhs, stmt_info->vinfo, &def_stmt, &rhs_dt,
2470 			   &rhs_vectype))
2471     {
2472       if (dump_enabled_p ())
2473 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2474 			 "use not simple.\n");
2475       return false;
2476     }
2477 
2478   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2479   if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2480     {
2481       if (dump_enabled_p ())
2482 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2483 			 "incompatible vector types.\n");
2484       return false;
2485     }
2486 
2487   *rhs_dt_out = rhs_dt;
2488   *rhs_vectype_out = rhs_vectype;
2489   if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2490     *vls_type_out = VLS_STORE_INVARIANT;
2491   else
2492     *vls_type_out = VLS_STORE;
2493   return true;
2494 }
2495 
2496 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT.
2497    Note that we support masks with floating-point type, in which case the
2498    floats are interpreted as a bitmask.  */
2499 
2500 static tree
2501 vect_build_all_ones_mask (gimple *stmt, tree masktype)
2502 {
2503   if (TREE_CODE (masktype) == INTEGER_TYPE)
2504     return build_int_cst (masktype, -1);
2505   else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2506     {
2507       tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2508       mask = build_vector_from_val (masktype, mask);
2509       return vect_init_vector (stmt, mask, masktype, NULL);
2510     }
2511   else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2512     {
2513       REAL_VALUE_TYPE r;
2514       long tmp[6];
2515       for (int j = 0; j < 6; ++j)
2516 	tmp[j] = -1;
2517       real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2518       tree mask = build_real (TREE_TYPE (masktype), r);
2519       mask = build_vector_from_val (masktype, mask);
2520       return vect_init_vector (stmt, mask, masktype, NULL);
2521     }
2522   gcc_unreachable ();
2523 }
2524 
2525 /* Build an all-zero merge value of type VECTYPE while vectorizing
2526    STMT as a gather load.  */
2527 
2528 static tree
2529 vect_build_zero_merge_argument (gimple *stmt, tree vectype)
2530 {
2531   tree merge;
2532   if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2533     merge = build_int_cst (TREE_TYPE (vectype), 0);
2534   else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2535     {
2536       REAL_VALUE_TYPE r;
2537       long tmp[6];
2538       for (int j = 0; j < 6; ++j)
2539 	tmp[j] = 0;
2540       real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2541       merge = build_real (TREE_TYPE (vectype), r);
2542     }
2543   else
2544     gcc_unreachable ();
2545   merge = build_vector_from_val (vectype, merge);
2546   return vect_init_vector (stmt, merge, vectype, NULL);
2547 }
2548 
2549 /* Build a gather load call while vectorizing STMT.  Insert new instructions
2550    before GSI and add them to VEC_STMT.  GS_INFO describes the gather load
2551    operation.  If the load is conditional, MASK is the unvectorized
2552    condition and MASK_DT is its definition type, otherwise MASK is null.  */
2553 
2554 static void
2555 vect_build_gather_load_calls (gimple *stmt, gimple_stmt_iterator *gsi,
2556 			      gimple **vec_stmt, gather_scatter_info *gs_info,
2557 			      tree mask, vect_def_type mask_dt)
2558 {
2559   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2560   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2561   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2562   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2563   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2564   int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2565   edge pe = loop_preheader_edge (loop);
2566   enum { NARROW, NONE, WIDEN } modifier;
2567   poly_uint64 gather_off_nunits
2568     = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2569 
2570   tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2571   tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2572   tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2573   tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2574   tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2575   tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2576   tree scaletype = TREE_VALUE (arglist);
2577   gcc_checking_assert (types_compatible_p (srctype, rettype)
2578 		       && (!mask || types_compatible_p (srctype, masktype)));
2579 
2580   tree perm_mask = NULL_TREE;
2581   tree mask_perm_mask = NULL_TREE;
2582   if (known_eq (nunits, gather_off_nunits))
2583     modifier = NONE;
2584   else if (known_eq (nunits * 2, gather_off_nunits))
2585     {
2586       modifier = WIDEN;
2587 
2588       /* Currently widening gathers and scatters are only supported for
2589 	 fixed-length vectors.  */
2590       int count = gather_off_nunits.to_constant ();
2591       vec_perm_builder sel (count, count, 1);
2592       for (int i = 0; i < count; ++i)
2593 	sel.quick_push (i | (count / 2));
2594 
2595       vec_perm_indices indices (sel, 1, count);
2596       perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2597 					      indices);
2598     }
2599   else if (known_eq (nunits, gather_off_nunits * 2))
2600     {
2601       modifier = NARROW;
2602 
2603       /* Currently narrowing gathers and scatters are only supported for
2604 	 fixed-length vectors.  */
2605       int count = nunits.to_constant ();
2606       vec_perm_builder sel (count, count, 1);
2607       sel.quick_grow (count);
2608       for (int i = 0; i < count; ++i)
2609 	sel[i] = i < count / 2 ? i : i + count / 2;
2610       vec_perm_indices indices (sel, 2, count);
2611       perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2612 
2613       ncopies *= 2;
2614 
2615       if (mask)
2616 	{
2617 	  for (int i = 0; i < count; ++i)
2618 	    sel[i] = i | (count / 2);
2619 	  indices.new_vector (sel, 2, count);
2620 	  mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2621 	}
2622     }
2623   else
2624     gcc_unreachable ();
2625 
2626   tree vec_dest = vect_create_destination_var (gimple_get_lhs (stmt),
2627 					       vectype);
2628 
2629   tree ptr = fold_convert (ptrtype, gs_info->base);
2630   if (!is_gimple_min_invariant (ptr))
2631     {
2632       gimple_seq seq;
2633       ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2634       basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2635       gcc_assert (!new_bb);
2636     }
2637 
2638   tree scale = build_int_cst (scaletype, gs_info->scale);
2639 
2640   tree vec_oprnd0 = NULL_TREE;
2641   tree vec_mask = NULL_TREE;
2642   tree src_op = NULL_TREE;
2643   tree mask_op = NULL_TREE;
2644   tree prev_res = NULL_TREE;
2645   stmt_vec_info prev_stmt_info = NULL;
2646 
2647   if (!mask)
2648     {
2649       src_op = vect_build_zero_merge_argument (stmt, rettype);
2650       mask_op = vect_build_all_ones_mask (stmt, masktype);
2651     }
2652 
2653   for (int j = 0; j < ncopies; ++j)
2654     {
2655       tree op, var;
2656       gimple *new_stmt;
2657       if (modifier == WIDEN && (j & 1))
2658 	op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
2659 				   perm_mask, stmt, gsi);
2660       else if (j == 0)
2661 	op = vec_oprnd0
2662 	  = vect_get_vec_def_for_operand (gs_info->offset, stmt);
2663       else
2664 	op = vec_oprnd0
2665 	  = vect_get_vec_def_for_stmt_copy (gs_info->offset_dt, vec_oprnd0);
2666 
2667       if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2668 	{
2669 	  gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2670 				TYPE_VECTOR_SUBPARTS (idxtype)));
2671 	  var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2672 	  op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2673 	  new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2674 	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
2675 	  op = var;
2676 	}
2677 
2678       if (mask)
2679 	{
2680 	  if (mask_perm_mask && (j & 1))
2681 	    mask_op = permute_vec_elements (mask_op, mask_op,
2682 					    mask_perm_mask, stmt, gsi);
2683 	  else
2684 	    {
2685 	      if (j == 0)
2686 		vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2687 	      else
2688 		vec_mask = vect_get_vec_def_for_stmt_copy (mask_dt, vec_mask);
2689 
2690 	      mask_op = vec_mask;
2691 	      if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2692 		{
2693 		  gcc_assert
2694 		    (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op)),
2695 			       TYPE_VECTOR_SUBPARTS (masktype)));
2696 		  var = vect_get_new_ssa_name (masktype, vect_simple_var);
2697 		  mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2698 		  new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR,
2699 						  mask_op);
2700 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
2701 		  mask_op = var;
2702 		}
2703 	    }
2704 	  src_op = mask_op;
2705 	}
2706 
2707       new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2708 				    mask_op, scale);
2709 
2710       if (!useless_type_conversion_p (vectype, rettype))
2711 	{
2712 	  gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2713 				TYPE_VECTOR_SUBPARTS (rettype)));
2714 	  op = vect_get_new_ssa_name (rettype, vect_simple_var);
2715 	  gimple_call_set_lhs (new_stmt, op);
2716 	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
2717 	  var = make_ssa_name (vec_dest);
2718 	  op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2719 	  new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2720 	}
2721       else
2722 	{
2723 	  var = make_ssa_name (vec_dest, new_stmt);
2724 	  gimple_call_set_lhs (new_stmt, var);
2725 	}
2726 
2727       vect_finish_stmt_generation (stmt, new_stmt, gsi);
2728 
2729       if (modifier == NARROW)
2730 	{
2731 	  if ((j & 1) == 0)
2732 	    {
2733 	      prev_res = var;
2734 	      continue;
2735 	    }
2736 	  var = permute_vec_elements (prev_res, var, perm_mask, stmt, gsi);
2737 	  new_stmt = SSA_NAME_DEF_STMT (var);
2738 	}
2739 
2740       if (prev_stmt_info == NULL)
2741 	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2742       else
2743 	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2744       prev_stmt_info = vinfo_for_stmt (new_stmt);
2745     }
2746 }
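
/* An illustration of the modifiers above (types are examples only):
   with a V8SI result and a V4DI offset vector, NUNITS = 8 and
   GATHER_OFF_NUNITS = 4, so MODIFIER is NARROW: NCOPIES is doubled and
   each pair of gather calls produces the two halves of one result
   vector, which permute_vec_elements then combines.  With a V4DF
   result and a V8SI offset vector the roles are reversed (WIDEN), and
   the odd-numbered copies reuse the upper half of the previous offset
   vector rather than fetching a new offset def.  */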
2747 
2748 /* Prepare the base and offset in GS_INFO for vectorization.
2749    Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2750    to the vectorized offset argument for the first copy of STMT.  STMT
2751    is the statement described by GS_INFO and LOOP is the containing loop.  */
2752 
2753 static void
2754 vect_get_gather_scatter_ops (struct loop *loop, gimple *stmt,
2755 			     gather_scatter_info *gs_info,
2756 			     tree *dataref_ptr, tree *vec_offset)
2757 {
2758   gimple_seq stmts = NULL;
2759   *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
2760   if (stmts != NULL)
2761     {
2762       basic_block new_bb;
2763       edge pe = loop_preheader_edge (loop);
2764       new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2765       gcc_assert (!new_bb);
2766     }
2767   tree offset_type = TREE_TYPE (gs_info->offset);
2768   tree offset_vectype = get_vectype_for_scalar_type (offset_type);
2769   *vec_offset = vect_get_vec_def_for_operand (gs_info->offset, stmt,
2770 					      offset_vectype);
2771 }
2772 
2773 /* Prepare to implement a grouped or strided load or store using
2774    the gather load or scatter store operation described by GS_INFO.
2775    STMT is the load or store statement.
2776 
2777    Set *DATAREF_BUMP to the amount that should be added to the base
2778    address after each copy of the vectorized statement.  Set *VEC_OFFSET
2779    to an invariant offset vector in which element I has the value
2780    I * DR_STEP / SCALE.  */
2781 
2782 static void
2783 vect_get_strided_load_store_ops (gimple *stmt, loop_vec_info loop_vinfo,
2784 				 gather_scatter_info *gs_info,
2785 				 tree *dataref_bump, tree *vec_offset)
2786 {
2787   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2788   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2789   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2790   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2791   gimple_seq stmts;
2792 
2793   tree bump = size_binop (MULT_EXPR,
2794 			  fold_convert (sizetype, DR_STEP (dr)),
2795 			  size_int (TYPE_VECTOR_SUBPARTS (vectype)));
2796   *dataref_bump = force_gimple_operand (bump, &stmts, true, NULL_TREE);
2797   if (stmts)
2798     gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2799 
2800   /* The offset given in GS_INFO can have pointer type, so use the element
2801      type of the vector instead.  */
2802   tree offset_type = TREE_TYPE (gs_info->offset);
2803   tree offset_vectype = get_vectype_for_scalar_type (offset_type);
2804   offset_type = TREE_TYPE (offset_vectype);
2805 
2806   /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type.  */
2807   tree step = size_binop (EXACT_DIV_EXPR, DR_STEP (dr),
2808 			  ssize_int (gs_info->scale));
2809   step = fold_convert (offset_type, step);
2810   step = force_gimple_operand (step, &stmts, true, NULL_TREE);
2811 
2812   /* Create {0, X, X*2, X*3, ...}.  */
2813   *vec_offset = gimple_build (&stmts, VEC_SERIES_EXPR, offset_vectype,
2814 			      build_zero_cst (offset_type), step);
2815   if (stmts)
2816     gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2817 }
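
/* A worked example (numbers illustrative): with DR_STEP = 40,
   GS_INFO->scale = 4 and 8-element vectors, *DATAREF_BUMP is
   40 * 8 = 320 bytes per copy of the vectorized statement, X is
   40 / 4 = 10, and *VEC_OFFSET is the loop-invariant series
   { 0, 10, 20, 30, ... } built with VEC_SERIES_EXPR in the loop
   preheader.  */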
2818 
2819 /* Return the amount that should be added to a vector pointer to move
2820    to the next or previous copy of AGGR_TYPE.  DR is the data reference
2821    being vectorized and MEMORY_ACCESS_TYPE describes the type of
2822    vectorization.  */
2823 
2824 static tree
2825 vect_get_data_ptr_increment (data_reference *dr, tree aggr_type,
2826 			     vect_memory_access_type memory_access_type)
2827 {
2828   if (memory_access_type == VMAT_INVARIANT)
2829     return size_zero_node;
2830 
2831   tree iv_step = TYPE_SIZE_UNIT (aggr_type);
2832   tree step = vect_dr_behavior (dr)->step;
2833   if (tree_int_cst_sgn (step) == -1)
2834     iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
2835   return iv_step;
2836 }
2837 
2838 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}.  */
2839 
2840 static bool
2841 vectorizable_bswap (gimple *stmt, gimple_stmt_iterator *gsi,
2842 		    gimple **vec_stmt, slp_tree slp_node,
2843 		    tree vectype_in, enum vect_def_type *dt)
2844 {
2845   tree op, vectype;
2846   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2847   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2848   unsigned ncopies;
2849   unsigned HOST_WIDE_INT nunits, num_bytes;
2850 
2851   op = gimple_call_arg (stmt, 0);
2852   vectype = STMT_VINFO_VECTYPE (stmt_info);
2853 
2854   if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
2855     return false;
2856 
2857   /* Multiple types in SLP are handled by creating the appropriate number of
2858      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
2859      case of SLP.  */
2860   if (slp_node)
2861     ncopies = 1;
2862   else
2863     ncopies = vect_get_num_copies (loop_vinfo, vectype);
2864 
2865   gcc_assert (ncopies >= 1);
2866 
2867   tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
2868   if (! char_vectype)
2869     return false;
2870 
2871   if (!TYPE_VECTOR_SUBPARTS (char_vectype).is_constant (&num_bytes))
2872     return false;
2873 
2874   unsigned word_bytes = num_bytes / nunits;
2875 
2876   /* The encoding uses one stepped pattern for each byte in the word.  */
2877   vec_perm_builder elts (num_bytes, word_bytes, 3);
2878   for (unsigned i = 0; i < 3; ++i)
2879     for (unsigned j = 0; j < word_bytes; ++j)
2880       elts.quick_push ((i + 1) * word_bytes - j - 1);
2881 
2882   vec_perm_indices indices (elts, 1, num_bytes);
2883   if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
2884     return false;
2885 
2886   if (! vec_stmt)
2887     {
2888       STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2889       if (dump_enabled_p ())
2890         dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_bswap ==="
2891                          "\n");
2892       if (! slp_node)
2893 	{
2894 	  add_stmt_cost (stmt_info->vinfo->target_cost_data,
2895 			 1, vector_stmt, stmt_info, 0, vect_prologue);
2896 	  add_stmt_cost (stmt_info->vinfo->target_cost_data,
2897 			 ncopies, vec_perm, stmt_info, 0, vect_body);
2898 	}
2899       return true;
2900     }
2901 
2902   tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
2903 
2904   /* Transform.  */
2905   vec<tree> vec_oprnds = vNULL;
2906   gimple *new_stmt = NULL;
2907   stmt_vec_info prev_stmt_info = NULL;
2908   for (unsigned j = 0; j < ncopies; j++)
2909     {
2910       /* Handle uses.  */
2911       if (j == 0)
2912         vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
2913       else
2914         vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
2915 
      /* Arguments are ready.  Create the new vector stmt.  */
2917       unsigned i;
2918       tree vop;
2919       FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
2920        {
2921 	 tree tem = make_ssa_name (char_vectype);
2922 	 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2923 						      char_vectype, vop));
2924 	 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2925 	 tree tem2 = make_ssa_name (char_vectype);
2926 	 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
2927 					 tem, tem, bswap_vconst);
2928 	 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2929 	 tem = make_ssa_name (vectype);
2930 	 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2931 						      vectype, tem2));
2932 	 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2933          if (slp_node)
2934            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2935        }
2936 
2937       if (slp_node)
2938         continue;
2939 
2940       if (j == 0)
2941         STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2942       else
2943         STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2944 
2945       prev_stmt_info = vinfo_for_stmt (new_stmt);
2946     }
2947 
2948   vec_oprnds.release ();
2949   return true;
2950 }
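
/* For example, __builtin_bswap32 on a V4SI vector (NUM_BYTES = 16,
   WORD_BYTES = 4) uses the byte selector

     { 3, 2, 1, 0,  7, 6, 5, 4,  11, 10, 9, 8,  15, 14, 13, 12 }

   and each copy expands to a VIEW_CONVERT_EXPR to V16QI, a
   VEC_PERM_EXPR with that selector, and a VIEW_CONVERT_EXPR back to
   V4SI.  */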
2951 
2952 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
2953    integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
2954    in a single step.  On success, store the binary pack code in
2955    *CONVERT_CODE.  */
2956 
2957 static bool
2958 simple_integer_narrowing (tree vectype_out, tree vectype_in,
2959 			  tree_code *convert_code)
2960 {
2961   if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
2962       || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
2963     return false;
2964 
2965   tree_code code;
2966   int multi_step_cvt = 0;
2967   auto_vec <tree, 8> interm_types;
2968   if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
2969 					&code, &multi_step_cvt,
2970 					&interm_types)
2971       || multi_step_cvt)
2972     return false;
2973 
2974   *convert_code = code;
2975   return true;
2976 }
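
/* For example, narrowing V4SI inputs to a V8HI result needs only a
   single VEC_PACK_TRUNC_EXPR, so *CONVERT_CODE is set to that code and
   the function returns true.  Narrowing V4SI to V16QI would require an
   intermediate step (MULTI_STEP_CVT != 0) and is rejected here.  */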
2977 
2978 /* Function vectorizable_call.
2979 
2980    Check if GS performs a function call that can be vectorized.
   If VEC_STMT is also passed, vectorize GS: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if GS is not vectorizable, TRUE otherwise.  */
2984 
2985 static bool
2986 vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
2987 		   slp_tree slp_node)
2988 {
2989   gcall *stmt;
2990   tree vec_dest;
2991   tree scalar_dest;
2992   tree op, type;
2993   tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2994   stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
2995   tree vectype_out, vectype_in;
2996   poly_uint64 nunits_in;
2997   poly_uint64 nunits_out;
2998   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2999   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3000   vec_info *vinfo = stmt_info->vinfo;
3001   tree fndecl, new_temp, rhs_type;
3002   gimple *def_stmt;
3003   enum vect_def_type dt[3]
3004     = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
3005   int ndts = 3;
3006   gimple *new_stmt = NULL;
3007   int ncopies, j;
3008   vec<tree> vargs = vNULL;
3009   enum { NARROW, NONE, WIDEN } modifier;
3010   size_t i, nargs;
3011   tree lhs;
3012 
3013   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3014     return false;
3015 
3016   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3017       && ! vec_stmt)
3018     return false;
3019 
3020   /* Is GS a vectorizable call?   */
3021   stmt = dyn_cast <gcall *> (gs);
3022   if (!stmt)
3023     return false;
3024 
3025   if (gimple_call_internal_p (stmt)
3026       && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3027 	  || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3028     /* Handled by vectorizable_load and vectorizable_store.  */
3029     return false;
3030 
3031   if (gimple_call_lhs (stmt) == NULL_TREE
3032       || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3033     return false;
3034 
3035   gcc_checking_assert (!stmt_can_throw_internal (stmt));
3036 
3037   vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3038 
3039   /* Process function arguments.  */
3040   rhs_type = NULL_TREE;
3041   vectype_in = NULL_TREE;
3042   nargs = gimple_call_num_args (stmt);
3043 
3044   /* Bail out if the function has more than three arguments; we do not have
3045      interesting builtin functions to vectorize with more than two arguments
3046      except for fma.  Calls with no arguments are not interesting either.  */
3047   if (nargs == 0 || nargs > 3)
3048     return false;
3049 
3050   /* Ignore the argument of IFN_GOMP_SIMD_LANE; it is magic.  */
3051   if (gimple_call_internal_p (stmt)
3052       && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
3053     {
3054       nargs = 0;
3055       rhs_type = unsigned_type_node;
3056     }
3057 
3058   for (i = 0; i < nargs; i++)
3059     {
3060       tree opvectype;
3061 
3062       op = gimple_call_arg (stmt, i);
3063 
3064       /* We can only handle calls with arguments of the same type.  */
3065       if (rhs_type
3066 	  && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3067 	{
3068 	  if (dump_enabled_p ())
3069 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3070                              "argument types differ.\n");
3071 	  return false;
3072 	}
3073       if (!rhs_type)
3074 	rhs_type = TREE_TYPE (op);
3075 
3076       if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[i], &opvectype))
3077 	{
3078 	  if (dump_enabled_p ())
3079 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3080                              "use not simple.\n");
3081 	  return false;
3082 	}
3083 
3084       if (!vectype_in)
3085 	vectype_in = opvectype;
3086       else if (opvectype
3087 	       && opvectype != vectype_in)
3088 	{
3089 	  if (dump_enabled_p ())
3090 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3091                              "argument vector types differ.\n");
3092 	  return false;
3093 	}
3094     }
3095   /* If all arguments are external or constant defs, use a vector type with
3096      the same size as the output vector type.  */
3097   if (!vectype_in)
3098     vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
3099   if (vec_stmt)
3100     gcc_assert (vectype_in);
3101   if (!vectype_in)
3102     {
3103       if (dump_enabled_p ())
3104         {
3105           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3106                            "no vectype for scalar type ");
3107           dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
3108           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3109         }
3110 
3111       return false;
3112     }
3113 
3114   /* FORNOW */
3115   nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3116   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
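  /* Classify the call by the ratio of input to output vector elements:
     twice as many elements on the output side means each output vector
     packs the results computed from two input vectors (NARROW); twice as
     many on the input side means the operation widens its operands
     (WIDEN).  */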
3117   if (known_eq (nunits_in * 2, nunits_out))
3118     modifier = NARROW;
3119   else if (known_eq (nunits_out, nunits_in))
3120     modifier = NONE;
3121   else if (known_eq (nunits_out * 2, nunits_in))
3122     modifier = WIDEN;
3123   else
3124     return false;
3125 
3126   /* We only handle functions that do not read or clobber memory.  */
3127   if (gimple_vuse (stmt))
3128     {
3129       if (dump_enabled_p ())
3130 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3131 			 "function reads from or writes to memory.\n");
3132       return false;
3133     }
3134 
3135   /* For now, we only vectorize functions if a target specific builtin
3136      is available.  TODO -- in some cases, it might be profitable to
3137      insert the calls for pieces of the vector, in order to be able
3138      to vectorize other operations in the loop.  */
3139   fndecl = NULL_TREE;
3140   internal_fn ifn = IFN_LAST;
3141   combined_fn cfn = gimple_call_combined_fn (stmt);
3142   tree callee = gimple_call_fndecl (stmt);
3143 
3144   /* First try using an internal function.  */
3145   tree_code convert_code = ERROR_MARK;
3146   if (cfn != CFN_LAST
3147       && (modifier == NONE
3148 	  || (modifier == NARROW
3149 	      && simple_integer_narrowing (vectype_out, vectype_in,
3150 					   &convert_code))))
3151     ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3152 					  vectype_in);
3153 
3154   /* If that fails, try asking for a target-specific built-in function.  */
3155   if (ifn == IFN_LAST)
3156     {
3157       if (cfn != CFN_LAST)
3158 	fndecl = targetm.vectorize.builtin_vectorized_function
3159 	  (cfn, vectype_out, vectype_in);
3160       else if (callee)
3161 	fndecl = targetm.vectorize.builtin_md_vectorized_function
3162 	  (callee, vectype_out, vectype_in);
3163     }
3164 
3165   if (ifn == IFN_LAST && !fndecl)
3166     {
3167       if (cfn == CFN_GOMP_SIMD_LANE
3168 	  && !slp_node
3169 	  && loop_vinfo
3170 	  && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3171 	  && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3172 	  && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3173 	     == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3174 	{
3175 	  /* We can handle IFN_GOMP_SIMD_LANE by returning a
3176 	     { 0, 1, 2, ... vf - 1 } vector.  */
3177 	  gcc_assert (nargs == 0);
3178 	}
3179       else if (modifier == NONE
3180 	       && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3181 		   || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3182 		   || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
3183 	return vectorizable_bswap (stmt, gsi, vec_stmt, slp_node,
3184 				   vectype_in, dt);
3185       else
3186 	{
3187 	  if (dump_enabled_p ())
3188 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3189 			     "function is not vectorizable.\n");
3190 	  return false;
3191 	}
3192     }
3193 
3194   if (slp_node)
3195     ncopies = 1;
3196   else if (modifier == NARROW && ifn == IFN_LAST)
3197     ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3198   else
3199     ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
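  /* NCOPIES counts the vector calls emitted per scalar stmt.  For a
     narrowing operation via an internal function it is based on the input
     vector type, since the results of two calls are packed into each
     output vector; a narrowing target builtin instead consumes two input
     vectors per call, so NCOPIES is based on the output vector type.  */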
3200 
3201   /* Sanity check: make sure that at least one copy of the vectorized stmt
3202      needs to be generated.  */
3203   gcc_assert (ncopies >= 1);
3204 
3205   if (!vec_stmt) /* transformation not required.  */
3206     {
3207       STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3208       if (dump_enabled_p ())
3209         dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
3210                          "\n");
3211       if (!slp_node)
3212 	{
3213 	  vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
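          /* For narrowing via an internal function every second call is
             followed by a statement packing the two half-width results,
             hence the extra NCOPIES / 2 vec_promote_demote stmts.  */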
3214 	  if (ifn != IFN_LAST && modifier == NARROW)
3215 	    add_stmt_cost (stmt_info->vinfo->target_cost_data, ncopies / 2,
3216 			   vec_promote_demote, stmt_info, 0, vect_body);
3217 	}
3218 
3219       return true;
3220     }
3221 
3222   /* Transform.  */
3223 
3224   if (dump_enabled_p ())
3225     dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3226 
3227   /* Handle def.  */
3228   scalar_dest = gimple_call_lhs (stmt);
3229   vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3230 
3231   prev_stmt_info = NULL;
3232   if (modifier == NONE || ifn != IFN_LAST)
3233     {
3234       tree prev_res = NULL_TREE;
3235       for (j = 0; j < ncopies; ++j)
3236 	{
3237 	  /* Build argument list for the vectorized call.  */
3238 	  if (j == 0)
3239 	    vargs.create (nargs);
3240 	  else
3241 	    vargs.truncate (0);
3242 
3243 	  if (slp_node)
3244 	    {
3245 	      auto_vec<vec<tree> > vec_defs (nargs);
3246 	      vec<tree> vec_oprnds0;
3247 
3248 	      for (i = 0; i < nargs; i++)
3249 		vargs.quick_push (gimple_call_arg (stmt, i));
3250 	      vect_get_slp_defs (vargs, slp_node, &vec_defs);
3251 	      vec_oprnds0 = vec_defs[0];
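              /* VEC_DEFS[K][I] is the I-th vector definition of call
                 argument K; walk the defs of the first argument and pick
                 the matching defs of the remaining arguments.  */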
3252 
3253 	      /* Arguments are ready.  Create the new vector stmt.  */
3254 	      FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3255 		{
3256 		  size_t k;
3257 		  for (k = 0; k < nargs; k++)
3258 		    {
3259 		      vec<tree> vec_oprndsk = vec_defs[k];
3260 		      vargs[k] = vec_oprndsk[i];
3261 		    }
3262 		  if (modifier == NARROW)
3263 		    {
3264 		      tree half_res = make_ssa_name (vectype_in);
3265 		      gcall *call
3266 			= gimple_build_call_internal_vec (ifn, vargs);
3267 		      gimple_call_set_lhs (call, half_res);
3268 		      gimple_call_set_nothrow (call, true);
3269 		      new_stmt = call;
3270 		      vect_finish_stmt_generation (stmt, new_stmt, gsi);
3271 		      if ((i & 1) == 0)
3272 			{
3273 			  prev_res = half_res;
3274 			  continue;
3275 			}
3276 		      new_temp = make_ssa_name (vec_dest);
3277 		      new_stmt = gimple_build_assign (new_temp, convert_code,
3278 						      prev_res, half_res);
3279 		    }
3280 		  else
3281 		    {
3282 		      gcall *call;
3283 		      if (ifn != IFN_LAST)
3284 			call = gimple_build_call_internal_vec (ifn, vargs);
3285 		      else
3286 			call = gimple_build_call_vec (fndecl, vargs);
3287 		      new_temp = make_ssa_name (vec_dest, call);
3288 		      gimple_call_set_lhs (call, new_temp);
3289 		      gimple_call_set_nothrow (call, true);
3290 		      new_stmt = call;
3291 		    }
3292 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
3293 		  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3294 		}
3295 
3296 	      for (i = 0; i < nargs; i++)
3297 		{
3298 		  vec<tree> vec_oprndsi = vec_defs[i];
3299 		  vec_oprndsi.release ();
3300 		}
3301 	      continue;
3302 	    }
3303 
3304 	  for (i = 0; i < nargs; i++)
3305 	    {
3306 	      op = gimple_call_arg (stmt, i);
3307 	      if (j == 0)
3308 		vec_oprnd0
3309 		  = vect_get_vec_def_for_operand (op, stmt);
3310 	      else
3311 		{
3312 		  vec_oprnd0 = gimple_call_arg (new_stmt, i);
3313 		  vec_oprnd0
3314                     = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3315 		}
3316 
3317 	      vargs.quick_push (vec_oprnd0);
3318 	    }
3319 
3320 	  if (gimple_call_internal_p (stmt)
3321 	      && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
3322 	    {
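              /* Copy J of the GOMP_SIMD_LANE result is the constant index
                 vector { J * NUNITS_OUT, ..., (J + 1) * NUNITS_OUT - 1 }.  */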
3323 	      tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3324 	      tree new_var
3325 		= vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3326 	      gimple *init_stmt = gimple_build_assign (new_var, cst);
3327 	      vect_init_vector_1 (stmt, init_stmt, NULL);
3328 	      new_temp = make_ssa_name (vec_dest);
3329 	      new_stmt = gimple_build_assign (new_temp, new_var);
3330 	    }
3331 	  else if (modifier == NARROW)
3332 	    {
3333 	      tree half_res = make_ssa_name (vectype_in);
3334 	      gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3335 	      gimple_call_set_lhs (call, half_res);
3336 	      gimple_call_set_nothrow (call, true);
3337 	      new_stmt = call;
3338 	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
3339 	      if ((j & 1) == 0)
3340 		{
3341 		  prev_res = half_res;
3342 		  continue;
3343 		}
3344 	      new_temp = make_ssa_name (vec_dest);
3345 	      new_stmt = gimple_build_assign (new_temp, convert_code,
3346 					      prev_res, half_res);
3347 	    }
3348 	  else
3349 	    {
3350 	      gcall *call;
3351 	      if (ifn != IFN_LAST)
3352 		call = gimple_build_call_internal_vec (ifn, vargs);
3353 	      else
3354 		call = gimple_build_call_vec (fndecl, vargs);
3355 	      new_temp = make_ssa_name (vec_dest, call);
3356 	      gimple_call_set_lhs (call, new_temp);
3357 	      gimple_call_set_nothrow (call, true);
3358 	      new_stmt = call;
3359 	    }
3360 	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
3361 
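          /* For a narrowing operation the first full result vector is only
             complete after the second copy (j == 1), once both halves have
             been packed together.  */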
3362 	  if (j == (modifier == NARROW ? 1 : 0))
3363 	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3364 	  else
3365 	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3366 
3367 	  prev_stmt_info = vinfo_for_stmt (new_stmt);
3368 	}
3369     }
3370   else if (modifier == NARROW)
3371     {
3372       for (j = 0; j < ncopies; ++j)
3373 	{
3374 	  /* Build argument list for the vectorized call.  */
3375 	  if (j == 0)
3376 	    vargs.create (nargs * 2);
3377 	  else
3378 	    vargs.truncate (0);
3379 
3380 	  if (slp_node)
3381 	    {
3382 	      auto_vec<vec<tree> > vec_defs (nargs);
3383 	      vec<tree> vec_oprnds0;
3384 
3385 	      for (i = 0; i < nargs; i++)
3386 		vargs.quick_push (gimple_call_arg (stmt, i));
3387 	      vect_get_slp_defs (vargs, slp_node, &vec_defs);
3388 	      vec_oprnds0 = vec_defs[0];
3389 
3390 	      /* Arguments are ready.  Create the new vector stmt.  */
3391 	      for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3392 		{
3393 		  size_t k;
3394 		  vargs.truncate (0);
3395 		  for (k = 0; k < nargs; k++)
3396 		    {
3397 		      vec<tree> vec_oprndsk = vec_defs[k];
3398 		      vargs.quick_push (vec_oprndsk[i]);
3399 		      vargs.quick_push (vec_oprndsk[i + 1]);
3400 		    }
3401 		  gcall *call;
3402 		  if (ifn != IFN_LAST)
3403 		    call = gimple_build_call_internal_vec (ifn, vargs);
3404 		  else
3405 		    call = gimple_build_call_vec (fndecl, vargs);
3406 		  new_temp = make_ssa_name (vec_dest, call);
3407 		  gimple_call_set_lhs (call, new_temp);
3408 		  gimple_call_set_nothrow (call, true);
3409 		  new_stmt = call;
3410 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
3411 		  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3412 		}
3413 
3414 	      for (i = 0; i < nargs; i++)
3415 		{
3416 		  vec<tree> vec_oprndsi = vec_defs[i];
3417 		  vec_oprndsi.release ();
3418 		}
3419 	      continue;
3420 	    }
3421 
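          /* A narrowing target builtin consumes two vector defs per
             argument for each emitted call, so fetch the operands
             pairwise.  */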
3422 	  for (i = 0; i < nargs; i++)
3423 	    {
3424 	      op = gimple_call_arg (stmt, i);
3425 	      if (j == 0)
3426 		{
3427 		  vec_oprnd0
3428 		    = vect_get_vec_def_for_operand (op, stmt);
3429 		  vec_oprnd1
3430 		    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3431 		}
3432 	      else
3433 		{
3434 		  vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
3435 		  vec_oprnd0
3436 		    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
3437 		  vec_oprnd1
3438 		    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3439 		}
3440 
3441 	      vargs.quick_push (vec_oprnd0);
3442 	      vargs.quick_push (vec_oprnd1);
3443 	    }
3444 
3445 	  new_stmt = gimple_build_call_vec (fndecl, vargs);
3446 	  new_temp = make_ssa_name (vec_dest, new_stmt);
3447 	  gimple_call_set_lhs (new_stmt, new_temp);
3448 	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
3449 
3450 	  if (j == 0)
3451 	    STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3452 	  else
3453 	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3454 
3455 	  prev_stmt_info = vinfo_for_stmt (new_stmt);
3456 	}
3457 
3458       *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3459     }
3460   else
3461     /* No current target implements this case.  */
3462     return false;
3463 
3464   vargs.release ();
3465 
3466   /* The call in STMT might prevent it from being removed in dce.
3467      However, we cannot remove it here, due to the way the ssa name
3468      it defines is mapped to the new definition.  So just replace
3469      the rhs of the statement with something harmless.  */
3470 
3471   if (slp_node)
3472     return true;
3473 
3474   type = TREE_TYPE (scalar_dest);
3475   if (is_pattern_stmt_p (stmt_info))
3476     lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3477   else
3478     lhs = gimple_call_lhs (stmt);
3479 
3480   new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3481   set_vinfo_for_stmt (new_stmt, stmt_info);
3482   set_vinfo_for_stmt (stmt, NULL);
3483   STMT_VINFO_STMT (stmt_info) = new_stmt;
3484   gsi_replace (gsi, new_stmt, false);
3485 
3486   return true;
3487 }
3488 
3489 
3490 struct simd_call_arg_info
3491 {
3492   tree vectype;
3493   tree op;
3494   HOST_WIDE_INT linear_step;
3495   enum vect_def_type dt;
3496   unsigned int align;
3497   bool simd_lane_linear;
3498 };
3499 
3500 /* Helper function of vectorizable_simd_clone_call.  If OP, an SSA_NAME,
3501    is linear within simd lane (but not within whole loop), note it in
3502    *ARGINFO.  */
3503 
3504 static void
3505 vect_simd_lane_linear (tree op, struct loop *loop,
3506 		       struct simd_call_arg_info *arginfo)
3507 {
3508   gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3509 
3510   if (!is_gimple_assign (def_stmt)
3511       || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3512       || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3513     return;
3514 
3515   tree base = gimple_assign_rhs1 (def_stmt);
3516   HOST_WIDE_INT linear_step = 0;
3517   tree v = gimple_assign_rhs2 (def_stmt);
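  /* Peel additions of constants (folded into BASE), multiplications by
     constants (recorded as LINEAR_STEP) and conversions off V until we
     reach the result of a GOMP_SIMD_LANE call; only then is OP known to
     be linear within the simd lane.  */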
3518   while (TREE_CODE (v) == SSA_NAME)
3519     {
3520       tree t;
3521       def_stmt = SSA_NAME_DEF_STMT (v);
3522       if (is_gimple_assign (def_stmt))
3523 	switch (gimple_assign_rhs_code (def_stmt))
3524 	  {
3525 	  case PLUS_EXPR:
3526 	    t = gimple_assign_rhs2 (def_stmt);
3527 	    if (linear_step || TREE_CODE (t) != INTEGER_CST)
3528 	      return;
3529 	    base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3530 	    v = gimple_assign_rhs1 (def_stmt);
3531 	    continue;
3532 	  case MULT_EXPR:
3533 	    t = gimple_assign_rhs2 (def_stmt);
3534 	    if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3535 	      return;
3536 	    linear_step = tree_to_shwi (t);
3537 	    v = gimple_assign_rhs1 (def_stmt);
3538 	    continue;
3539 	  CASE_CONVERT:
3540 	    t = gimple_assign_rhs1 (def_stmt);
3541 	    if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3542 		|| (TYPE_PRECISION (TREE_TYPE (v))
3543 		    < TYPE_PRECISION (TREE_TYPE (t))))
3544 	      return;
3545 	    if (!linear_step)
3546 	      linear_step = 1;
3547 	    v = t;
3548 	    continue;
3549 	  default:
3550 	    return;
3551 	  }
3552       else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3553 	       && loop->simduid
3554 	       && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3555 	       && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3556 		   == loop->simduid))
3557 	{
3558 	  if (!linear_step)
3559 	    linear_step = 1;
3560 	  arginfo->linear_step = linear_step;
3561 	  arginfo->op = base;
3562 	  arginfo->simd_lane_linear = true;
3563 	  return;
3564 	}
3565     }
3566 }
3567 
3568 /* Return the number of elements in vector type VECTYPE, which is associated
3569    with a SIMD clone.  At present these vectors always have a constant
3570    length.  */
3571 
3572 static unsigned HOST_WIDE_INT
3573 simd_clone_subparts (tree vectype)
3574 {
3575   return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
3576 }
3577 
3578 /* Function vectorizable_simd_clone_call.
3579 
3580    Check if STMT performs a function call that can be vectorized
3581    by calling a simd clone of the function.
3582    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3583    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3584    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
3585 
3586 static bool
3587 vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
3588 			      gimple **vec_stmt, slp_tree slp_node)
3589 {
3590   tree vec_dest;
3591   tree scalar_dest;
3592   tree op, type;
3593   tree vec_oprnd0 = NULL_TREE;
3594   stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
3595   tree vectype;
3596   unsigned int nunits;
3597   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3598   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3599   vec_info *vinfo = stmt_info->vinfo;
3600   struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3601   tree fndecl, new_temp;
3602   gimple *def_stmt;
3603   gimple *new_stmt = NULL;
3604   int ncopies, j;
3605   auto_vec<simd_call_arg_info> arginfo;
3606   vec<tree> vargs = vNULL;
3607   size_t i, nargs;
3608   tree lhs, rtype, ratype;
3609   vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3610 
3611   /* Is STMT a vectorizable call?   */
3612   if (!is_gimple_call (stmt))
3613     return false;
3614 
3615   fndecl = gimple_call_fndecl (stmt);
3616   if (fndecl == NULL_TREE)
3617     return false;
3618 
3619   struct cgraph_node *node = cgraph_node::get (fndecl);
3620   if (node == NULL || node->simd_clones == NULL)
3621     return false;
3622 
3623   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3624     return false;
3625 
3626   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3627       && ! vec_stmt)
3628     return false;
3629 
3630   if (gimple_call_lhs (stmt)
3631       && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3632     return false;
3633 
3634   gcc_checking_assert (!stmt_can_throw_internal (stmt));
3635 
3636   vectype = STMT_VINFO_VECTYPE (stmt_info);
3637 
3638   if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
3639     return false;
3640 
3641   /* FORNOW */
3642   if (slp_node)
3643     return false;
3644 
3645   /* Process function arguments.  */
3646   nargs = gimple_call_num_args (stmt);
3647 
3648   /* Bail out if the function has zero arguments.  */
3649   if (nargs == 0)
3650     return false;
3651 
3652   arginfo.reserve (nargs, true);
3653 
3654   for (i = 0; i < nargs; i++)
3655     {
3656       simd_call_arg_info thisarginfo;
3657       affine_iv iv;
3658 
3659       thisarginfo.linear_step = 0;
3660       thisarginfo.align = 0;
3661       thisarginfo.op = NULL_TREE;
3662       thisarginfo.simd_lane_linear = false;
3663 
3664       op = gimple_call_arg (stmt, i);
3665       if (!vect_is_simple_use (op, vinfo, &def_stmt, &thisarginfo.dt,
3666 			       &thisarginfo.vectype)
3667 	  || thisarginfo.dt == vect_uninitialized_def)
3668 	{
3669 	  if (dump_enabled_p ())
3670 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3671 			     "use not simple.\n");
3672 	  return false;
3673 	}
3674 
3675       if (thisarginfo.dt == vect_constant_def
3676 	  || thisarginfo.dt == vect_external_def)
3677 	gcc_assert (thisarginfo.vectype == NULL_TREE);
3678       else
3679 	gcc_assert (thisarginfo.vectype != NULL_TREE);
3680 
3681       /* For linear arguments, the analyze phase should have saved
3682 	 the base and step in STMT_VINFO_SIMD_CLONE_INFO.  */
3683       if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3684 	  && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3685 	{
3686 	  gcc_assert (vec_stmt);
3687 	  thisarginfo.linear_step
3688 	    = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
3689 	  thisarginfo.op
3690 	    = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3691 	  thisarginfo.simd_lane_linear
3692 	    = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3693 	       == boolean_true_node);
3694 	  /* If the loop has been peeled for alignment, adjust the base.  */
3695 	  tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3696 	  tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
3697 	  if (n1 != n2 && !thisarginfo.simd_lane_linear)
3698 	    {
3699 	      tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
3700 	      tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
3701 	      tree opt = TREE_TYPE (thisarginfo.op);
3702 	      bias = fold_convert (TREE_TYPE (step), bias);
3703 	      bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3704 	      thisarginfo.op
3705 		= fold_build2 (POINTER_TYPE_P (opt)
3706 			       ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3707 			       thisarginfo.op, bias);
3708 	    }
3709 	}
3710       else if (!vec_stmt
3711 	       && thisarginfo.dt != vect_constant_def
3712 	       && thisarginfo.dt != vect_external_def
3713 	       && loop_vinfo
3714 	       && TREE_CODE (op) == SSA_NAME
3715 	       && simple_iv (loop, loop_containing_stmt (stmt), op,
3716 			     &iv, false)
3717 	       && tree_fits_shwi_p (iv.step))
3718 	{
3719 	  thisarginfo.linear_step = tree_to_shwi (iv.step);
3720 	  thisarginfo.op = iv.base;
3721 	}
3722       else if ((thisarginfo.dt == vect_constant_def
3723 		|| thisarginfo.dt == vect_external_def)
3724 	       && POINTER_TYPE_P (TREE_TYPE (op)))
3725 	thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
3726       /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3727 	 linear too.  */
3728       if (POINTER_TYPE_P (TREE_TYPE (op))
3729 	  && !thisarginfo.linear_step
3730 	  && !vec_stmt
3731 	  && thisarginfo.dt != vect_constant_def
3732 	  && thisarginfo.dt != vect_external_def
3733 	  && loop_vinfo
3734 	  && !slp_node
3735 	  && TREE_CODE (op) == SSA_NAME)
3736 	vect_simd_lane_linear (op, loop, &thisarginfo);
3737 
3738       arginfo.quick_push (thisarginfo);
3739     }
3740 
3741   unsigned HOST_WIDE_INT vf;
3742   if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
3743     {
3744       if (dump_enabled_p ())
3745 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3746 			 "not considering SIMD clones; not yet supported"
3747 			 " for variable-width vectors.\n");
3748       return false;
3749     }
3750 
3751   unsigned int badness = 0;
3752   struct cgraph_node *bestn = NULL;
3753   if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
3754     bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
3755   else
3756     for (struct cgraph_node *n = node->simd_clones; n != NULL;
3757 	 n = n->simdclone->next_clone)
3758       {
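        /* Score each candidate clone: clones with a smaller simdlen than
           the vectorization factor and clones the target considers less
           suitable get a higher badness; in-branch clones are skipped for
           now.  The clone with the lowest badness wins.  */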
3759 	unsigned int this_badness = 0;
3760 	if (n->simdclone->simdlen > vf
3761 	    || n->simdclone->nargs != nargs)
3762 	  continue;
3763 	if (n->simdclone->simdlen < vf)
3764 	  this_badness += (exact_log2 (vf)
3765 			   - exact_log2 (n->simdclone->simdlen)) * 1024;
3766 	if (n->simdclone->inbranch)
3767 	  this_badness += 2048;
3768 	int target_badness = targetm.simd_clone.usable (n);
3769 	if (target_badness < 0)
3770 	  continue;
3771 	this_badness += target_badness * 512;
3772 	/* FORNOW: Have to add code to add the mask argument.  */
3773 	if (n->simdclone->inbranch)
3774 	  continue;
3775 	for (i = 0; i < nargs; i++)
3776 	  {
3777 	    switch (n->simdclone->args[i].arg_type)
3778 	      {
3779 	      case SIMD_CLONE_ARG_TYPE_VECTOR:
3780 		if (!useless_type_conversion_p
3781 			(n->simdclone->args[i].orig_type,
3782 			 TREE_TYPE (gimple_call_arg (stmt, i))))
3783 		  i = -1;
3784 		else if (arginfo[i].dt == vect_constant_def
3785 			 || arginfo[i].dt == vect_external_def
3786 			 || arginfo[i].linear_step)
3787 		  this_badness += 64;
3788 		break;
3789 	      case SIMD_CLONE_ARG_TYPE_UNIFORM:
3790 		if (arginfo[i].dt != vect_constant_def
3791 		    && arginfo[i].dt != vect_external_def)
3792 		  i = -1;
3793 		break;
3794 	      case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3795 	      case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3796 		if (arginfo[i].dt == vect_constant_def
3797 		    || arginfo[i].dt == vect_external_def
3798 		    || (arginfo[i].linear_step
3799 			!= n->simdclone->args[i].linear_step))
3800 		  i = -1;
3801 		break;
3802 	      case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3803 	      case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3804 	      case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3805 	      case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3806 	      case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3807 	      case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3808 		/* FORNOW */
3809 		i = -1;
3810 		break;
3811 	      case SIMD_CLONE_ARG_TYPE_MASK:
3812 		gcc_unreachable ();
3813 	      }
3814 	    if (i == (size_t) -1)
3815 	      break;
3816 	    if (n->simdclone->args[i].alignment > arginfo[i].align)
3817 	      {
3818 		i = -1;
3819 		break;
3820 	      }
3821 	    if (arginfo[i].align)
3822 	      this_badness += (exact_log2 (arginfo[i].align)
3823 			       - exact_log2 (n->simdclone->args[i].alignment));
3824 	  }
3825 	if (i == (size_t) -1)
3826 	  continue;
3827 	if (bestn == NULL || this_badness < badness)
3828 	  {
3829 	    bestn = n;
3830 	    badness = this_badness;
3831 	  }
3832       }
3833 
3834   if (bestn == NULL)
3835     return false;
3836 
3837   for (i = 0; i < nargs; i++)
3838     if ((arginfo[i].dt == vect_constant_def
3839 	 || arginfo[i].dt == vect_external_def)
3840 	&& bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
3841       {
3842 	arginfo[i].vectype
3843 	  = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
3844 								     i)));
3845 	if (arginfo[i].vectype == NULL
3846 	    || (simd_clone_subparts (arginfo[i].vectype)
3847 		> bestn->simdclone->simdlen))
3848 	  return false;
3849       }
3850 
3851   fndecl = bestn->decl;
3852   nunits = bestn->simdclone->simdlen;
3853   ncopies = vf / nunits;
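  /* Each clone invocation handles NUNITS lanes, so VF / NUNITS calls are
     needed per vectorized loop iteration.  */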
3854 
3855   /* If the function isn't const, only allow it in simd loops where the
3856      user has asserted that at least nunits consecutive iterations can be
3857      performed using SIMD instructions.  */
3858   if ((loop == NULL || (unsigned) loop->safelen < nunits)
3859       && gimple_vuse (stmt))
3860     return false;
3861 
3862   /* Sanity check: make sure that at least one copy of the vectorized stmt
3863      needs to be generated.  */
3864   gcc_assert (ncopies >= 1);
3865 
3866   if (!vec_stmt) /* transformation not required.  */
3867     {
3868       STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
3869       for (i = 0; i < nargs; i++)
3870 	if ((bestn->simdclone->args[i].arg_type
3871 	     == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
3872 	    || (bestn->simdclone->args[i].arg_type
3873 		== SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
3874 	  {
3875 	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
3876 									+ 1);
3877 	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
3878 	    tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
3879 		       ? size_type_node : TREE_TYPE (arginfo[i].op);
3880 	    tree ls = build_int_cst (lst, arginfo[i].linear_step);
3881 	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
3882 	    tree sll = arginfo[i].simd_lane_linear
3883 		       ? boolean_true_node : boolean_false_node;
3884 	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
3885 	  }
3886       STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
3887       if (dump_enabled_p ())
3888 	dump_printf_loc (MSG_NOTE, vect_location,
3889 			 "=== vectorizable_simd_clone_call ===\n");
3890 /*      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
3891       return true;
3892     }
3893 
3894   /* Transform.  */
3895 
3896   if (dump_enabled_p ())
3897     dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3898 
3899   /* Handle def.  */
3900   scalar_dest = gimple_call_lhs (stmt);
3901   vec_dest = NULL_TREE;
3902   rtype = NULL_TREE;
3903   ratype = NULL_TREE;
3904   if (scalar_dest)
3905     {
3906       vec_dest = vect_create_destination_var (scalar_dest, vectype);
3907       rtype = TREE_TYPE (TREE_TYPE (fndecl));
3908       if (TREE_CODE (rtype) == ARRAY_TYPE)
3909 	{
3910 	  ratype = rtype;
3911 	  rtype = TREE_TYPE (ratype);
3912 	}
3913     }
3914 
3915   prev_stmt_info = NULL;
3916   for (j = 0; j < ncopies; ++j)
3917     {
3918       /* Build argument list for the vectorized call.  */
3919       if (j == 0)
3920 	vargs.create (nargs);
3921       else
3922 	vargs.truncate (0);
3923 
3924       for (i = 0; i < nargs; i++)
3925 	{
3926 	  unsigned int k, l, m, o;
3927 	  tree atype;
3928 	  op = gimple_call_arg (stmt, i);
3929 	  switch (bestn->simdclone->args[i].arg_type)
3930 	    {
3931 	    case SIMD_CLONE_ARG_TYPE_VECTOR:
3932 	      atype = bestn->simdclone->args[i].vector_type;
3933 	      o = nunits / simd_clone_subparts (atype);
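              /* The clone expects O vector arguments of type ATYPE for
                 this parameter; build them from the vector defs of OP,
                 splitting with BIT_FIELD_REFs or concatenating with
                 CONSTRUCTORs as needed.  */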
3934 	      for (m = j * o; m < (j + 1) * o; m++)
3935 		{
3936 		  if (simd_clone_subparts (atype)
3937 		      < simd_clone_subparts (arginfo[i].vectype))
3938 		    {
3939 		      poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
3940 		      k = (simd_clone_subparts (arginfo[i].vectype)
3941 			   / simd_clone_subparts (atype));
3942 		      gcc_assert ((k & (k - 1)) == 0);
3943 		      if (m == 0)
3944 			vec_oprnd0
3945 			  = vect_get_vec_def_for_operand (op, stmt);
3946 		      else
3947 			{
3948 			  vec_oprnd0 = arginfo[i].op;
3949 			  if ((m & (k - 1)) == 0)
3950 			    vec_oprnd0
3951 			      = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3952 								vec_oprnd0);
3953 			}
3954 		      arginfo[i].op = vec_oprnd0;
3955 		      vec_oprnd0
3956 			= build3 (BIT_FIELD_REF, atype, vec_oprnd0,
3957 				  bitsize_int (prec),
3958 				  bitsize_int ((m & (k - 1)) * prec));
3959 		      new_stmt
3960 			= gimple_build_assign (make_ssa_name (atype),
3961 					       vec_oprnd0);
3962 		      vect_finish_stmt_generation (stmt, new_stmt, gsi);
3963 		      vargs.safe_push (gimple_assign_lhs (new_stmt));
3964 		    }
3965 		  else
3966 		    {
3967 		      k = (simd_clone_subparts (atype)
3968 			   / simd_clone_subparts (arginfo[i].vectype));
3969 		      gcc_assert ((k & (k - 1)) == 0);
3970 		      vec<constructor_elt, va_gc> *ctor_elts;
3971 		      if (k != 1)
3972 			vec_alloc (ctor_elts, k);
3973 		      else
3974 			ctor_elts = NULL;
3975 		      for (l = 0; l < k; l++)
3976 			{
3977 			  if (m == 0 && l == 0)
3978 			    vec_oprnd0
3979 			      = vect_get_vec_def_for_operand (op, stmt);
3980 			  else
3981 			    vec_oprnd0
3982 			      = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3983 								arginfo[i].op);
3984 			  arginfo[i].op = vec_oprnd0;
3985 			  if (k == 1)
3986 			    break;
3987 			  CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
3988 						  vec_oprnd0);
3989 			}
3990 		      if (k == 1)
3991 			vargs.safe_push (vec_oprnd0);
3992 		      else
3993 			{
3994 			  vec_oprnd0 = build_constructor (atype, ctor_elts);
3995 			  new_stmt
3996 			    = gimple_build_assign (make_ssa_name (atype),
3997 						   vec_oprnd0);
3998 			  vect_finish_stmt_generation (stmt, new_stmt, gsi);
3999 			  vargs.safe_push (gimple_assign_lhs (new_stmt));
4000 			}
4001 		    }
4002 		}
4003 	      break;
4004 	    case SIMD_CLONE_ARG_TYPE_UNIFORM:
4005 	      vargs.safe_push (op);
4006 	      break;
4007 	    case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4008 	    case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4009 	      if (j == 0)
4010 		{
4011 		  gimple_seq stmts;
4012 		  arginfo[i].op
4013 		    = force_gimple_operand (arginfo[i].op, &stmts, true,
4014 					    NULL_TREE);
4015 		  if (stmts != NULL)
4016 		    {
4017 		      basic_block new_bb;
4018 		      edge pe = loop_preheader_edge (loop);
4019 		      new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4020 		      gcc_assert (!new_bb);
4021 		    }
4022 		  if (arginfo[i].simd_lane_linear)
4023 		    {
4024 		      vargs.safe_push (arginfo[i].op);
4025 		      break;
4026 		    }
4027 		  tree phi_res = copy_ssa_name (op);
4028 		  gphi *new_phi = create_phi_node (phi_res, loop->header);
4029 		  set_vinfo_for_stmt (new_phi,
4030 				      new_stmt_vec_info (new_phi, loop_vinfo));
4031 		  add_phi_arg (new_phi, arginfo[i].op,
4032 			       loop_preheader_edge (loop), UNKNOWN_LOCATION);
4033 		  enum tree_code code
4034 		    = POINTER_TYPE_P (TREE_TYPE (op))
4035 		      ? POINTER_PLUS_EXPR : PLUS_EXPR;
4036 		  tree type = POINTER_TYPE_P (TREE_TYPE (op))
4037 			      ? sizetype : TREE_TYPE (op);
4038 		  widest_int cst
4039 		    = wi::mul (bestn->simdclone->args[i].linear_step,
4040 			       ncopies * nunits);
4041 		  tree tcst = wide_int_to_tree (type, cst);
4042 		  tree phi_arg = copy_ssa_name (op);
4043 		  new_stmt
4044 		    = gimple_build_assign (phi_arg, code, phi_res, tcst);
4045 		  gimple_stmt_iterator si = gsi_after_labels (loop->header);
4046 		  gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4047 		  set_vinfo_for_stmt (new_stmt,
4048 				      new_stmt_vec_info (new_stmt, loop_vinfo));
4049 		  add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4050 			       UNKNOWN_LOCATION);
4051 		  arginfo[i].op = phi_res;
4052 		  vargs.safe_push (phi_res);
4053 		}
4054 	      else
4055 		{
4056 		  enum tree_code code
4057 		    = POINTER_TYPE_P (TREE_TYPE (op))
4058 		      ? POINTER_PLUS_EXPR : PLUS_EXPR;
4059 		  tree type = POINTER_TYPE_P (TREE_TYPE (op))
4060 			      ? sizetype : TREE_TYPE (op);
4061 		  widest_int cst
4062 		    = wi::mul (bestn->simdclone->args[i].linear_step,
4063 			       j * nunits);
4064 		  tree tcst = wide_int_to_tree (type, cst);
4065 		  new_temp = make_ssa_name (TREE_TYPE (op));
4066 		  new_stmt = gimple_build_assign (new_temp, code,
4067 						  arginfo[i].op, tcst);
4068 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
4069 		  vargs.safe_push (new_temp);
4070 		}
4071 	      break;
4072 	    case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4073 	    case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4074 	    case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4075 	    case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4076 	    case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4077 	    case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4078 	    default:
4079 	      gcc_unreachable ();
4080 	    }
4081 	}
4082 
4083       new_stmt = gimple_build_call_vec (fndecl, vargs);
4084       if (vec_dest)
4085 	{
4086 	  gcc_assert (ratype || simd_clone_subparts (rtype) == nunits);
4087 	  if (ratype)
4088 	    new_temp = create_tmp_var (ratype);
4089 	  else if (simd_clone_subparts (vectype)
4090 		   == simd_clone_subparts (rtype))
4091 	    new_temp = make_ssa_name (vec_dest, new_stmt);
4092 	  else
4093 	    new_temp = make_ssa_name (rtype, new_stmt);
4094 	  gimple_call_set_lhs (new_stmt, new_temp);
4095 	}
4096       vect_finish_stmt_generation (stmt, new_stmt, gsi);
4097 
4098       if (vec_dest)
4099 	{
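          /* Bring the clone's return value into the shape the vectorizer
             expects: split it when the clone computes more lanes than
             VECTYPE holds, collect several call results into a CONSTRUCTOR
             when it computes fewer, and load it from memory when it is
             returned in an array (RATYPE).  */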
4100 	  if (simd_clone_subparts (vectype) < nunits)
4101 	    {
4102 	      unsigned int k, l;
4103 	      poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4104 	      poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4105 	      k = nunits / simd_clone_subparts (vectype);
4106 	      gcc_assert ((k & (k - 1)) == 0);
4107 	      for (l = 0; l < k; l++)
4108 		{
4109 		  tree t;
4110 		  if (ratype)
4111 		    {
4112 		      t = build_fold_addr_expr (new_temp);
4113 		      t = build2 (MEM_REF, vectype, t,
4114 				  build_int_cst (TREE_TYPE (t), l * bytes));
4115 		    }
4116 		  else
4117 		    t = build3 (BIT_FIELD_REF, vectype, new_temp,
4118 				bitsize_int (prec), bitsize_int (l * prec));
4119 		  new_stmt
4120 		    = gimple_build_assign (make_ssa_name (vectype), t);
4121 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
4122 		  if (j == 0 && l == 0)
4123 		    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4124 		  else
4125 		    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4126 
4127 		  prev_stmt_info = vinfo_for_stmt (new_stmt);
4128 		}
4129 
4130 	      if (ratype)
4131 		{
4132 		  tree clobber = build_constructor (ratype, NULL);
4133 		  TREE_THIS_VOLATILE (clobber) = 1;
4134 		  new_stmt = gimple_build_assign (new_temp, clobber);
4135 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
4136 		}
4137 	      continue;
4138 	    }
4139 	  else if (simd_clone_subparts (vectype) > nunits)
4140 	    {
4141 	      unsigned int k = (simd_clone_subparts (vectype)
4142 				/ simd_clone_subparts (rtype));
4143 	      gcc_assert ((k & (k - 1)) == 0);
4144 	      if ((j & (k - 1)) == 0)
4145 		vec_alloc (ret_ctor_elts, k);
4146 	      if (ratype)
4147 		{
4148 		  unsigned int m, o = nunits / simd_clone_subparts (rtype);
4149 		  for (m = 0; m < o; m++)
4150 		    {
4151 		      tree tem = build4 (ARRAY_REF, rtype, new_temp,
4152 					 size_int (m), NULL_TREE, NULL_TREE);
4153 		      new_stmt
4154 			= gimple_build_assign (make_ssa_name (rtype), tem);
4155 		      vect_finish_stmt_generation (stmt, new_stmt, gsi);
4156 		      CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4157 					      gimple_assign_lhs (new_stmt));
4158 		    }
4159 		  tree clobber = build_constructor (ratype, NULL);
4160 		  TREE_THIS_VOLATILE (clobber) = 1;
4161 		  new_stmt = gimple_build_assign (new_temp, clobber);
4162 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
4163 		}
4164 	      else
4165 		CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4166 	      if ((j & (k - 1)) != k - 1)
4167 		continue;
4168 	      vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4169 	      new_stmt
4170 		= gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4171 	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
4172 
4173 	      if ((unsigned) j == k - 1)
4174 		STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4175 	      else
4176 		STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4177 
4178 	      prev_stmt_info = vinfo_for_stmt (new_stmt);
4179 	      continue;
4180 	    }
4181 	  else if (ratype)
4182 	    {
4183 	      tree t = build_fold_addr_expr (new_temp);
4184 	      t = build2 (MEM_REF, vectype, t,
4185 			  build_int_cst (TREE_TYPE (t), 0));
4186 	      new_stmt
4187 		= gimple_build_assign (make_ssa_name (vec_dest), t);
4188 	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
4189 	      tree clobber = build_constructor (ratype, NULL);
4190 	      TREE_THIS_VOLATILE (clobber) = 1;
4191 	      vect_finish_stmt_generation (stmt,
4192 					   gimple_build_assign (new_temp,
4193 								clobber), gsi);
4194 	    }
4195 	}
4196 
4197       if (j == 0)
4198 	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4199       else
4200 	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4201 
4202       prev_stmt_info = vinfo_for_stmt (new_stmt);
4203     }
4204 
4205   vargs.release ();
4206 
4207   /* The call in STMT might prevent it from being removed in dce.
4208      However, we cannot remove it here, due to the way the ssa name
4209      it defines is mapped to the new definition.  So just replace
4210      the rhs of the statement with something harmless.  */
4211 
4212   if (slp_node)
4213     return true;
4214 
4215   if (scalar_dest)
4216     {
4217       type = TREE_TYPE (scalar_dest);
4218       if (is_pattern_stmt_p (stmt_info))
4219 	lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
4220       else
4221 	lhs = gimple_call_lhs (stmt);
4222       new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4223     }
4224   else
4225     new_stmt = gimple_build_nop ();
4226   set_vinfo_for_stmt (new_stmt, stmt_info);
4227   set_vinfo_for_stmt (stmt, NULL);
4228   STMT_VINFO_STMT (stmt_info) = new_stmt;
4229   gsi_replace (gsi, new_stmt, true);
4230   unlink_stmt_vdef (stmt);
4231 
4232   return true;
4233 }
4234 
4235 
4236 /* Function vect_gen_widened_results_half
4237 
4238    Create a vector stmt whose code, number of arguments, and result
4239    variable are CODE, OP_TYPE, and VEC_DEST, and whose arguments are
4240    VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at GSI.
4241    In the case that CODE is a CALL_EXPR, this means that a call to DECL
4242    needs to be created (DECL is a function-decl of a target-builtin).
4243    STMT is the original scalar stmt that we are vectorizing.  */
4244 
4245 static gimple *
4246 vect_gen_widened_results_half (enum tree_code code,
4247 			       tree decl,
4248                                tree vec_oprnd0, tree vec_oprnd1, int op_type,
4249 			       tree vec_dest, gimple_stmt_iterator *gsi,
4250 			       gimple *stmt)
4251 {
4252   gimple *new_stmt;
4253   tree new_temp;
4254 
4255   /* Generate half of the widened result:  */
4256   if (code == CALL_EXPR)
4257     {
4258       /* Target specific support  */
4259       if (op_type == binary_op)
4260 	new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
4261       else
4262 	new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
4263       new_temp = make_ssa_name (vec_dest, new_stmt);
4264       gimple_call_set_lhs (new_stmt, new_temp);
4265     }
4266   else
4267     {
4268       /* Generic support */
4269       gcc_assert (op_type == TREE_CODE_LENGTH (code));
4270       if (op_type != binary_op)
4271 	vec_oprnd1 = NULL;
4272       new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
4273       new_temp = make_ssa_name (vec_dest, new_stmt);
4274       gimple_assign_set_lhs (new_stmt, new_temp);
4275     }
4276   vect_finish_stmt_generation (stmt, new_stmt, gsi);
4277 
4278   return new_stmt;
4279 }
4280 
4281 
4282 /* Get vectorized definitions for loop-based vectorization.  For the first
4283    operand we call vect_get_vec_def_for_operand() (with OPRND containing the
4284    scalar operand), and for the rest we get a copy with
4285    vect_get_vec_def_for_stmt_copy() using the previous vector definition
4286    (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
4287    The vectors are collected into VEC_OPRNDS.  */
4288 
4289 static void
4290 vect_get_loop_based_defs (tree *oprnd, gimple *stmt, enum vect_def_type dt,
4291 			  vec<tree> *vec_oprnds, int multi_step_cvt)
4292 {
4293   tree vec_oprnd;
4294 
4295   /* Get first vector operand.  */
4296   /* All the vector operands except the very first one (that is scalar oprnd)
4297      are stmt copies.  */
4298   if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
4299     vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt);
4300   else
4301     vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
4302 
4303   vec_oprnds->quick_push (vec_oprnd);
4304 
4305   /* Get second vector operand.  */
4306   vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
4307   vec_oprnds->quick_push (vec_oprnd);
4308 
4309   *oprnd = vec_oprnd;
4310 
4311   /* For conversion in multiple steps, continue to get operands
4312      recursively.  */
4313   if (multi_step_cvt)
4314     vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
4315 }
4316 
4317 
4318 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4319    For multi-step conversions store the resulting vectors and call the function
4320    recursively.  */
4321 
4322 static void
4323 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
4324 				       int multi_step_cvt, gimple *stmt,
4325 				       vec<tree> vec_dsts,
4326 				       gimple_stmt_iterator *gsi,
4327 				       slp_tree slp_node, enum tree_code code,
4328 				       stmt_vec_info *prev_stmt_info)
4329 {
4330   unsigned int i;
4331   tree vop0, vop1, new_tmp, vec_dest;
4332   gimple *new_stmt;
4333   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4334 
4335   vec_dest = vec_dsts.pop ();
4336 
4337   for (i = 0; i < vec_oprnds->length (); i += 2)
4338     {
4339       /* Create demotion operation.  */
4340       vop0 = (*vec_oprnds)[i];
4341       vop1 = (*vec_oprnds)[i + 1];
4342       new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4343       new_tmp = make_ssa_name (vec_dest, new_stmt);
4344       gimple_assign_set_lhs (new_stmt, new_tmp);
4345       vect_finish_stmt_generation (stmt, new_stmt, gsi);
4346 
4347       if (multi_step_cvt)
4348 	/* Store the resulting vector for next recursive call.  */
4349 	(*vec_oprnds)[i/2] = new_tmp;
4350       else
4351 	{
4352 	  /* This is the last step of the conversion sequence. Store the
4353 	     vectors in SLP_NODE or in the vector info of the scalar statement
4354 	     (or in STMT_VINFO_RELATED_STMT chain).  */
4355 	  if (slp_node)
4356 	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4357 	  else
4358 	    {
4359 	      if (!*prev_stmt_info)
4360 		STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4361 	      else
4362 		STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
4363 
4364 	      *prev_stmt_info = vinfo_for_stmt (new_stmt);
4365 	    }
4366 	}
4367     }
4368 
4369   /* For multi-step demotion operations we first generate demotion operations
4370      from the source type to the intermediate types, and then combine the
4371      results (stored in VEC_OPRNDS) with a demotion operation to the
4372      destination type.  */
4373   if (multi_step_cvt)
4374     {
4375       /* At each level of recursion we have half of the operands we had at the
4376 	 previous level.  */
4377       vec_oprnds->truncate ((i+1)/2);
4378       vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
4379 					     stmt, vec_dsts, gsi, slp_node,
4380 					     VEC_PACK_TRUNC_EXPR,
4381 					     prev_stmt_info);
4382     }
4383 
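  /* Restore VEC_DEST in VEC_DSTS (whose storage is shared with the
     caller) so that later invocations see the full set of destination
     variables again.  */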
4384   vec_dsts.quick_push (vec_dest);
4385 }
4386 
4387 
4388 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4389    and VEC_OPRNDS1 (for binary operations).  For multi-step conversions store
4390    the resulting vectors and call the function recursively.  */
4391 
4392 static void
4393 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
4394 					vec<tree> *vec_oprnds1,
4395 					gimple *stmt, tree vec_dest,
4396 					gimple_stmt_iterator *gsi,
4397 					enum tree_code code1,
4398 					enum tree_code code2, tree decl1,
4399 					tree decl2, int op_type)
4400 {
4401   int i;
4402   tree vop0, vop1, new_tmp1, new_tmp2;
4403   gimple *new_stmt1, *new_stmt2;
4404   vec<tree> vec_tmp = vNULL;
4405 
4406   vec_tmp.create (vec_oprnds0->length () * 2);
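  /* Each input vector yields a low and a high half, so twice as many
     results are produced as there are operands in VEC_OPRNDS0.  */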
4407   FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4408     {
4409       if (op_type == binary_op)
4410 	vop1 = (*vec_oprnds1)[i];
4411       else
4412 	vop1 = NULL_TREE;
4413 
4414       /* Generate the two halves of promotion operation.  */
4415       new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
4416 						 op_type, vec_dest, gsi, stmt);
4417       new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
4418 						 op_type, vec_dest, gsi, stmt);
4419       if (is_gimple_call (new_stmt1))
4420 	{
4421 	  new_tmp1 = gimple_call_lhs (new_stmt1);
4422 	  new_tmp2 = gimple_call_lhs (new_stmt2);
4423 	}
4424       else
4425 	{
4426 	  new_tmp1 = gimple_assign_lhs (new_stmt1);
4427 	  new_tmp2 = gimple_assign_lhs (new_stmt2);
4428 	}
4429 
4430       /* Store the results for the next step.  */
4431       vec_tmp.quick_push (new_tmp1);
4432       vec_tmp.quick_push (new_tmp2);
4433     }
4434 
4435   vec_oprnds0->release ();
4436   *vec_oprnds0 = vec_tmp;
4437 }
4438 
4439 
4440 /* Check if STMT performs a conversion operation, that can be vectorized.
4441    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4442    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4443    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
4444 
4445 static bool
4446 vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
4447 			 gimple **vec_stmt, slp_tree slp_node)
4448 {
4449   tree vec_dest;
4450   tree scalar_dest;
4451   tree op0, op1 = NULL_TREE;
4452   tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
4453   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4454   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4455   enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4456   enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4457   tree decl1 = NULL_TREE, decl2 = NULL_TREE;
4458   tree new_temp;
4459   gimple *def_stmt;
4460   enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4461   int ndts = 2;
4462   gimple *new_stmt = NULL;
4463   stmt_vec_info prev_stmt_info;
4464   poly_uint64 nunits_in;
4465   poly_uint64 nunits_out;
4466   tree vectype_out, vectype_in;
4467   int ncopies, i, j;
4468   tree lhs_type, rhs_type;
4469   enum { NARROW, NONE, WIDEN } modifier;
4470   vec<tree> vec_oprnds0 = vNULL;
4471   vec<tree> vec_oprnds1 = vNULL;
4472   tree vop0;
4473   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4474   vec_info *vinfo = stmt_info->vinfo;
4475   int multi_step_cvt = 0;
4476   vec<tree> interm_types = vNULL;
4477   tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
4478   int op_type;
4479   unsigned short fltsz;
4480 
4481   /* Is STMT a vectorizable conversion?   */
4482 
4483   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4484     return false;
4485 
4486   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4487       && ! vec_stmt)
4488     return false;
4489 
4490   if (!is_gimple_assign (stmt))
4491     return false;
4492 
4493   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4494     return false;
4495 
4496   code = gimple_assign_rhs_code (stmt);
4497   if (!CONVERT_EXPR_CODE_P (code)
4498       && code != FIX_TRUNC_EXPR
4499       && code != FLOAT_EXPR
4500       && code != WIDEN_MULT_EXPR
4501       && code != WIDEN_LSHIFT_EXPR)
4502     return false;
4503 
4504   op_type = TREE_CODE_LENGTH (code);
4505 
4506   /* Check types of lhs and rhs.  */
4507   scalar_dest = gimple_assign_lhs (stmt);
4508   lhs_type = TREE_TYPE (scalar_dest);
4509   vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4510 
4511   op0 = gimple_assign_rhs1 (stmt);
4512   rhs_type = TREE_TYPE (op0);
4513 
4514   if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4515       && !((INTEGRAL_TYPE_P (lhs_type)
4516 	    && INTEGRAL_TYPE_P (rhs_type))
4517 	   || (SCALAR_FLOAT_TYPE_P (lhs_type)
4518 	       && SCALAR_FLOAT_TYPE_P (rhs_type))))
4519     return false;
4520 
4521   if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4522       && ((INTEGRAL_TYPE_P (lhs_type)
4523 	   && !type_has_mode_precision_p (lhs_type))
4524 	  || (INTEGRAL_TYPE_P (rhs_type)
4525 	      && !type_has_mode_precision_p (rhs_type))))
4526     {
4527       if (dump_enabled_p ())
4528 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4529                          "type conversion to/from bit-precision unsupported."
4530                          "\n");
4531       return false;
4532     }
4533 
4534   /* Check the operands of the operation.  */
4535   if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype_in))
4536     {
4537       if (dump_enabled_p ())
4538 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4539                          "use not simple.\n");
4540       return false;
4541     }
4542   if (op_type == binary_op)
4543     {
4544       bool ok;
4545 
4546       op1 = gimple_assign_rhs2 (stmt);
4547       gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4548       /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4549 	 OP1.  */
4550       if (CONSTANT_CLASS_P (op0))
4551 	ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &vectype_in);
4552       else
4553 	ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]);
4554 
4555       if (!ok)
4556 	{
4557           if (dump_enabled_p ())
4558             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4559                              "use not simple.\n");
4560 	  return false;
4561 	}
4562     }
4563 
4564   /* If op0 is an external or constant def, use a vector type of
4565      the same size as the output vector type.  */
4566   if (!vectype_in)
4567     vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
4568   if (vec_stmt)
4569     gcc_assert (vectype_in);
4570   if (!vectype_in)
4571     {
4572       if (dump_enabled_p ())
4573 	{
4574 	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4575                            "no vectype for scalar type ");
4576 	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4577           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4578 	}
4579 
4580       return false;
4581     }
4582 
4583   if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4584       && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4585     {
4586       if (dump_enabled_p ())
4587 	{
4588 	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4589                            "can't convert between boolean and non "
4590 			   "boolean vectors");
4591 	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4592           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4593 	}
4594 
4595       return false;
4596     }
4597 
4598   nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4599   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4600   if (known_eq (nunits_out, nunits_in))
4601     modifier = NONE;
4602   else if (multiple_p (nunits_out, nunits_in))
4603     modifier = NARROW;
4604   else
4605     {
4606       gcc_checking_assert (multiple_p (nunits_in, nunits_out));
4607       modifier = WIDEN;
4608     }
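
  /* For example, with 128-bit vectors a short -> int conversion has
     nunits_in == 8 and nunits_out == 4, so it is classified as WIDEN
     (promotion); an int -> short conversion has nunits_in == 4 and
     nunits_out == 8, so it is NARROW (demotion); an int <-> float
     conversion keeps the number of elements and is NONE.  */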
4609 
4610   /* Multiple types in SLP are handled by creating the appropriate number of
4611      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
4612      case of SLP.  */
4613   if (slp_node)
4614     ncopies = 1;
4615   else if (modifier == NARROW)
4616     ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4617   else
4618     ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
4619 
4620   /* Sanity check: make sure that at least one copy of the vectorized stmt
4621      needs to be generated.  */
4622   gcc_assert (ncopies >= 1);
4623 
4624   bool found_mode = false;
4625   scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4626   scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4627   opt_scalar_mode rhs_mode_iter;
4628 
4629   /* Supportable by target?  */
4630   switch (modifier)
4631     {
4632     case NONE:
4633       if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4634 	return false;
4635       if (supportable_convert_operation (code, vectype_out, vectype_in,
4636 					 &decl1, &code1))
4637 	break;
4638       /* FALLTHRU */
4639     unsupported:
4640       if (dump_enabled_p ())
4641 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4642                          "conversion not supported by target.\n");
4643       return false;
4644 
4645     case WIDEN:
4646       if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
4647 					  &code1, &code2, &multi_step_cvt,
4648 					  &interm_types))
4649 	{
4650 	  /* Binary widening operation can only be supported directly by the
4651 	     architecture.  */
4652 	  gcc_assert (!(multi_step_cvt && op_type == binary_op));
4653 	  break;
4654 	}
4655 
4656       if (code != FLOAT_EXPR
4657 	  || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4658 	goto unsupported;
4659 
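      /* A FLOAT_EXPR from a narrower integer type (e.g. (double) a_short)
	 may need two steps: first widen the integer elements to an
	 intermediate integer type, then convert that intermediate vector
	 to the floating-point type, provided the target supports both
	 steps.  The loop below searches candidate intermediate modes from
	 narrowest to widest.  */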
4660       fltsz = GET_MODE_SIZE (lhs_mode);
4661       FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4662 	{
4663 	  rhs_mode = rhs_mode_iter.require ();
4664 	  if (GET_MODE_SIZE (rhs_mode) > fltsz)
4665 	    break;
4666 
4667 	  cvt_type
4668 	    = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4669 	  cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4670 	  if (cvt_type == NULL_TREE)
4671 	    goto unsupported;
4672 
4673 	  if (GET_MODE_SIZE (rhs_mode) == fltsz)
4674 	    {
4675 	      if (!supportable_convert_operation (code, vectype_out,
4676 						  cvt_type, &decl1, &codecvt1))
4677 		goto unsupported;
4678 	    }
4679 	  else if (!supportable_widening_operation (code, stmt, vectype_out,
4680 						    cvt_type, &codecvt1,
4681 						    &codecvt2, &multi_step_cvt,
4682 						    &interm_types))
4683 	    continue;
4684 	  else
4685 	    gcc_assert (multi_step_cvt == 0);
4686 
4687 	  if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
4688 					      vectype_in, &code1, &code2,
4689 					      &multi_step_cvt, &interm_types))
4690 	    {
4691 	      found_mode = true;
4692 	      break;
4693 	    }
4694 	}
4695 
4696       if (!found_mode)
4697 	goto unsupported;
4698 
4699       if (GET_MODE_SIZE (rhs_mode) == fltsz)
4700 	codecvt2 = ERROR_MARK;
4701       else
4702 	{
4703 	  multi_step_cvt++;
4704 	  interm_types.safe_push (cvt_type);
4705 	  cvt_type = NULL_TREE;
4706 	}
4707       break;
4708 
4709     case NARROW:
4710       gcc_assert (op_type == unary_op);
4711       if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4712 					   &code1, &multi_step_cvt,
4713 					   &interm_types))
4714 	break;
4715 
4716       if (code != FIX_TRUNC_EXPR
4717 	  || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
4718 	goto unsupported;
4719 
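      /* For example, (short) a_double may be handled by first converting
	 the double vector to a vector of 64-bit integers (FIX_TRUNC) and
	 then narrowing those integers down to shorts with NOP
	 conversions.  */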
4720       cvt_type
4721 	= build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4722       cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4723       if (cvt_type == NULL_TREE)
4724 	goto unsupported;
4725       if (!supportable_convert_operation (code, cvt_type, vectype_in,
4726 					  &decl1, &codecvt1))
4727 	goto unsupported;
4728       if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4729 					   &code1, &multi_step_cvt,
4730 					   &interm_types))
4731 	break;
4732       goto unsupported;
4733 
4734     default:
4735       gcc_unreachable ();
4736     }
4737 
4738   if (!vec_stmt)		/* transformation not required.  */
4739     {
4740       if (dump_enabled_p ())
4741 	dump_printf_loc (MSG_NOTE, vect_location,
4742                          "=== vectorizable_conversion ===\n");
4743       if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
4744         {
4745 	  STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
4746 	  if (!slp_node)
4747 	    vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
4748 	}
4749       else if (modifier == NARROW)
4750 	{
4751 	  STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
4752 	  if (!slp_node)
4753 	    vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4754 	}
4755       else
4756 	{
4757 	  STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
4758 	  if (!slp_node)
4759 	    vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4760 	}
4761       interm_types.release ();
4762       return true;
4763     }
4764 
4765   /* Transform.  */
4766   if (dump_enabled_p ())
4767     dump_printf_loc (MSG_NOTE, vect_location,
4768                      "transform conversion. ncopies = %d.\n", ncopies);
4769 
4770   if (op_type == binary_op)
4771     {
4772       if (CONSTANT_CLASS_P (op0))
4773 	op0 = fold_convert (TREE_TYPE (op1), op0);
4774       else if (CONSTANT_CLASS_P (op1))
4775 	op1 = fold_convert (TREE_TYPE (op0), op1);
4776     }
4777 
  /* In case of multi-step conversion, we first generate conversion operations
     to the intermediate types, and then from those types to the final one.
     We create vector destinations for the intermediate types received from
     supportable_*_operation, and store them in the correct order
     for future use in vect_create_vectorized_*_stmts ().  */
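  /* For example, a char -> int widening may go through an intermediate
     short vector type; in that case a destination variable is created
     for the intermediate type as well as for the final int vectype.  */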
4783   auto_vec<tree> vec_dsts (multi_step_cvt + 1);
4784   vec_dest = vect_create_destination_var (scalar_dest,
4785 					  (cvt_type && modifier == WIDEN)
4786 					  ? cvt_type : vectype_out);
4787   vec_dsts.quick_push (vec_dest);
4788 
4789   if (multi_step_cvt)
4790     {
4791       for (i = interm_types.length () - 1;
4792 	   interm_types.iterate (i, &intermediate_type); i--)
4793 	{
4794 	  vec_dest = vect_create_destination_var (scalar_dest,
4795 						  intermediate_type);
4796 	  vec_dsts.quick_push (vec_dest);
4797 	}
4798     }
4799 
4800   if (cvt_type)
4801     vec_dest = vect_create_destination_var (scalar_dest,
4802 					    modifier == WIDEN
4803 					    ? vectype_out : cvt_type);
4804 
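  /* Narrowing combines two input vectors into each output vector and
     widening expands each input into two outputs, so the number of
     operand slots reserved below grows with the number of conversion
     steps (MULTI_STEP_CVT).  */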
4805   if (!slp_node)
4806     {
4807       if (modifier == WIDEN)
4808 	{
4809 	  vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
4810 	  if (op_type == binary_op)
4811 	    vec_oprnds1.create (1);
4812 	}
4813       else if (modifier == NARROW)
4814 	vec_oprnds0.create (
4815 		   2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
4816     }
4817   else if (code == WIDEN_LSHIFT_EXPR)
4818     vec_oprnds1.create (slp_node->vec_stmts_size);
4819 
4820   last_oprnd = op0;
4821   prev_stmt_info = NULL;
4822   switch (modifier)
4823     {
4824     case NONE:
4825       for (j = 0; j < ncopies; j++)
4826 	{
4827 	  if (j == 0)
4828 	    vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
4829 	  else
4830 	    vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
4831 
4832 	  FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4833 	    {
4834 	      /* Arguments are ready, create the new vector stmt.  */
4835 	      if (code1 == CALL_EXPR)
4836 		{
4837 		  new_stmt = gimple_build_call (decl1, 1, vop0);
4838 		  new_temp = make_ssa_name (vec_dest, new_stmt);
4839 		  gimple_call_set_lhs (new_stmt, new_temp);
4840 		}
4841 	      else
4842 		{
4843 		  gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
4844 		  new_stmt = gimple_build_assign (vec_dest, code1, vop0);
4845 		  new_temp = make_ssa_name (vec_dest, new_stmt);
4846 		  gimple_assign_set_lhs (new_stmt, new_temp);
4847 		}
4848 
4849 	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
4850 	      if (slp_node)
4851 		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4852 	      else
4853 		{
4854 		  if (!prev_stmt_info)
4855 		    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4856 		  else
4857 		    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4858 		  prev_stmt_info = vinfo_for_stmt (new_stmt);
4859 		}
4860 	    }
4861 	}
4862       break;
4863 
4864     case WIDEN:
      /* In case the vectorization factor (VF) is bigger than the number
	 of elements that we can fit in a vectype (nunits), we have to
	 generate more than one vector stmt, i.e., we need to "unroll"
	 the vector stmt by a factor VF/nunits.  */
4869       for (j = 0; j < ncopies; j++)
4870 	{
4871 	  /* Handle uses.  */
4872 	  if (j == 0)
4873 	    {
4874 	      if (slp_node)
4875 		{
4876 		  if (code == WIDEN_LSHIFT_EXPR)
4877 		    {
4878 		      unsigned int k;
4879 
4880 		      vec_oprnd1 = op1;
4881 		      /* Store vec_oprnd1 for every vector stmt to be created
4882 			 for SLP_NODE.  We check during the analysis that all
4883 			 the shift arguments are the same.  */
4884 		      for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4885 			vec_oprnds1.quick_push (vec_oprnd1);
4886 
4887 		      vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4888 					 slp_node);
4889 		    }
4890 		  else
4891 		    vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
4892 				       &vec_oprnds1, slp_node);
4893 		}
4894 	      else
4895 		{
4896 		  vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
4897 		  vec_oprnds0.quick_push (vec_oprnd0);
4898 		  if (op_type == binary_op)
4899 		    {
4900 		      if (code == WIDEN_LSHIFT_EXPR)
4901 			vec_oprnd1 = op1;
4902 		      else
4903 			vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
4904 		      vec_oprnds1.quick_push (vec_oprnd1);
4905 		    }
4906 		}
4907 	    }
4908 	  else
4909 	    {
4910 	      vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
4911 	      vec_oprnds0.truncate (0);
4912 	      vec_oprnds0.quick_push (vec_oprnd0);
4913 	      if (op_type == binary_op)
4914 		{
4915 		  if (code == WIDEN_LSHIFT_EXPR)
4916 		    vec_oprnd1 = op1;
4917 		  else
4918 		    vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
4919 								 vec_oprnd1);
4920 		  vec_oprnds1.truncate (0);
4921 		  vec_oprnds1.quick_push (vec_oprnd1);
4922 		}
4923 	    }
4924 
4925 	  /* Arguments are ready.  Create the new vector stmts.  */
4926 	  for (i = multi_step_cvt; i >= 0; i--)
4927 	    {
4928 	      tree this_dest = vec_dsts[i];
4929 	      enum tree_code c1 = code1, c2 = code2;
4930 	      if (i == 0 && codecvt2 != ERROR_MARK)
4931 		{
4932 		  c1 = codecvt1;
4933 		  c2 = codecvt2;
4934 		}
4935 	      vect_create_vectorized_promotion_stmts (&vec_oprnds0,
4936 						      &vec_oprnds1,
4937 						      stmt, this_dest, gsi,
4938 						      c1, c2, decl1, decl2,
4939 						      op_type);
4940 	    }
4941 
4942 	  FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4943 	    {
4944 	      if (cvt_type)
4945 		{
4946 		  if (codecvt1 == CALL_EXPR)
4947 		    {
4948 		      new_stmt = gimple_build_call (decl1, 1, vop0);
4949 		      new_temp = make_ssa_name (vec_dest, new_stmt);
4950 		      gimple_call_set_lhs (new_stmt, new_temp);
4951 		    }
4952 		  else
4953 		    {
4954 		      gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4955 		      new_temp = make_ssa_name (vec_dest);
4956 		      new_stmt = gimple_build_assign (new_temp, codecvt1,
4957 						      vop0);
4958 		    }
4959 
4960 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
4961 		}
4962 	      else
4963 		new_stmt = SSA_NAME_DEF_STMT (vop0);
4964 
4965 	      if (slp_node)
4966 		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4967 	      else
4968 		{
4969 		  if (!prev_stmt_info)
4970 		    STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4971 		  else
4972 		    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4973 		  prev_stmt_info = vinfo_for_stmt (new_stmt);
4974 		}
4975 	    }
4976 	}
4977 
4978       *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4979       break;
4980 
4981     case NARROW:
      /* In case the vectorization factor (VF) is bigger than the number
	 of elements that we can fit in a vectype (nunits), we have to
	 generate more than one vector stmt, i.e., we need to "unroll"
	 the vector stmt by a factor VF/nunits.  */
4986       for (j = 0; j < ncopies; j++)
4987 	{
4988 	  /* Handle uses.  */
4989 	  if (slp_node)
4990 	    vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4991 			       slp_node);
4992 	  else
4993 	    {
4994 	      vec_oprnds0.truncate (0);
4995 	      vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
4996 					vect_pow2 (multi_step_cvt) - 1);
4997 	    }
4998 
4999 	  /* Arguments are ready.  Create the new vector stmts.  */
5000 	  if (cvt_type)
5001 	    FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5002 	      {
5003 		if (codecvt1 == CALL_EXPR)
5004 		  {
5005 		    new_stmt = gimple_build_call (decl1, 1, vop0);
5006 		    new_temp = make_ssa_name (vec_dest, new_stmt);
5007 		    gimple_call_set_lhs (new_stmt, new_temp);
5008 		  }
5009 		else
5010 		  {
5011 		    gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5012 		    new_temp = make_ssa_name (vec_dest);
5013 		    new_stmt = gimple_build_assign (new_temp, codecvt1,
5014 						    vop0);
5015 		  }
5016 
5017 		vect_finish_stmt_generation (stmt, new_stmt, gsi);
5018 		vec_oprnds0[i] = new_temp;
5019 	      }
5020 
5021 	  vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
5022 						 stmt, vec_dsts, gsi,
5023 						 slp_node, code1,
5024 						 &prev_stmt_info);
5025 	}
5026 
5027       *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5028       break;
5029     }
5030 
5031   vec_oprnds0.release ();
5032   vec_oprnds1.release ();
5033   interm_types.release ();
5034 
5035   return true;
5036 }
5037 
5038 
5039 /* Function vectorizable_assignment.
5040 
5041    Check if STMT performs an assignment (copy) that can be vectorized.
5042    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5043    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5044    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
5045 
5046 static bool
5047 vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi,
5048 			 gimple **vec_stmt, slp_tree slp_node)
5049 {
5050   tree vec_dest;
5051   tree scalar_dest;
5052   tree op;
5053   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5054   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5055   tree new_temp;
5056   gimple *def_stmt;
5057   enum vect_def_type dt[1] = {vect_unknown_def_type};
5058   int ndts = 1;
5059   int ncopies;
5060   int i, j;
5061   vec<tree> vec_oprnds = vNULL;
5062   tree vop;
5063   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5064   vec_info *vinfo = stmt_info->vinfo;
5065   gimple *new_stmt = NULL;
5066   stmt_vec_info prev_stmt_info = NULL;
5067   enum tree_code code;
5068   tree vectype_in;
5069 
5070   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5071     return false;
5072 
5073   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5074       && ! vec_stmt)
5075     return false;
5076 
  /* Is STMT a vectorizable assignment?  */
5078   if (!is_gimple_assign (stmt))
5079     return false;
5080 
5081   scalar_dest = gimple_assign_lhs (stmt);
5082   if (TREE_CODE (scalar_dest) != SSA_NAME)
5083     return false;
5084 
5085   code = gimple_assign_rhs_code (stmt);
5086   if (gimple_assign_single_p (stmt)
5087       || code == PAREN_EXPR
5088       || CONVERT_EXPR_CODE_P (code))
5089     op = gimple_assign_rhs1 (stmt);
5090   else
5091     return false;
5092 
5093   if (code == VIEW_CONVERT_EXPR)
5094     op = TREE_OPERAND (op, 0);
5095 
5096   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5097   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5098 
5099   /* Multiple types in SLP are handled by creating the appropriate number of
5100      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
5101      case of SLP.  */
5102   if (slp_node)
5103     ncopies = 1;
5104   else
5105     ncopies = vect_get_num_copies (loop_vinfo, vectype);
5106 
5107   gcc_assert (ncopies >= 1);
5108 
5109   if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[0], &vectype_in))
5110     {
5111       if (dump_enabled_p ())
5112         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5113                          "use not simple.\n");
5114       return false;
5115     }
5116 
5117   /* We can handle NOP_EXPR conversions that do not change the number
5118      of elements or the vector size.  */
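  /* For example, a cast from int to unsigned int, or a VIEW_CONVERT_EXPR
     between two vector types with the same number of elements and the
     same size, amounts to a plain vector copy.  */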
5119   if ((CONVERT_EXPR_CODE_P (code)
5120        || code == VIEW_CONVERT_EXPR)
5121       && (!vectype_in
5122 	  || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
5123 	  || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5124 		       GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
5125     return false;
5126 
5127   /* We do not handle bit-precision changes.  */
5128   if ((CONVERT_EXPR_CODE_P (code)
5129        || code == VIEW_CONVERT_EXPR)
5130       && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5131       && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5132 	  || !type_has_mode_precision_p (TREE_TYPE (op)))
5133       /* But a conversion that does not change the bit-pattern is ok.  */
5134       && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5135 	    > TYPE_PRECISION (TREE_TYPE (op)))
5136 	   && TYPE_UNSIGNED (TREE_TYPE (op)))
      /* Conversion between boolean types of different sizes is
	 a simple assignment in case their vectypes are the same
	 boolean vector type.  */
5140       && (!VECTOR_BOOLEAN_TYPE_P (vectype)
5141 	  || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
5142     {
5143       if (dump_enabled_p ())
5144         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5145                          "type conversion to/from bit-precision "
5146                          "unsupported.\n");
5147       return false;
5148     }
5149 
5150   if (!vec_stmt) /* transformation not required.  */
5151     {
5152       STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
5153       if (dump_enabled_p ())
5154         dump_printf_loc (MSG_NOTE, vect_location,
5155                          "=== vectorizable_assignment ===\n");
5156       if (!slp_node)
5157 	vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
5158       return true;
5159     }
5160 
5161   /* Transform.  */
5162   if (dump_enabled_p ())
5163     dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
5164 
5165   /* Handle def.  */
5166   vec_dest = vect_create_destination_var (scalar_dest, vectype);
5167 
5168   /* Handle use.  */
5169   for (j = 0; j < ncopies; j++)
5170     {
5171       /* Handle uses.  */
5172       if (j == 0)
5173         vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
5174       else
5175         vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
5176 
      /* Arguments are ready.  Create the new vector stmt.  */
5178       FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
5179        {
5180 	 if (CONVERT_EXPR_CODE_P (code)
5181 	     || code == VIEW_CONVERT_EXPR)
5182 	   vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
5183          new_stmt = gimple_build_assign (vec_dest, vop);
5184          new_temp = make_ssa_name (vec_dest, new_stmt);
5185          gimple_assign_set_lhs (new_stmt, new_temp);
5186          vect_finish_stmt_generation (stmt, new_stmt, gsi);
5187          if (slp_node)
5188            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5189        }
5190 
5191       if (slp_node)
5192         continue;
5193 
5194       if (j == 0)
5195         STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5196       else
5197         STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5198 
5199       prev_stmt_info = vinfo_for_stmt (new_stmt);
5200     }
5201 
5202   vec_oprnds.release ();
5203   return true;
5204 }
5205 
5206 
5207 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5208    either as shift by a scalar or by a vector.  */
5209 
5210 bool
5211 vect_supportable_shift (enum tree_code code, tree scalar_type)
5212 {
5214   machine_mode vec_mode;
5215   optab optab;
5216   int icode;
5217   tree vectype;
5218 
5219   vectype = get_vectype_for_scalar_type (scalar_type);
5220   if (!vectype)
5221     return false;
5222 
5223   optab = optab_for_tree_code (code, vectype, optab_scalar);
5224   if (!optab
5225       || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5226     {
5227       optab = optab_for_tree_code (code, vectype, optab_vector);
5228       if (!optab
5229           || (optab_handler (optab, TYPE_MODE (vectype))
5230                       == CODE_FOR_nothing))
5231         return false;
5232     }
5233 
5234   vec_mode = TYPE_MODE (vectype);
5235   icode = (int) optab_handler (optab, vec_mode);
5236   if (icode == CODE_FOR_nothing)
5237     return false;
5238 
5239   return true;
5240 }
5241 
5242 
5243 /* Function vectorizable_shift.
5244 
5245    Check if STMT performs a shift operation that can be vectorized.
5246    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5247    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5248    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
5249 
5250 static bool
5251 vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi,
5252                     gimple **vec_stmt, slp_tree slp_node)
5253 {
5254   tree vec_dest;
5255   tree scalar_dest;
5256   tree op0, op1 = NULL;
5257   tree vec_oprnd1 = NULL_TREE;
5258   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5259   tree vectype;
5260   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5261   enum tree_code code;
5262   machine_mode vec_mode;
5263   tree new_temp;
5264   optab optab;
5265   int icode;
5266   machine_mode optab_op2_mode;
5267   gimple *def_stmt;
5268   enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5269   int ndts = 2;
5270   gimple *new_stmt = NULL;
5271   stmt_vec_info prev_stmt_info;
5272   poly_uint64 nunits_in;
5273   poly_uint64 nunits_out;
5274   tree vectype_out;
5275   tree op1_vectype;
5276   int ncopies;
5277   int j, i;
5278   vec<tree> vec_oprnds0 = vNULL;
5279   vec<tree> vec_oprnds1 = vNULL;
5280   tree vop0, vop1;
5281   unsigned int k;
5282   bool scalar_shift_arg = true;
5283   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5284   vec_info *vinfo = stmt_info->vinfo;
5285 
5286   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5287     return false;
5288 
5289   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5290       && ! vec_stmt)
5291     return false;
5292 
5293   /* Is STMT a vectorizable binary/unary operation?   */
5294   if (!is_gimple_assign (stmt))
5295     return false;
5296 
5297   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5298     return false;
5299 
5300   code = gimple_assign_rhs_code (stmt);
5301 
5302   if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5303       || code == RROTATE_EXPR))
5304     return false;
5305 
5306   scalar_dest = gimple_assign_lhs (stmt);
5307   vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5308   if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5309     {
5310       if (dump_enabled_p ())
5311         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5312                          "bit-precision shifts not supported.\n");
5313       return false;
5314     }
5315 
5316   op0 = gimple_assign_rhs1 (stmt);
5317   if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
5318     {
5319       if (dump_enabled_p ())
5320         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5321                          "use not simple.\n");
5322       return false;
5323     }
5324   /* If op0 is an external or constant def use a vector type with
5325      the same size as the output vector type.  */
5326   if (!vectype)
5327     vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5328   if (vec_stmt)
5329     gcc_assert (vectype);
5330   if (!vectype)
5331     {
5332       if (dump_enabled_p ())
5333         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5334                          "no vectype for scalar type\n");
5335       return false;
5336     }
5337 
5338   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5339   nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5340   if (maybe_ne (nunits_out, nunits_in))
5341     return false;
5342 
5343   op1 = gimple_assign_rhs2 (stmt);
5344   if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &op1_vectype))
5345     {
5346       if (dump_enabled_p ())
5347         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5348                          "use not simple.\n");
5349       return false;
5350     }
5351 
5352   /* Multiple types in SLP are handled by creating the appropriate number of
5353      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
5354      case of SLP.  */
5355   if (slp_node)
5356     ncopies = 1;
5357   else
5358     ncopies = vect_get_num_copies (loop_vinfo, vectype);
5359 
5360   gcc_assert (ncopies >= 1);
5361 
  /* Determine whether the shift amount is a vector or a scalar.  If the
     shift/rotate amount is a vector, use the vector/vector shift optabs.  */
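  /* For example, in "a[i] << 3" or "a[i] << n" with loop-invariant n the
     shift amount is a single scalar shared by all elements, whereas in
     "a[i] << b[i]" each element has its own shift amount and the
     vector/vector form must be used.  */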
5364 
5365   if ((dt[1] == vect_internal_def
5366        || dt[1] == vect_induction_def)
5367       && !slp_node)
5368     scalar_shift_arg = false;
5369   else if (dt[1] == vect_constant_def
5370 	   || dt[1] == vect_external_def
5371 	   || dt[1] == vect_internal_def)
5372     {
      /* In SLP, we need to check whether the shift count is the same in
	 all the stmts of the node; in loops, a constant or invariant
	 shift count is always a scalar shift.  */
5376       if (slp_node)
5377 	{
5378 	  vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5379 	  gimple *slpstmt;
5380 
5381 	  FOR_EACH_VEC_ELT (stmts, k, slpstmt)
5382 	    if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5383 	      scalar_shift_arg = false;
5384 	}
5385 
      /* If the shift amount is computed by a pattern stmt we cannot
	 use the scalar amount directly, so give up and use a vector
	 shift.  */
5389       if (dt[1] == vect_internal_def)
5390 	{
5391 	  gimple *def = SSA_NAME_DEF_STMT (op1);
5392 	  if (is_pattern_stmt_p (vinfo_for_stmt (def)))
5393 	    scalar_shift_arg = false;
5394 	}
5395     }
5396   else
5397     {
5398       if (dump_enabled_p ())
5399         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5400                          "operand mode requires invariant argument.\n");
5401       return false;
5402     }
5403 
5404   /* Vector shifted by vector.  */
5405   if (!scalar_shift_arg)
5406     {
5407       optab = optab_for_tree_code (code, vectype, optab_vector);
5408       if (dump_enabled_p ())
5409         dump_printf_loc (MSG_NOTE, vect_location,
5410                          "vector/vector shift/rotate found.\n");
5411 
5412       if (!op1_vectype)
5413 	op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
5414       if (op1_vectype == NULL_TREE
5415 	  || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
5416 	{
5417 	  if (dump_enabled_p ())
5418 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5419                              "unusable type for last operand in"
5420                              " vector/vector shift/rotate.\n");
5421 	  return false;
5422 	}
5423     }
  /* See if the machine has a vector-shifted-by-scalar insn and, if not,
     whether it has a vector-shifted-by-vector insn.  */
5426   else
5427     {
5428       optab = optab_for_tree_code (code, vectype, optab_scalar);
5429       if (optab
5430           && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5431         {
5432           if (dump_enabled_p ())
5433             dump_printf_loc (MSG_NOTE, vect_location,
5434                              "vector/scalar shift/rotate found.\n");
5435         }
5436       else
5437         {
5438           optab = optab_for_tree_code (code, vectype, optab_vector);
5439           if (optab
5440                && (optab_handler (optab, TYPE_MODE (vectype))
5441                       != CODE_FOR_nothing))
5442             {
5443 	      scalar_shift_arg = false;
5444 
5445               if (dump_enabled_p ())
5446                 dump_printf_loc (MSG_NOTE, vect_location,
5447                                  "vector/vector shift/rotate found.\n");
5448 
              /* Unlike the other binary operators, shifts/rotates take an
                 int rhs rather than an operand of the same type as the lhs,
                 so make sure the scalar is the right type if we are
                 dealing with vectors of long long/long/short/char.  */
5453               if (dt[1] == vect_constant_def)
5454                 op1 = fold_convert (TREE_TYPE (vectype), op1);
5455 	      else if (!useless_type_conversion_p (TREE_TYPE (vectype),
5456 						   TREE_TYPE (op1)))
5457 		{
5458 		  if (slp_node
5459 		      && TYPE_MODE (TREE_TYPE (vectype))
5460 			 != TYPE_MODE (TREE_TYPE (op1)))
5461 		    {
5462                       if (dump_enabled_p ())
5463                         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5464                                          "unusable type for last operand in"
5465                                          " vector/vector shift/rotate.\n");
5466 		      return false;
5467 		    }
5468 		  if (vec_stmt && !slp_node)
5469 		    {
5470 		      op1 = fold_convert (TREE_TYPE (vectype), op1);
5471 		      op1 = vect_init_vector (stmt, op1,
5472 					      TREE_TYPE (vectype), NULL);
5473 		    }
5474 		}
5475             }
5476         }
5477     }
5478 
5479   /* Supportable by target?  */
5480   if (!optab)
5481     {
5482       if (dump_enabled_p ())
5483         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5484                          "no optab.\n");
5485       return false;
5486     }
5487   vec_mode = TYPE_MODE (vectype);
5488   icode = (int) optab_handler (optab, vec_mode);
5489   if (icode == CODE_FOR_nothing)
5490     {
5491       if (dump_enabled_p ())
5492         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5493                          "op not supported by target.\n");
5494       /* Check only during analysis.  */
5495       if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5496 	  || (!vec_stmt
5497 	      && !vect_worthwhile_without_simd_p (vinfo, code)))
5498         return false;
5499       if (dump_enabled_p ())
5500         dump_printf_loc (MSG_NOTE, vect_location,
5501                          "proceeding using word mode.\n");
5502     }
5503 
5504   /* Worthwhile without SIMD support?  Check only during analysis.  */
5505   if (!vec_stmt
5506       && !VECTOR_MODE_P (TYPE_MODE (vectype))
5507       && !vect_worthwhile_without_simd_p (vinfo, code))
5508     {
5509       if (dump_enabled_p ())
5510         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5511                          "not worthwhile without SIMD support.\n");
5512       return false;
5513     }
5514 
5515   if (!vec_stmt) /* transformation not required.  */
5516     {
5517       STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5518       if (dump_enabled_p ())
5519         dump_printf_loc (MSG_NOTE, vect_location,
5520                          "=== vectorizable_shift ===\n");
5521       if (!slp_node)
5522 	vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
5523       return true;
5524     }
5525 
5526   /* Transform.  */
5527 
5528   if (dump_enabled_p ())
5529     dump_printf_loc (MSG_NOTE, vect_location,
5530                      "transform binary/unary operation.\n");
5531 
5532   /* Handle def.  */
5533   vec_dest = vect_create_destination_var (scalar_dest, vectype);
5534 
5535   prev_stmt_info = NULL;
5536   for (j = 0; j < ncopies; j++)
5537     {
5538       /* Handle uses.  */
5539       if (j == 0)
5540         {
5541           if (scalar_shift_arg)
5542             {
5543               /* Vector shl and shr insn patterns can be defined with scalar
5544                  operand 2 (shift operand).  In this case, use constant or loop
5545                  invariant op1 directly, without extending it to vector mode
5546                  first.  */
5547               optab_op2_mode = insn_data[icode].operand[2].mode;
5548               if (!VECTOR_MODE_P (optab_op2_mode))
5549                 {
5550                   if (dump_enabled_p ())
5551                     dump_printf_loc (MSG_NOTE, vect_location,
5552                                      "operand 1 using scalar mode.\n");
5553                   vec_oprnd1 = op1;
5554                   vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
5555                   vec_oprnds1.quick_push (vec_oprnd1);
5556                   if (slp_node)
5557                     {
5558                       /* Store vec_oprnd1 for every vector stmt to be created
5559                          for SLP_NODE.  We check during the analysis that all
5560                          the shift arguments are the same.
5561                          TODO: Allow different constants for different vector
5562                          stmts generated for an SLP instance.  */
5563                       for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5564                         vec_oprnds1.quick_push (vec_oprnd1);
5565                     }
5566                 }
5567             }
5568 
          /* vec_oprnd1 is available if operand 1 should be of a scalar type
             (a special case for certain kinds of vector shifts); otherwise,
             operand 1 should be of a vector type (the usual case).  */
5572           if (vec_oprnd1)
5573             vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5574                                slp_node);
5575           else
5576             vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5577                                slp_node);
5578         }
5579       else
5580         vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5581 
5582       /* Arguments are ready.  Create the new vector stmt.  */
5583       FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5584         {
5585           vop1 = vec_oprnds1[i];
5586 	  new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5587           new_temp = make_ssa_name (vec_dest, new_stmt);
5588           gimple_assign_set_lhs (new_stmt, new_temp);
5589           vect_finish_stmt_generation (stmt, new_stmt, gsi);
5590           if (slp_node)
5591             SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5592         }
5593 
5594       if (slp_node)
5595         continue;
5596 
5597       if (j == 0)
5598         STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5599       else
5600         STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5601       prev_stmt_info = vinfo_for_stmt (new_stmt);
5602     }
5603 
5604   vec_oprnds0.release ();
5605   vec_oprnds1.release ();
5606 
5607   return true;
5608 }
5609 
5610 
5611 /* Function vectorizable_operation.
5612 
5613    Check if STMT performs a binary, unary or ternary operation that can
5614    be vectorized.
5615    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5616    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5617    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
5618 
5619 static bool
5620 vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
5621 			gimple **vec_stmt, slp_tree slp_node)
5622 {
5623   tree vec_dest;
5624   tree scalar_dest;
5625   tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5626   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5627   tree vectype;
5628   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5629   enum tree_code code, orig_code;
5630   machine_mode vec_mode;
5631   tree new_temp;
5632   int op_type;
5633   optab optab;
5634   bool target_support_p;
5635   gimple *def_stmt;
5636   enum vect_def_type dt[3]
5637     = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5638   int ndts = 3;
5639   gimple *new_stmt = NULL;
5640   stmt_vec_info prev_stmt_info;
5641   poly_uint64 nunits_in;
5642   poly_uint64 nunits_out;
5643   tree vectype_out;
5644   int ncopies;
5645   int j, i;
5646   vec<tree> vec_oprnds0 = vNULL;
5647   vec<tree> vec_oprnds1 = vNULL;
5648   vec<tree> vec_oprnds2 = vNULL;
5649   tree vop0, vop1, vop2;
5650   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5651   vec_info *vinfo = stmt_info->vinfo;
5652 
5653   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5654     return false;
5655 
5656   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5657       && ! vec_stmt)
5658     return false;
5659 
5660   /* Is STMT a vectorizable binary/unary operation?   */
5661   if (!is_gimple_assign (stmt))
5662     return false;
5663 
5664   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5665     return false;
5666 
5667   orig_code = code = gimple_assign_rhs_code (stmt);
5668 
5669   /* For pointer addition and subtraction, we should use the normal
5670      plus and minus for the vector operation.  */
5671   if (code == POINTER_PLUS_EXPR)
5672     code = PLUS_EXPR;
5673   if (code == POINTER_DIFF_EXPR)
5674     code = MINUS_EXPR;
5675 
  /* Support only unary, binary, or ternary operations.  */
5677   op_type = TREE_CODE_LENGTH (code);
5678   if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
5679     {
5680       if (dump_enabled_p ())
5681         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5682                          "num. args = %d (not unary/binary/ternary op).\n",
5683                          op_type);
5684       return false;
5685     }
5686 
5687   scalar_dest = gimple_assign_lhs (stmt);
5688   vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5689 
5690   /* Most operations cannot handle bit-precision types without extra
5691      truncations.  */
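  /* For example, adding two values of a 3-bit bitfield type can set bits
     above the 3-bit precision and would need an explicit truncation after
     every vector operation, whereas BIT_AND_EXPR/BIT_IOR_EXPR/BIT_XOR_EXPR
     never produce bits outside their operands' precision.  */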
5692   if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
5693       && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
      /* Exceptions are the bitwise binary operations.  */
5695       && code != BIT_IOR_EXPR
5696       && code != BIT_XOR_EXPR
5697       && code != BIT_AND_EXPR)
5698     {
5699       if (dump_enabled_p ())
5700         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5701                          "bit-precision arithmetic not supported.\n");
5702       return false;
5703     }
5704 
5705   op0 = gimple_assign_rhs1 (stmt);
5706   if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
5707     {
5708       if (dump_enabled_p ())
5709         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5710                          "use not simple.\n");
5711       return false;
5712     }
5713   /* If op0 is an external or constant def use a vector type with
5714      the same size as the output vector type.  */
5715   if (!vectype)
5716     {
      /* For a boolean type we cannot determine the vectype from an
	 invariant value (we don't know whether it is a vector of
	 booleans or a vector of integers).  We use the output vectype
	 because operations on booleans don't change the type.  */
5722       if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
5723 	{
5724 	  if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
5725 	    {
5726 	      if (dump_enabled_p ())
5727 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5728 				 "not supported operation on bool value.\n");
5729 	      return false;
5730 	    }
5731 	  vectype = vectype_out;
5732 	}
5733       else
5734 	vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5735     }
5736   if (vec_stmt)
5737     gcc_assert (vectype);
5738   if (!vectype)
5739     {
5740       if (dump_enabled_p ())
5741         {
5742           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5743                            "no vectype for scalar type ");
5744           dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
5745                              TREE_TYPE (op0));
5746           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
5747         }
5748 
5749       return false;
5750     }
5751 
5752   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5753   nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5754   if (maybe_ne (nunits_out, nunits_in))
5755     return false;
5756 
5757   if (op_type == binary_op || op_type == ternary_op)
5758     {
5759       op1 = gimple_assign_rhs2 (stmt);
5760       if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]))
5761 	{
5762 	  if (dump_enabled_p ())
5763 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5764                              "use not simple.\n");
5765 	  return false;
5766 	}
5767     }
5768   if (op_type == ternary_op)
5769     {
5770       op2 = gimple_assign_rhs3 (stmt);
5771       if (!vect_is_simple_use (op2, vinfo, &def_stmt, &dt[2]))
5772 	{
5773 	  if (dump_enabled_p ())
5774 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5775                              "use not simple.\n");
5776 	  return false;
5777 	}
5778     }
5779 
5780   /* Multiple types in SLP are handled by creating the appropriate number of
5781      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
5782      case of SLP.  */
5783   if (slp_node)
5784     ncopies = 1;
5785   else
5786     ncopies = vect_get_num_copies (loop_vinfo, vectype);
5787 
5788   gcc_assert (ncopies >= 1);
5789 
5790   /* Shifts are handled in vectorizable_shift ().  */
5791   if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5792       || code == RROTATE_EXPR)
5793    return false;
5794 
5795   /* Supportable by target?  */
5796 
5797   vec_mode = TYPE_MODE (vectype);
5798   if (code == MULT_HIGHPART_EXPR)
5799     target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
5800   else
5801     {
5802       optab = optab_for_tree_code (code, vectype, optab_default);
5803       if (!optab)
5804 	{
5805           if (dump_enabled_p ())
5806             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5807                              "no optab.\n");
5808 	  return false;
5809 	}
5810       target_support_p = (optab_handler (optab, vec_mode)
5811 			  != CODE_FOR_nothing);
5812     }
5813 
5814   if (!target_support_p)
5815     {
5816       if (dump_enabled_p ())
5817 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5818                          "op not supported by target.\n");
5819       /* Check only during analysis.  */
5820       if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5821 	  || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
5822         return false;
5823       if (dump_enabled_p ())
5824 	dump_printf_loc (MSG_NOTE, vect_location,
5825                          "proceeding using word mode.\n");
5826     }
5827 
5828   /* Worthwhile without SIMD support?  Check only during analysis.  */
5829   if (!VECTOR_MODE_P (vec_mode)
5830       && !vec_stmt
5831       && !vect_worthwhile_without_simd_p (vinfo, code))
5832     {
5833       if (dump_enabled_p ())
5834         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5835                          "not worthwhile without SIMD support.\n");
5836       return false;
5837     }
5838 
5839   if (!vec_stmt) /* transformation not required.  */
5840     {
5841       STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
5842       if (dump_enabled_p ())
5843         dump_printf_loc (MSG_NOTE, vect_location,
5844                          "=== vectorizable_operation ===\n");
5845       if (!slp_node)
5846 	vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
5847       return true;
5848     }
5849 
5850   /* Transform.  */
5851 
5852   if (dump_enabled_p ())
5853     dump_printf_loc (MSG_NOTE, vect_location,
5854                      "transform binary/unary operation.\n");
5855 
5856   /* Handle def.  */
5857   vec_dest = vect_create_destination_var (scalar_dest, vectype);
5858 
  /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
     vectors with unsigned elements, but the result is signed.  So, we
     need to compute the MINUS_EXPR into a vectype temporary and
     VIEW_CONVERT_EXPR it into the final vectype_out result.  */
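  /* For example, on a 64-bit target the element differences are computed
     in a vector of unsigned 64-bit integers and the result is then
     VIEW_CONVERTed to the signed result vector type.  */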
5863   tree vec_cvt_dest = NULL_TREE;
5864   if (orig_code == POINTER_DIFF_EXPR)
5865     vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
5866 
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt, i.e., we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
5871      from one copy of the vector stmt to the next, in the field
5872      STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
5873      stages to find the correct vector defs to be used when vectorizing
5874      stmts that use the defs of the current stmt.  The example below
5875      illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
5876      we need to create 4 vectorized stmts):
5877 
5878      before vectorization:
5879                                 RELATED_STMT    VEC_STMT
5880         S1:     x = memref      -               -
5881         S2:     z = x + 1       -               -
5882 
5883      step 1: vectorize stmt S1 (done in vectorizable_load. See more details
5884              there):
5885                                 RELATED_STMT    VEC_STMT
5886         VS1_0:  vx0 = memref0   VS1_1           -
5887         VS1_1:  vx1 = memref1   VS1_2           -
5888         VS1_2:  vx2 = memref2   VS1_3           -
5889         VS1_3:  vx3 = memref3   -               -
5890         S1:     x = load        -               VS1_0
5891         S2:     z = x + 1       -               -
5892 
5893      step2: vectorize stmt S2 (done here):
5894         To vectorize stmt S2 we first need to find the relevant vector
5895         def for the first operand 'x'.  This is, as usual, obtained from
5896         the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
5897         that defines 'x' (S1).  This way we find the stmt VS1_0, and the
5898         relevant vector def 'vx0'.  Having found 'vx0' we can generate
5899         the vector stmt VS2_0, and as usual, record it in the
5900         STMT_VINFO_VEC_STMT of stmt S2.
5901         When creating the second copy (VS2_1), we obtain the relevant vector
5902         def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
5903         stmt VS1_0.  This way we find the stmt VS1_1 and the relevant
5904         vector def 'vx1'.  Using 'vx1' we create stmt VS2_1 and record a
5905         pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
5906         Similarly when creating stmts VS2_2 and VS2_3.  This is the resulting
5907         chain of stmts and pointers:
5908                                 RELATED_STMT    VEC_STMT
5909         VS1_0:  vx0 = memref0   VS1_1           -
5910         VS1_1:  vx1 = memref1   VS1_2           -
5911         VS1_2:  vx2 = memref2   VS1_3           -
5912         VS1_3:  vx3 = memref3   -               -
5913         S1:     x = load        -               VS1_0
5914         VS2_0:  vz0 = vx0 + v1  VS2_1           -
5915         VS2_1:  vz1 = vx1 + v1  VS2_2           -
5916         VS2_2:  vz2 = vx2 + v1  VS2_3           -
5917         VS2_3:  vz3 = vx3 + v1  -               -
5918         S2:     z = x + 1       -               VS2_0  */
5919 
5920   prev_stmt_info = NULL;
5921   for (j = 0; j < ncopies; j++)
5922     {
5923       /* Handle uses.  */
5924       if (j == 0)
5925 	{
5926 	  if (op_type == binary_op || op_type == ternary_op)
5927 	    vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5928 			       slp_node);
5929 	  else
5930 	    vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5931 			       slp_node);
5932 	  if (op_type == ternary_op)
5933 	    vect_get_vec_defs (op2, NULL_TREE, stmt, &vec_oprnds2, NULL,
5934 			       slp_node);
5935 	}
5936       else
5937 	{
5938 	  vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5939 	  if (op_type == ternary_op)
5940 	    {
5941 	      tree vec_oprnd = vec_oprnds2.pop ();
5942 	      vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
5943 							           vec_oprnd));
5944 	    }
5945 	}
5946 
5947       /* Arguments are ready.  Create the new vector stmt.  */
5948       FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5949         {
5950 	  vop1 = ((op_type == binary_op || op_type == ternary_op)
5951 		  ? vec_oprnds1[i] : NULL_TREE);
5952 	  vop2 = ((op_type == ternary_op)
5953 		  ? vec_oprnds2[i] : NULL_TREE);
5954 	  new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
5955 	  new_temp = make_ssa_name (vec_dest, new_stmt);
5956 	  gimple_assign_set_lhs (new_stmt, new_temp);
5957 	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
5958 	  if (vec_cvt_dest)
5959 	    {
5960 	      new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
5961 	      new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
5962 					      new_temp);
5963 	      new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
5964 	      gimple_assign_set_lhs (new_stmt, new_temp);
5965 	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
5966 	    }
5967           if (slp_node)
5968 	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5969         }
5970 
5971       if (slp_node)
5972         continue;
5973 
5974       if (j == 0)
5975 	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5976       else
5977 	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5978       prev_stmt_info = vinfo_for_stmt (new_stmt);
5979     }
5980 
5981   vec_oprnds0.release ();
5982   vec_oprnds1.release ();
5983   vec_oprnds2.release ();
5984 
5985   return true;
5986 }
5987 
5988 /* A helper function to ensure data reference DR's base alignment.  */
5989 
5990 static void
5991 ensure_base_align (struct data_reference *dr)
5992 {
5993   if (!dr->aux)
5994     return;
5995 
5996   if (DR_VECT_AUX (dr)->base_misaligned)
5997     {
5998       tree base_decl = DR_VECT_AUX (dr)->base_decl;
5999 
6000       unsigned int align_base_to = DR_TARGET_ALIGNMENT (dr) * BITS_PER_UNIT;
6001 
6002       if (decl_in_symtab_p (base_decl))
6003 	symtab_node::get (base_decl)->increase_alignment (align_base_to);
6004       else
6005 	{
6006 	  SET_DECL_ALIGN (base_decl, align_base_to);
6007           DECL_USER_ALIGN (base_decl) = 1;
6008 	}
6009       DR_VECT_AUX (dr)->base_misaligned = false;
6010     }
6011 }
6012 
6013 
6014 /* Function get_group_alias_ptr_type.
6015 
6016    Return the alias type for the group starting at FIRST_STMT.  */
6017 
6018 static tree
6019 get_group_alias_ptr_type (gimple *first_stmt)
6020 {
6021   struct data_reference *first_dr, *next_dr;
6022   gimple *next_stmt;
6023 
6024   first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6025   next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt));
6026   while (next_stmt)
6027     {
6028       next_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (next_stmt));
6029       if (get_alias_set (DR_REF (first_dr))
6030 	  != get_alias_set (DR_REF (next_dr)))
6031 	{
6032 	  if (dump_enabled_p ())
6033 	    dump_printf_loc (MSG_NOTE, vect_location,
6034 			     "conflicting alias set types.\n");
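	  /* Fall back to ptr_type_node, whose alias set conflicts with
	     anything, which is conservatively correct.  */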
6035 	  return ptr_type_node;
6036 	}
6037       next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6038     }
6039   return reference_alias_ptr_type (DR_REF (first_dr));
6040 }
6041 
6042 
6043 /* Function vectorizable_store.
6044 
   Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
6046    can be vectorized.
6047    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6048    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
6049    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
6050 
6051 static bool
6052 vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
6053                     slp_tree slp_node)
6054 {
6055   tree data_ref;
6056   tree op;
6057   tree vec_oprnd = NULL_TREE;
6058   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6059   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
6060   tree elem_type;
6061   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6062   struct loop *loop = NULL;
6063   machine_mode vec_mode;
6064   tree dummy;
6065   enum dr_alignment_support alignment_support_scheme;
6066   gimple *def_stmt;
6067   enum vect_def_type rhs_dt = vect_unknown_def_type;
6068   enum vect_def_type mask_dt = vect_unknown_def_type;
6069   stmt_vec_info prev_stmt_info = NULL;
6070   tree dataref_ptr = NULL_TREE;
6071   tree dataref_offset = NULL_TREE;
6072   gimple *ptr_incr = NULL;
6073   int ncopies;
6074   int j;
6075   gimple *next_stmt, *first_stmt;
6076   bool grouped_store;
6077   unsigned int group_size, i;
6078   vec<tree> oprnds = vNULL;
6079   vec<tree> result_chain = vNULL;
6080   bool inv_p;
6081   tree offset = NULL_TREE;
6082   vec<tree> vec_oprnds = vNULL;
6083   bool slp = (slp_node != NULL);
6084   unsigned int vec_num;
6085   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6086   vec_info *vinfo = stmt_info->vinfo;
6087   tree aggr_type;
6088   gather_scatter_info gs_info;
6089   gimple *new_stmt;
6090   poly_uint64 vf;
6091   vec_load_store_type vls_type;
6092   tree ref_type;
6093 
6094   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6095     return false;
6096 
6097   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6098       && ! vec_stmt)
6099     return false;
6100 
  /* Is STMT a vectorizable store?  */
6102 
6103   tree mask = NULL_TREE, mask_vectype = NULL_TREE;
6104   if (is_gimple_assign (stmt))
6105     {
6106       tree scalar_dest = gimple_assign_lhs (stmt);
6107       if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
6108 	  && is_pattern_stmt_p (stmt_info))
6109 	scalar_dest = TREE_OPERAND (scalar_dest, 0);
6110       if (TREE_CODE (scalar_dest) != ARRAY_REF
6111 	  && TREE_CODE (scalar_dest) != BIT_FIELD_REF
6112 	  && TREE_CODE (scalar_dest) != INDIRECT_REF
6113 	  && TREE_CODE (scalar_dest) != COMPONENT_REF
6114 	  && TREE_CODE (scalar_dest) != IMAGPART_EXPR
6115 	  && TREE_CODE (scalar_dest) != REALPART_EXPR
6116 	  && TREE_CODE (scalar_dest) != MEM_REF)
6117 	return false;
6118     }
6119   else
6120     {
6121       gcall *call = dyn_cast <gcall *> (stmt);
6122       if (!call || !gimple_call_internal_p (call))
6123 	return false;
6124 
6125       internal_fn ifn = gimple_call_internal_fn (call);
6126       if (!internal_store_fn_p (ifn))
6127 	return false;
6128 
6129       if (slp_node != NULL)
6130 	{
6131 	  if (dump_enabled_p ())
6132 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6133 			     "SLP of masked stores not supported.\n");
6134 	  return false;
6135 	}
6136 
6137       int mask_index = internal_fn_mask_index (ifn);
6138       if (mask_index >= 0)
6139 	{
6140 	  mask = gimple_call_arg (call, mask_index);
6141 	  if (!vect_check_load_store_mask (stmt, mask, &mask_dt,
6142 					   &mask_vectype))
6143 	    return false;
6144 	}
6145     }
6146 
6147   op = vect_get_store_rhs (stmt);
6148 
6149   /* Cannot have hybrid store SLP -- that would mean storing to the
6150      same location twice.  */
6151   gcc_assert (slp == PURE_SLP_STMT (stmt_info));
6152 
6153   tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
6154   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
6155 
6156   if (loop_vinfo)
6157     {
6158       loop = LOOP_VINFO_LOOP (loop_vinfo);
6159       vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6160     }
6161   else
6162     vf = 1;
6163 
6164   /* Multiple types in SLP are handled by creating the appropriate number of
6165      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
6166      case of SLP.  */
6167   if (slp)
6168     ncopies = 1;
6169   else
6170     ncopies = vect_get_num_copies (loop_vinfo, vectype);
6171 
6172   gcc_assert (ncopies >= 1);
6173 
6174   /* FORNOW.  This restriction should be relaxed.  */
6175   if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
6176     {
6177       if (dump_enabled_p ())
6178 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6179 			 "multiple types in nested loop.\n");
6180       return false;
6181     }
6182 
6183   if (!vect_check_store_rhs (stmt, op, &rhs_dt, &rhs_vectype, &vls_type))
6184     return false;
6185 
6186   elem_type = TREE_TYPE (vectype);
6187   vec_mode = TYPE_MODE (vectype);
6188 
6189   if (!STMT_VINFO_DATA_REF (stmt_info))
6190     return false;
6191 
6192   vect_memory_access_type memory_access_type;
6193   if (!get_load_store_type (stmt, vectype, slp, mask, vls_type, ncopies,
6194 			    &memory_access_type, &gs_info))
6195     return false;
6196 
6197   if (mask)
6198     {
6199       if (memory_access_type == VMAT_CONTIGUOUS)
6200 	{
6201 	  if (!VECTOR_MODE_P (vec_mode)
6202 	      || !can_vec_mask_load_store_p (vec_mode,
6203 					     TYPE_MODE (mask_vectype), false))
6204 	    return false;
6205 	}
6206       else if (memory_access_type != VMAT_LOAD_STORE_LANES
6207 	       && (memory_access_type != VMAT_GATHER_SCATTER || gs_info.decl))
6208 	{
6209 	  if (dump_enabled_p ())
6210 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6211 			     "unsupported access type for masked store.\n");
6212 	  return false;
6213 	}
6214     }
6215   else
6216     {
6217       /* FORNOW. In some cases can vectorize even if data-type not supported
6218 	 (e.g. - array initialization with 0).  */
6219       if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
6220 	return false;
6221     }
6222 
6223   grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
6224 		   && memory_access_type != VMAT_GATHER_SCATTER
6225 		   && (slp || memory_access_type != VMAT_CONTIGUOUS));
6226   if (grouped_store)
6227     {
6228       first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6229       first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6230       group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6231     }
6232   else
6233     {
6234       first_stmt = stmt;
6235       first_dr = dr;
6236       group_size = vec_num = 1;
6237     }
6238 
6239   if (!vec_stmt) /* transformation not required.  */
6240     {
6241       STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
6242 
6243       if (loop_vinfo
6244 	  && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
6245 	check_load_store_masking (loop_vinfo, vectype, vls_type, group_size,
6246 				  memory_access_type, &gs_info);
6247 
6248       STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
6249       /* The SLP costs are calculated during SLP analysis.  */
6250       if (!slp_node)
6251 	vect_model_store_cost (stmt_info, ncopies, memory_access_type,
6252 			       vls_type, NULL, NULL, NULL);
6253       return true;
6254     }
6255   gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
6256 
6257   /* Transform.  */
6258 
6259   ensure_base_align (dr);
6260 
6261   if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
6262     {
6263       tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
6264       tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
6265       tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
6266       tree ptr, mask, var, scale, perm_mask = NULL_TREE;
6267       edge pe = loop_preheader_edge (loop);
6268       gimple_seq seq;
6269       basic_block new_bb;
6270       enum { NARROW, NONE, WIDEN } modifier;
6271       poly_uint64 scatter_off_nunits
6272 	= TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
6273 
6274       if (known_eq (nunits, scatter_off_nunits))
6275 	modifier = NONE;
6276       else if (known_eq (nunits * 2, scatter_off_nunits))
6277 	{
6278 	  modifier = WIDEN;
6279 
6280 	  /* Currently gathers and scatters are only supported for
6281 	     fixed-length vectors.  */
6282 	  unsigned int count = scatter_off_nunits.to_constant ();
6283 	  vec_perm_builder sel (count, count, 1);
6284 	  for (i = 0; i < (unsigned int) count; ++i)
6285 	    sel.quick_push (i | (count / 2));
6286 
6287 	  vec_perm_indices indices (sel, 1, count);
6288 	  perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
6289 						  indices);
6290 	  gcc_assert (perm_mask != NULL_TREE);
6291 	}
6292       else if (known_eq (nunits, scatter_off_nunits * 2))
6293 	{
6294 	  modifier = NARROW;
6295 
6296 	  /* Currently gathers and scatters are only supported for
6297 	     fixed-length vectors.  */
6298 	  unsigned int count = nunits.to_constant ();
6299 	  vec_perm_builder sel (count, count, 1);
6300 	  for (i = 0; i < (unsigned int) count; ++i)
6301 	    sel.quick_push (i | (count / 2));
6302 
6303 	  vec_perm_indices indices (sel, 2, count);
6304 	  perm_mask = vect_gen_perm_mask_checked (vectype, indices);
6305 	  gcc_assert (perm_mask != NULL_TREE);
6306 	  ncopies *= 2;
6307 	}
6308       else
6309 	gcc_unreachable ();
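      /* Illustrative example (numbers not taken from any particular
	 target): with a 4-element data vector and an 8-element offset
	 vector (the WIDEN case), the selector built above is
	 { 4, 5, 6, 7, 4, 5, 6, 7 }, i.e. the high half of the offset
	 vector replicated; the odd-numbered copies in the loop below use
	 it via permute_vec_elements to reach the second half of the
	 offsets.  The NARROW case builds the analogous selector over the
	 data vector.  */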
6310 
6311       rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
6312       ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6313       masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6314       idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6315       srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6316       scaletype = TREE_VALUE (arglist);
6317 
6318       gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
6319 			   && TREE_CODE (rettype) == VOID_TYPE);
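      /* So the target builtin is expected to have a prototype of the form
	 void (ptr, mask, index, source, scale); the gimple_build_call at
	 the bottom of the loop below passes PTR, MASK, OP, SRC and SCALE
	 in exactly that order.  */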
6320 
6321       ptr = fold_convert (ptrtype, gs_info.base);
6322       if (!is_gimple_min_invariant (ptr))
6323 	{
6324 	  ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6325 	  new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6326 	  gcc_assert (!new_bb);
6327 	}
6328 
6329       /* Currently we support only unconditional scatter stores,
6330 	 so mask should be all ones.  */
6331       mask = build_int_cst (masktype, -1);
6332       mask = vect_init_vector (stmt, mask, masktype, NULL);
6333 
6334       scale = build_int_cst (scaletype, gs_info.scale);
6335 
6336       prev_stmt_info = NULL;
6337       for (j = 0; j < ncopies; ++j)
6338 	{
6339 	  if (j == 0)
6340 	    {
6341 	      src = vec_oprnd1
6342 		= vect_get_vec_def_for_operand (op, stmt);
6343 	      op = vec_oprnd0
6344 		= vect_get_vec_def_for_operand (gs_info.offset, stmt);
6345 	    }
6346 	  else if (modifier != NONE && (j & 1))
6347 	    {
6348 	      if (modifier == WIDEN)
6349 		{
6350 		  src = vec_oprnd1
6351 		    = vect_get_vec_def_for_stmt_copy (rhs_dt, vec_oprnd1);
6352 		  op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
6353 					     stmt, gsi);
6354 		}
6355 	      else if (modifier == NARROW)
6356 		{
6357 		  src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
6358 					      stmt, gsi);
6359 		  op = vec_oprnd0
6360 		    = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
6361 						      vec_oprnd0);
6362 		}
6363 	      else
6364 		gcc_unreachable ();
6365 	    }
6366 	  else
6367 	    {
6368 	      src = vec_oprnd1
6369 		= vect_get_vec_def_for_stmt_copy (rhs_dt, vec_oprnd1);
6370 	      op = vec_oprnd0
6371 		= vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
6372 						  vec_oprnd0);
6373 	    }
6374 
6375 	  if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
6376 	    {
6377 	      gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
6378 				    TYPE_VECTOR_SUBPARTS (srctype)));
6379 	      var = vect_get_new_ssa_name (srctype, vect_simple_var);
6380 	      src = build1 (VIEW_CONVERT_EXPR, srctype, src);
6381 	      new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
6382 	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
6383 	      src = var;
6384 	    }
6385 
6386 	  if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6387 	    {
6388 	      gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
6389 				    TYPE_VECTOR_SUBPARTS (idxtype)));
6390 	      var = vect_get_new_ssa_name (idxtype, vect_simple_var);
6391 	      op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6392 	      new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6393 	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
6394 	      op = var;
6395 	    }
6396 
6397 	  new_stmt
6398 	    = gimple_build_call (gs_info.decl, 5, ptr, mask, op, src, scale);
6399 
6400 	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
6401 
6402 	  if (prev_stmt_info == NULL)
6403 	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6404 	  else
6405 	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6406 	  prev_stmt_info = vinfo_for_stmt (new_stmt);
6407 	}
6408       return true;
6409     }
6410 
6411   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
6412     {
6413       gimple *group_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6414       GROUP_STORE_COUNT (vinfo_for_stmt (group_stmt))++;
6415     }
6416 
6417   if (grouped_store)
6418     {
6419       /* FORNOW */
6420       gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
6421 
6422       /* We vectorize all the stmts of the interleaving group when we
6423 	 reach the last stmt in the group.  */
6424       if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
6425 	  < GROUP_SIZE (vinfo_for_stmt (first_stmt))
6426 	  && !slp)
6427 	{
6428 	  *vec_stmt = NULL;
6429 	  return true;
6430 	}
6431 
6432       if (slp)
6433         {
6434           grouped_store = false;
6435           /* VEC_NUM is the number of vect stmts to be created for this
6436              group.  */
6437           vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6438           first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6439 	  gcc_assert (GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt)) == first_stmt);
6440           first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6441 	  op = vect_get_store_rhs (first_stmt);
6442         }
6443       else
6444         /* VEC_NUM is the number of vect stmts to be created for this
6445            group.  */
6446 	vec_num = group_size;
6447 
6448       ref_type = get_group_alias_ptr_type (first_stmt);
6449     }
6450   else
6451     ref_type = reference_alias_ptr_type (DR_REF (first_dr));
6452 
6453   if (dump_enabled_p ())
6454     dump_printf_loc (MSG_NOTE, vect_location,
6455                      "transform store. ncopies = %d\n", ncopies);
6456 
6457   if (memory_access_type == VMAT_ELEMENTWISE
6458       || memory_access_type == VMAT_STRIDED_SLP)
6459     {
6460       gimple_stmt_iterator incr_gsi;
6461       bool insert_after;
6462       gimple *incr;
6463       tree offvar;
6464       tree ivstep;
6465       tree running_off;
6466       tree stride_base, stride_step, alias_off;
6467       tree vec_oprnd;
6468       unsigned int g;
6469       /* Checked by get_load_store_type.  */
6470       unsigned int const_nunits = nunits.to_constant ();
6471 
6472       gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
6473       gcc_assert (!nested_in_vect_loop_p (loop, stmt));
6474 
6475       stride_base
6476 	= fold_build_pointer_plus
6477 	    (DR_BASE_ADDRESS (first_dr),
6478 	     size_binop (PLUS_EXPR,
6479 			 convert_to_ptrofftype (DR_OFFSET (first_dr)),
6480 			 convert_to_ptrofftype (DR_INIT (first_dr))));
6481       stride_step = fold_convert (sizetype, DR_STEP (first_dr));
6482 
6483       /* For a store with loop-invariant (but other than power-of-2)
6484          stride (i.e. not a grouped access) like so:
6485 
6486 	   for (i = 0; i < n; i += stride)
6487 	     array[i] = ...;
6488 
6489 	 we generate a new induction variable and new stores from
6490 	 the components of the (vectorized) rhs:
6491 
6492 	   for (j = 0; ; j += VF*stride)
6493 	     vectemp = ...;
6494 	     tmp1 = vectemp[0];
6495 	     array[j] = tmp1;
6496 	     tmp2 = vectemp[1];
6497 	     array[j + stride] = tmp2;
6498 	     ...
6499          */
6500 
6501       unsigned nstores = const_nunits;
6502       unsigned lnel = 1;
6503       tree ltype = elem_type;
6504       tree lvectype = vectype;
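      /* For instance (illustrative numbers): with a 4-element vectype and
	 an SLP group_size of 2, and assuming the target supports the
	 required vec_extract, the code below picks nstores = 2, lnel = 2
	 and a 2-element ltype, so each vectorized rhs is split into two
	 group-sized stores.  */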
6505       if (slp)
6506 	{
6507 	  if (group_size < const_nunits
6508 	      && const_nunits % group_size == 0)
6509 	    {
6510 	      nstores = const_nunits / group_size;
6511 	      lnel = group_size;
6512 	      ltype = build_vector_type (elem_type, group_size);
6513 	      lvectype = vectype;
6514 
6515 	      /* First check whether the vec_extract optab supports extraction
6516 		 of vector elts directly; if not, try the fallback below.  */
6517 	      scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
6518 	      machine_mode vmode;
6519 	      if (!mode_for_vector (elmode, group_size).exists (&vmode)
6520 		  || !VECTOR_MODE_P (vmode)
6521 		  || !targetm.vector_mode_supported_p (vmode)
6522 		  || (convert_optab_handler (vec_extract_optab,
6523 					     TYPE_MODE (vectype), vmode)
6524 		      == CODE_FOR_nothing))
6525 		{
6526 		  /* Try to avoid emitting an extract of vector elements
6527 		     by performing the extracts using an integer type of the
6528 		     same size, extracting from a vector of those and then
6529 		     re-interpreting it as the original vector type if
6530 		     supported.  */
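		  /* E.g. (illustrative): for 16-bit elements with
		     group_size == 2 and const_nunits == 8 this tries
		     lsize == 32, i.e. four 32-bit integer stores taken
		     from a 4 x 32-bit vector that the original vector
		     value is punned to further below.  */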
6531 		  unsigned lsize
6532 		    = group_size * GET_MODE_BITSIZE (elmode);
6533 		  elmode = int_mode_for_size (lsize, 0).require ();
6534 		  unsigned int lnunits = const_nunits / group_size;
6535 		  /* If we can't construct such a vector fall back to
6536 		     element extracts from the original vector type and
6537 		     element size stores.  */
6538 		  if (mode_for_vector (elmode, lnunits).exists (&vmode)
6539 		      && VECTOR_MODE_P (vmode)
6540 		      && targetm.vector_mode_supported_p (vmode)
6541 		      && (convert_optab_handler (vec_extract_optab,
6542 						 vmode, elmode)
6543 			  != CODE_FOR_nothing))
6544 		    {
6545 		      nstores = lnunits;
6546 		      lnel = group_size;
6547 		      ltype = build_nonstandard_integer_type (lsize, 1);
6548 		      lvectype = build_vector_type (ltype, nstores);
6549 		    }
6550 		  /* Else fall back to vector extraction anyway.
6551 		     Fewer stores are more important than avoiding spilling
6552 		     of the vector we extract from.  Compared to the
6553 		     construction case in vectorizable_load no store-forwarding
6554 		     issue exists here for reasonable archs.  */
6555 		}
6556 	    }
6557 	  else if (group_size >= const_nunits
6558 		   && group_size % const_nunits == 0)
6559 	    {
6560 	      nstores = 1;
6561 	      lnel = const_nunits;
6562 	      ltype = vectype;
6563 	      lvectype = vectype;
6564 	    }
6565 	  ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
6566 	  ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6567 	}
6568 
6569       ivstep = stride_step;
6570       ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
6571 			    build_int_cst (TREE_TYPE (ivstep), vf));
6572 
6573       standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6574 
6575       stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
6576       ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
6577       create_iv (stride_base, ivstep, NULL,
6578 		 loop, &incr_gsi, insert_after,
6579 		 &offvar, NULL);
6580       incr = gsi_stmt (incr_gsi);
6581       set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
6582 
6583       stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
6584 
6585       prev_stmt_info = NULL;
6586       alias_off = build_int_cst (ref_type, 0);
6587       next_stmt = first_stmt;
6588       for (g = 0; g < group_size; g++)
6589 	{
6590 	  running_off = offvar;
6591 	  if (g)
6592 	    {
6593 	      tree size = TYPE_SIZE_UNIT (ltype);
6594 	      tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
6595 				      size);
6596 	      tree newoff = copy_ssa_name (running_off, NULL);
6597 	      incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6598 					  running_off, pos);
6599 	      vect_finish_stmt_generation (stmt, incr, gsi);
6600 	      running_off = newoff;
6601 	    }
6602 	  unsigned int group_el = 0;
6603 	  unsigned HOST_WIDE_INT
6604 	    elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
6605 	  for (j = 0; j < ncopies; j++)
6606 	    {
6607 	      /* We've set op and rhs_dt above, from vect_get_store_rhs,
6608 		 and first_stmt == stmt.  */
6609 	      if (j == 0)
6610 		{
6611 		  if (slp)
6612 		    {
6613 		      vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
6614 					 slp_node);
6615 		      vec_oprnd = vec_oprnds[0];
6616 		    }
6617 		  else
6618 		    {
6619 		      op = vect_get_store_rhs (next_stmt);
6620 		      vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
6621 		    }
6622 		}
6623 	      else
6624 		{
6625 		  if (slp)
6626 		    vec_oprnd = vec_oprnds[j];
6627 		  else
6628 		    {
6629 		      vect_is_simple_use (op, vinfo, &def_stmt, &rhs_dt);
6630 		      vec_oprnd = vect_get_vec_def_for_stmt_copy (rhs_dt,
6631 								  vec_oprnd);
6632 		    }
6633 		}
6634 	      /* Pun the vector to extract from if necessary.  */
6635 	      if (lvectype != vectype)
6636 		{
6637 		  tree tem = make_ssa_name (lvectype);
6638 		  gimple *pun
6639 		    = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
6640 							lvectype, vec_oprnd));
6641 		  vect_finish_stmt_generation (stmt, pun, gsi);
6642 		  vec_oprnd = tem;
6643 		}
6644 	      for (i = 0; i < nstores; i++)
6645 		{
6646 		  tree newref, newoff;
6647 		  gimple *incr, *assign;
6648 		  tree size = TYPE_SIZE (ltype);
6649 		  /* Extract the i'th component.  */
6650 		  tree pos = fold_build2 (MULT_EXPR, bitsizetype,
6651 					  bitsize_int (i), size);
6652 		  tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
6653 					   size, pos);
6654 
6655 		  elem = force_gimple_operand_gsi (gsi, elem, true,
6656 						   NULL_TREE, true,
6657 						   GSI_SAME_STMT);
6658 
6659 		  tree this_off = build_int_cst (TREE_TYPE (alias_off),
6660 						 group_el * elsz);
6661 		  newref = build2 (MEM_REF, ltype,
6662 				   running_off, this_off);
6663 		  vect_copy_ref_info (newref, DR_REF (first_dr));
6664 
6665 		  /* And store it to *running_off.  */
6666 		  assign = gimple_build_assign (newref, elem);
6667 		  vect_finish_stmt_generation (stmt, assign, gsi);
6668 
6669 		  group_el += lnel;
6670 		  if (! slp
6671 		      || group_el == group_size)
6672 		    {
6673 		      newoff = copy_ssa_name (running_off, NULL);
6674 		      incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6675 						  running_off, stride_step);
6676 		      vect_finish_stmt_generation (stmt, incr, gsi);
6677 
6678 		      running_off = newoff;
6679 		      group_el = 0;
6680 		    }
6681 		  if (g == group_size - 1
6682 		      && !slp)
6683 		    {
6684 		      if (j == 0 && i == 0)
6685 			STMT_VINFO_VEC_STMT (stmt_info)
6686 			    = *vec_stmt = assign;
6687 		      else
6688 			STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
6689 		      prev_stmt_info = vinfo_for_stmt (assign);
6690 		    }
6691 		}
6692 	    }
6693 	  next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6694 	  if (slp)
6695 	    break;
6696 	}
6697 
6698       vec_oprnds.release ();
6699       return true;
6700     }
6701 
6702   auto_vec<tree> dr_chain (group_size);
6703   oprnds.create (group_size);
6704 
6705   alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
6706   gcc_assert (alignment_support_scheme);
6707   vec_loop_masks *loop_masks
6708     = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
6709        ? &LOOP_VINFO_MASKS (loop_vinfo)
6710        : NULL);
6711   /* Targets with store-lane instructions must not require explicit
6712      realignment.  vect_supportable_dr_alignment always returns either
6713      dr_aligned or dr_unaligned_supported for masked operations.  */
6714   gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
6715 	       && !mask
6716 	       && !loop_masks)
6717 	      || alignment_support_scheme == dr_aligned
6718 	      || alignment_support_scheme == dr_unaligned_supported);
6719 
6720   if (memory_access_type == VMAT_CONTIGUOUS_DOWN
6721       || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
6722     offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6723 
6724   tree bump;
6725   tree vec_offset = NULL_TREE;
6726   if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6727     {
6728       aggr_type = NULL_TREE;
6729       bump = NULL_TREE;
6730     }
6731   else if (memory_access_type == VMAT_GATHER_SCATTER)
6732     {
6733       aggr_type = elem_type;
6734       vect_get_strided_load_store_ops (stmt, loop_vinfo, &gs_info,
6735 				       &bump, &vec_offset);
6736     }
6737   else
6738     {
6739       if (memory_access_type == VMAT_LOAD_STORE_LANES)
6740 	aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6741       else
6742 	aggr_type = vectype;
6743       bump = vect_get_data_ptr_increment (dr, aggr_type, memory_access_type);
6744     }
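  /* E.g. (illustrative): for VMAT_LOAD_STORE_LANES with VEC_NUM == 2 and a
     4-element vectype, AGGR_TYPE is an 8-element array and BUMP advances
     the data pointer past the whole interleaved group on each copy.  */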
6745 
6746   if (mask)
6747     LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
6748 
6749   /* In case the vectorization factor (VF) is bigger than the number
6750      of elements that we can fit in a vectype (nunits), we have to generate
6751      more than one vector stmt, i.e. we need to "unroll" the
6752      vector stmt by a factor VF/nunits.  For more details see documentation in
6753      vect_get_vec_def_for_copy_stmt.  */
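  /* E.g. (illustrative): with VF == 8 and a 4-element vectype, NCOPIES is 2
     in the non-SLP case, so the generation loop below runs twice.  */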
6754 
6755   /* In case of interleaving (non-unit grouped access):
6756 
6757         S1:  &base + 2 = x2
6758         S2:  &base = x0
6759         S3:  &base + 1 = x1
6760         S4:  &base + 3 = x3
6761 
6762      We create vectorized stores starting from the base address (the access of
6763      the first stmt in the chain, S2 in the above example) when the last store
6764      stmt of the chain (S4) is reached:
6765 
6766         VS1: &base = vx2
6767 	VS2: &base + vec_size*1 = vx0
6768 	VS3: &base + vec_size*2 = vx1
6769 	VS4: &base + vec_size*3 = vx3
6770 
6771      Then permutation statements are generated:
6772 
6773 	VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
6774 	VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
6775 	...
6776 
6777      And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6778      (the order of the data-refs in the output of vect_permute_store_chain
6779      corresponds to the order of scalar stmts in the interleaving chain - see
6780      the documentation of vect_permute_store_chain()).
6781 
6782      In case of both multiple types and interleaving, the above vector stores and
6783      permutation stmts are created for every copy.  The result vector stmts are
6784      put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
6785      STMT_VINFO_RELATED_STMT for the next copies.
6786   */
6787 
6788   prev_stmt_info = NULL;
6789   tree vec_mask = NULL_TREE;
6790   for (j = 0; j < ncopies; j++)
6791     {
6792 
6793       if (j == 0)
6794 	{
6795           if (slp)
6796             {
6797 	      /* Get vectorized arguments for SLP_NODE.  */
6798               vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
6799                                  NULL, slp_node);
6800 
6801               vec_oprnd = vec_oprnds[0];
6802             }
6803           else
6804             {
6805 	      /* For interleaved stores we collect vectorized defs for all the
6806 		 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
6807 		 used as an input to vect_permute_store_chain(), and OPRNDS as
6808 		 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
6809 
6810 		 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6811 		 OPRNDS are of size 1.  */
6812 	      next_stmt = first_stmt;
6813 	      for (i = 0; i < group_size; i++)
6814 		{
6815 		  /* Since gaps are not supported for interleaved stores,
6816 		     GROUP_SIZE is the exact number of stmts in the chain.
6817 		     Therefore, NEXT_STMT can't be NULL.  In case there is no
6818 		     interleaving, GROUP_SIZE is 1, and only one
6819 		     iteration of the loop will be executed.  */
6820 		  op = vect_get_store_rhs (next_stmt);
6821 		  vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
6822 		  dr_chain.quick_push (vec_oprnd);
6823 		  oprnds.quick_push (vec_oprnd);
6824 		  next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6825 		}
6826 	      if (mask)
6827 		vec_mask = vect_get_vec_def_for_operand (mask, stmt,
6828 							 mask_vectype);
6829 	    }
6830 
6831 	  /* We should have caught mismatched types earlier.  */
6832 	  gcc_assert (useless_type_conversion_p (vectype,
6833 						 TREE_TYPE (vec_oprnd)));
6834 	  bool simd_lane_access_p
6835 	    = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6836 	  if (simd_lane_access_p
6837 	      && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6838 	      && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6839 	      && integer_zerop (DR_OFFSET (first_dr))
6840 	      && integer_zerop (DR_INIT (first_dr))
6841 	      && alias_sets_conflict_p (get_alias_set (aggr_type),
6842 					get_alias_set (TREE_TYPE (ref_type))))
6843 	    {
6844 	      dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
6845 	      dataref_offset = build_int_cst (ref_type, 0);
6846 	      inv_p = false;
6847 	    }
6848 	  else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6849 	    {
6850 	      vect_get_gather_scatter_ops (loop, stmt, &gs_info,
6851 					   &dataref_ptr, &vec_offset);
6852 	      inv_p = false;
6853 	    }
6854 	  else
6855 	    dataref_ptr
6856 	      = vect_create_data_ref_ptr (first_stmt, aggr_type,
6857 					  simd_lane_access_p ? loop : NULL,
6858 					  offset, &dummy, gsi, &ptr_incr,
6859 					  simd_lane_access_p, &inv_p,
6860 					  NULL_TREE, bump);
6861 	  gcc_assert (bb_vinfo || !inv_p);
6862 	}
6863       else
6864 	{
6865 	  /* For interleaved stores we created vectorized defs for all the
6866 	     defs stored in OPRNDS in the previous iteration (previous copy).
6867 	     DR_CHAIN is then used as an input to vect_permute_store_chain(),
6868 	     and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
6869 	     next copy.
6870 	     If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6871 	     OPRNDS are of size 1.  */
6872 	  for (i = 0; i < group_size; i++)
6873 	    {
6874 	      op = oprnds[i];
6875 	      vect_is_simple_use (op, vinfo, &def_stmt, &rhs_dt);
6876 	      vec_oprnd = vect_get_vec_def_for_stmt_copy (rhs_dt, op);
6877 	      dr_chain[i] = vec_oprnd;
6878 	      oprnds[i] = vec_oprnd;
6879 	    }
6880 	  if (mask)
6881 	    vec_mask = vect_get_vec_def_for_stmt_copy (mask_dt, vec_mask);
6882 	  if (dataref_offset)
6883 	    dataref_offset
6884 	      = int_const_binop (PLUS_EXPR, dataref_offset, bump);
6885 	  else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6886 	    vec_offset = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
6887 							 vec_offset);
6888 	  else
6889 	    dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
6890 					   bump);
6891 	}
6892 
6893       if (memory_access_type == VMAT_LOAD_STORE_LANES)
6894 	{
6895 	  tree vec_array;
6896 
6897 	  /* Combine all the vectors into an array.  */
6898 	  vec_array = create_vector_array (vectype, vec_num);
6899 	  for (i = 0; i < vec_num; i++)
6900 	    {
6901 	      vec_oprnd = dr_chain[i];
6902 	      write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
6903 	    }
6904 
6905 	  tree final_mask = NULL;
6906 	  if (loop_masks)
6907 	    final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
6908 					     vectype, j);
6909 	  if (vec_mask)
6910 	    final_mask = prepare_load_store_mask (mask_vectype, final_mask,
6911 						  vec_mask, gsi);
6912 
6913 	  gcall *call;
6914 	  if (final_mask)
6915 	    {
6916 	      /* Emit:
6917 		   MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
6918 				     VEC_ARRAY).  */
6919 	      unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
6920 	      tree alias_ptr = build_int_cst (ref_type, align);
6921 	      call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
6922 						 dataref_ptr, alias_ptr,
6923 						 final_mask, vec_array);
6924 	    }
6925 	  else
6926 	    {
6927 	      /* Emit:
6928 		   MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY).  */
6929 	      data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
6930 	      call = gimple_build_call_internal (IFN_STORE_LANES, 1,
6931 						 vec_array);
6932 	      gimple_call_set_lhs (call, data_ref);
6933 	    }
6934 	  gimple_call_set_nothrow (call, true);
6935 	  new_stmt = call;
6936 	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
6937 	}
6938       else
6939 	{
6940 	  new_stmt = NULL;
6941 	  if (grouped_store)
6942 	    {
6943 	      if (j == 0)
6944 		result_chain.create (group_size);
6945 	      /* Permute.  */
6946 	      vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
6947 					&result_chain);
6948 	    }
6949 
6950 	  next_stmt = first_stmt;
6951 	  for (i = 0; i < vec_num; i++)
6952 	    {
6953 	      unsigned align, misalign;
6954 
6955 	      tree final_mask = NULL_TREE;
6956 	      if (loop_masks)
6957 		final_mask = vect_get_loop_mask (gsi, loop_masks,
6958 						 vec_num * ncopies,
6959 						 vectype, vec_num * j + i);
6960 	      if (vec_mask)
6961 		final_mask = prepare_load_store_mask (mask_vectype, final_mask,
6962 						      vec_mask, gsi);
6963 
6964 	      if (memory_access_type == VMAT_GATHER_SCATTER)
6965 		{
6966 		  tree scale = size_int (gs_info.scale);
6967 		  gcall *call;
6968 		  if (loop_masks)
6969 		    call = gimple_build_call_internal
6970 		      (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
6971 		       scale, vec_oprnd, final_mask);
6972 		  else
6973 		    call = gimple_build_call_internal
6974 		      (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
6975 		       scale, vec_oprnd);
6976 		  gimple_call_set_nothrow (call, true);
6977 		  new_stmt = call;
6978 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
6979 		  break;
6980 		}
6981 
6982 	      if (i > 0)
6983 		/* Bump the vector pointer.  */
6984 		dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6985 					       stmt, bump);
6986 
6987 	      if (slp)
6988 		vec_oprnd = vec_oprnds[i];
6989 	      else if (grouped_store)
6990 		/* For grouped stores vectorized defs are interleaved in
6991 		   vect_permute_store_chain().  */
6992 		vec_oprnd = result_chain[i];
6993 
6994 	      align = DR_TARGET_ALIGNMENT (first_dr);
6995 	      if (aligned_access_p (first_dr))
6996 		misalign = 0;
6997 	      else if (DR_MISALIGNMENT (first_dr) == -1)
6998 		{
6999 		  align = dr_alignment (vect_dr_behavior (first_dr));
7000 		  misalign = 0;
7001 		}
7002 	      else
7003 		misalign = DR_MISALIGNMENT (first_dr);
7004 	      if (dataref_offset == NULL_TREE
7005 		  && TREE_CODE (dataref_ptr) == SSA_NAME)
7006 		set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
7007 					misalign);
7008 
7009 	      if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7010 		{
7011 		  tree perm_mask = perm_mask_for_reverse (vectype);
7012 		  tree perm_dest
7013 		    = vect_create_destination_var (vect_get_store_rhs (stmt),
7014 						   vectype);
7015 		  tree new_temp = make_ssa_name (perm_dest);
7016 
7017 		  /* Generate the permute statement.  */
7018 		  gimple *perm_stmt
7019 		    = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
7020 					   vec_oprnd, perm_mask);
7021 		  vect_finish_stmt_generation (stmt, perm_stmt, gsi);
7022 
7023 		  perm_stmt = SSA_NAME_DEF_STMT (new_temp);
7024 		  vec_oprnd = new_temp;
7025 		}
7026 
7027 	      /* Arguments are ready.  Create the new vector stmt.  */
7028 	      if (final_mask)
7029 		{
7030 		  align = least_bit_hwi (misalign | align);
7031 		  tree ptr = build_int_cst (ref_type, align);
7032 		  gcall *call
7033 		    = gimple_build_call_internal (IFN_MASK_STORE, 4,
7034 						  dataref_ptr, ptr,
7035 						  final_mask, vec_oprnd);
7036 		  gimple_call_set_nothrow (call, true);
7037 		  new_stmt = call;
7038 		}
7039 	      else
7040 		{
7041 		  data_ref = fold_build2 (MEM_REF, vectype,
7042 					  dataref_ptr,
7043 					  dataref_offset
7044 					  ? dataref_offset
7045 					  : build_int_cst (ref_type, 0));
7046 		  if (aligned_access_p (first_dr))
7047 		    ;
7048 		  else if (DR_MISALIGNMENT (first_dr) == -1)
7049 		    TREE_TYPE (data_ref)
7050 		      = build_aligned_type (TREE_TYPE (data_ref),
7051 					    align * BITS_PER_UNIT);
7052 		  else
7053 		    TREE_TYPE (data_ref)
7054 		      = build_aligned_type (TREE_TYPE (data_ref),
7055 					    TYPE_ALIGN (elem_type));
7056 		  vect_copy_ref_info (data_ref, DR_REF (first_dr));
7057 		  new_stmt = gimple_build_assign (data_ref, vec_oprnd);
7058 		}
7059 	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
7060 
7061 	      if (slp)
7062 		continue;
7063 
7064 	      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
7065 	      if (!next_stmt)
7066 		break;
7067 	    }
7068 	}
7069       if (!slp)
7070 	{
7071 	  if (j == 0)
7072 	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7073 	  else
7074 	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7075 	  prev_stmt_info = vinfo_for_stmt (new_stmt);
7076 	}
7077     }
7078 
7079   oprnds.release ();
7080   result_chain.release ();
7081   vec_oprnds.release ();
7082 
7083   return true;
7084 }
7085 
7086 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
7087    VECTOR_CST mask.  No checks are made that the target platform supports the
7088    mask, so callers may wish to test can_vec_perm_const_p separately, or use
7089    vect_gen_perm_mask_checked.  */
7090 
7091 tree
7092 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
7093 {
7094   tree mask_type;
7095 
7096   poly_uint64 nunits = sel.length ();
7097   gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
7098 
7099   mask_type = build_vector_type (ssizetype, nunits);
7100   return vec_perm_indices_to_tree (mask_type, sel);
7101 }
7102 
7103 /* Checked version of vect_gen_perm_mask_any.  Asserts can_vec_perm_const_p,
7104    i.e. that the target supports the pattern _for arbitrary input vectors_.  */
7105 
7106 tree
7107 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
7108 {
7109   gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
7110   return vect_gen_perm_mask_any (vectype, sel);
7111 }
7112 
7113 /* Given vector variables X and Y that were generated for the scalar
7114    STMT, generate instructions to permute the vector elements of X and Y
7115    using permutation mask MASK_VEC, insert them at *GSI and return the
7116    permuted vector variable.  */
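/* For example (illustrative): with X = { x0, x1, x2, x3 },
   Y = { y0, y1, y2, y3 } and MASK_VEC = { 0, 4, 1, 5 }, the generated
   VEC_PERM_EXPR produces { x0, y0, x1, y1 }.  */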
7117 
7118 static tree
7119 permute_vec_elements (tree x, tree y, tree mask_vec, gimple *stmt,
7120 		      gimple_stmt_iterator *gsi)
7121 {
7122   tree vectype = TREE_TYPE (x);
7123   tree perm_dest, data_ref;
7124   gimple *perm_stmt;
7125 
7126   tree scalar_dest = gimple_get_lhs (stmt);
7127   if (TREE_CODE (scalar_dest) == SSA_NAME)
7128     perm_dest = vect_create_destination_var (scalar_dest, vectype);
7129   else
7130     perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
7131   data_ref = make_ssa_name (perm_dest);
7132 
7133   /* Generate the permute statement.  */
7134   perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
7135   vect_finish_stmt_generation (stmt, perm_stmt, gsi);
7136 
7137   return data_ref;
7138 }
7139 
7140 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
7141    inserting them on the loop's preheader edge.  Returns true if we
7142    were successful in doing so (and thus STMT can then be moved),
7143    otherwise returns false.  */
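/* For example (illustrative): if STMT is the invariant load _3 = MEM[_2]
   and _2 = &a + _1 is defined inside LOOP while _1 is defined outside of
   it, the definition of _2 is moved to the preheader so that the caller
   can subsequently hoist STMT itself.  */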
7144 
7145 static bool
7146 hoist_defs_of_uses (gimple *stmt, struct loop *loop)
7147 {
7148   ssa_op_iter i;
7149   tree op;
7150   bool any = false;
7151 
7152   FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
7153     {
7154       gimple *def_stmt = SSA_NAME_DEF_STMT (op);
7155       if (!gimple_nop_p (def_stmt)
7156 	  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
7157 	{
7158 	  /* Make sure we don't need to recurse.  While we could do
7159 	     so in simple cases, for more complex use webs we don't
7160 	     have an easy way to preserve stmt order to fulfil
7161 	     dependencies within them.  */
7162 	  tree op2;
7163 	  ssa_op_iter i2;
7164 	  if (gimple_code (def_stmt) == GIMPLE_PHI)
7165 	    return false;
7166 	  FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
7167 	    {
7168 	      gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
7169 	      if (!gimple_nop_p (def_stmt2)
7170 		  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
7171 		return false;
7172 	    }
7173 	  any = true;
7174 	}
7175     }
7176 
7177   if (!any)
7178     return true;
7179 
7180   FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
7181     {
7182       gimple *def_stmt = SSA_NAME_DEF_STMT (op);
7183       if (!gimple_nop_p (def_stmt)
7184 	  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
7185 	{
7186 	  gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
7187 	  gsi_remove (&gsi, false);
7188 	  gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
7189 	}
7190     }
7191 
7192   return true;
7193 }
7194 
7195 /* vectorizable_load.
7196 
7197    Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
7198    can be vectorized.
7199    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7200    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
7201    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
7202 
7203 static bool
7204 vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
7205                    slp_tree slp_node, slp_instance slp_node_instance)
7206 {
7207   tree scalar_dest;
7208   tree vec_dest = NULL;
7209   tree data_ref = NULL;
7210   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7211   stmt_vec_info prev_stmt_info;
7212   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7213   struct loop *loop = NULL;
7214   struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
7215   bool nested_in_vect_loop = false;
7216   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
7217   tree elem_type;
7218   tree new_temp;
7219   machine_mode mode;
7220   gimple *new_stmt = NULL;
7221   tree dummy;
7222   enum dr_alignment_support alignment_support_scheme;
7223   tree dataref_ptr = NULL_TREE;
7224   tree dataref_offset = NULL_TREE;
7225   gimple *ptr_incr = NULL;
7226   int ncopies;
7227   int i, j;
7228   unsigned int group_size;
7229   poly_uint64 group_gap_adj;
7230   tree msq = NULL_TREE, lsq;
7231   tree offset = NULL_TREE;
7232   tree byte_offset = NULL_TREE;
7233   tree realignment_token = NULL_TREE;
7234   gphi *phi = NULL;
7235   vec<tree> dr_chain = vNULL;
7236   bool grouped_load = false;
7237   gimple *first_stmt;
7238   gimple *first_stmt_for_drptr = NULL;
7239   bool inv_p;
7240   bool compute_in_loop = false;
7241   struct loop *at_loop;
7242   int vec_num;
7243   bool slp = (slp_node != NULL);
7244   bool slp_perm = false;
7245   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7246   poly_uint64 vf;
7247   tree aggr_type;
7248   gather_scatter_info gs_info;
7249   vec_info *vinfo = stmt_info->vinfo;
7250   tree ref_type;
7251   enum vect_def_type mask_dt = vect_unknown_def_type;
7252 
7253   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7254     return false;
7255 
7256   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7257       && ! vec_stmt)
7258     return false;
7259 
7260   tree mask = NULL_TREE, mask_vectype = NULL_TREE;
7261   if (is_gimple_assign (stmt))
7262     {
7263       scalar_dest = gimple_assign_lhs (stmt);
7264       if (TREE_CODE (scalar_dest) != SSA_NAME)
7265 	return false;
7266 
7267       tree_code code = gimple_assign_rhs_code (stmt);
7268       if (code != ARRAY_REF
7269 	  && code != BIT_FIELD_REF
7270 	  && code != INDIRECT_REF
7271 	  && code != COMPONENT_REF
7272 	  && code != IMAGPART_EXPR
7273 	  && code != REALPART_EXPR
7274 	  && code != MEM_REF
7275 	  && TREE_CODE_CLASS (code) != tcc_declaration)
7276 	return false;
7277     }
7278   else
7279     {
7280       gcall *call = dyn_cast <gcall *> (stmt);
7281       if (!call || !gimple_call_internal_p (call))
7282 	return false;
7283 
7284       internal_fn ifn = gimple_call_internal_fn (call);
7285       if (!internal_load_fn_p (ifn))
7286 	return false;
7287 
7288       scalar_dest = gimple_call_lhs (call);
7289       if (!scalar_dest)
7290 	return false;
7291 
7292       if (slp_node != NULL)
7293 	{
7294 	  if (dump_enabled_p ())
7295 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7296 			     "SLP of masked loads not supported.\n");
7297 	  return false;
7298 	}
7299 
7300       int mask_index = internal_fn_mask_index (ifn);
7301       if (mask_index >= 0)
7302 	{
7303 	  mask = gimple_call_arg (call, mask_index);
7304 	  if (!vect_check_load_store_mask (stmt, mask, &mask_dt,
7305 					   &mask_vectype))
7306 	    return false;
7307 	}
7308     }
7309 
7310   if (!STMT_VINFO_DATA_REF (stmt_info))
7311     return false;
7312 
7313   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7314   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7315 
7316   if (loop_vinfo)
7317     {
7318       loop = LOOP_VINFO_LOOP (loop_vinfo);
7319       nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
7320       vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7321     }
7322   else
7323     vf = 1;
7324 
7325   /* Multiple types in SLP are handled by creating the appropriate number of
7326      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
7327      case of SLP.  */
7328   if (slp)
7329     ncopies = 1;
7330   else
7331     ncopies = vect_get_num_copies (loop_vinfo, vectype);
7332 
7333   gcc_assert (ncopies >= 1);
7334 
7335   /* FORNOW. This restriction should be relaxed.  */
7336   if (nested_in_vect_loop && ncopies > 1)
7337     {
7338       if (dump_enabled_p ())
7339         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7340                          "multiple types in nested loop.\n");
7341       return false;
7342     }
7343 
7344   /* Invalidate assumptions made by dependence analysis when vectorization
7345      on the unrolled body effectively re-orders stmts.  */
7346   if (ncopies > 1
7347       && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
7348       && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
7349 		   STMT_VINFO_MIN_NEG_DIST (stmt_info)))
7350     {
7351       if (dump_enabled_p ())
7352 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7353 			 "cannot perform implicit CSE when unrolling "
7354 			 "with negative dependence distance\n");
7355       return false;
7356     }
7357 
7358   elem_type = TREE_TYPE (vectype);
7359   mode = TYPE_MODE (vectype);
7360 
7361   /* FORNOW. In some cases can vectorize even if data-type not supported
7362     (e.g. - data copies).  */
7363   if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
7364     {
7365       if (dump_enabled_p ())
7366         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7367                          "Aligned load, but unsupported type.\n");
7368       return false;
7369     }
7370 
7371   /* Check if the load is a part of an interleaving chain.  */
7372   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
7373     {
7374       grouped_load = true;
7375       /* FORNOW */
7376       gcc_assert (!nested_in_vect_loop);
7377       gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
7378 
7379       first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
7380       group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
7381 
7382       if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
7383 	slp_perm = true;
7384 
7385       /* Invalidate assumptions made by dependence analysis when vectorization
7386 	 on the unrolled body effectively re-orders stmts.  */
7387       if (!PURE_SLP_STMT (stmt_info)
7388 	  && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
7389 	  && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
7390 		       STMT_VINFO_MIN_NEG_DIST (stmt_info)))
7391 	{
7392 	  if (dump_enabled_p ())
7393 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7394 			     "cannot perform implicit CSE when performing "
7395 			     "group loads with negative dependence distance\n");
7396 	  return false;
7397 	}
7398 
7399       /* Similarly, when the stmt is a load that is both part of an SLP
7400          instance and a loop-vectorized stmt via the same-dr mechanism,
7401 	 we have to give up.  */
7402       if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)
7403 	  && (STMT_SLP_TYPE (stmt_info)
7404 	      != STMT_SLP_TYPE (vinfo_for_stmt
7405 				 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)))))
7406 	{
7407 	  if (dump_enabled_p ())
7408 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7409 			     "conflicting SLP types for CSEd load\n");
7410 	  return false;
7411 	}
7412     }
7413   else
7414     group_size = 1;
7415 
7416   vect_memory_access_type memory_access_type;
7417   if (!get_load_store_type (stmt, vectype, slp, mask, VLS_LOAD, ncopies,
7418 			    &memory_access_type, &gs_info))
7419     return false;
7420 
7421   if (mask)
7422     {
7423       if (memory_access_type == VMAT_CONTIGUOUS)
7424 	{
7425 	  machine_mode vec_mode = TYPE_MODE (vectype);
7426 	  if (!VECTOR_MODE_P (vec_mode)
7427 	      || !can_vec_mask_load_store_p (vec_mode,
7428 					     TYPE_MODE (mask_vectype), true))
7429 	    return false;
7430 	}
7431       else if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7432 	{
7433 	  tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
7434 	  tree masktype
7435 	    = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
7436 	  if (TREE_CODE (masktype) == INTEGER_TYPE)
7437 	    {
7438 	      if (dump_enabled_p ())
7439 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7440 				 "masked gather with integer mask not"
7441 				 " supported.");
7442 	      return false;
7443 	    }
7444 	}
7445       else if (memory_access_type != VMAT_LOAD_STORE_LANES
7446 	       && memory_access_type != VMAT_GATHER_SCATTER)
7447 	{
7448 	  if (dump_enabled_p ())
7449 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7450 			     "unsupported access type for masked load.\n");
7451 	  return false;
7452 	}
7453     }
7454 
7455   if (!vec_stmt) /* transformation not required.  */
7456     {
7457       if (!slp)
7458 	STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7459 
7460       if (loop_vinfo
7461 	  && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
7462 	check_load_store_masking (loop_vinfo, vectype, VLS_LOAD, group_size,
7463 				  memory_access_type, &gs_info);
7464 
7465       STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
7466       /* The SLP costs are calculated during SLP analysis.  */
7467       if (! slp_node)
7468 	vect_model_load_cost (stmt_info, ncopies, memory_access_type,
7469 			      NULL, NULL, NULL);
7470       return true;
7471     }
7472 
7473   if (!slp)
7474     gcc_assert (memory_access_type
7475 		== STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7476 
7477   if (dump_enabled_p ())
7478     dump_printf_loc (MSG_NOTE, vect_location,
7479                      "transform load. ncopies = %d\n", ncopies);
7480 
7481   /* Transform.  */
7482 
7483   ensure_base_align (dr);
7484 
7485   if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7486     {
7487       vect_build_gather_load_calls (stmt, gsi, vec_stmt, &gs_info, mask,
7488 				    mask_dt);
7489       return true;
7490     }
7491 
7492   if (memory_access_type == VMAT_ELEMENTWISE
7493       || memory_access_type == VMAT_STRIDED_SLP)
7494     {
7495       gimple_stmt_iterator incr_gsi;
7496       bool insert_after;
7497       gimple *incr;
7498       tree offvar;
7499       tree ivstep;
7500       tree running_off;
7501       vec<constructor_elt, va_gc> *v = NULL;
7502       tree stride_base, stride_step, alias_off;
7503       /* Checked by get_load_store_type.  */
7504       unsigned int const_nunits = nunits.to_constant ();
7505       unsigned HOST_WIDE_INT cst_offset = 0;
7506 
7507       gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
7508       gcc_assert (!nested_in_vect_loop);
7509 
7510       if (grouped_load)
7511 	{
7512 	  first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
7513 	  first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
7514 	}
7515       else
7516 	{
7517 	  first_stmt = stmt;
7518 	  first_dr = dr;
7519 	}
7520       if (slp && grouped_load)
7521 	{
7522 	  group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
7523 	  ref_type = get_group_alias_ptr_type (first_stmt);
7524 	}
7525       else
7526 	{
7527 	  if (grouped_load)
7528 	    cst_offset
7529 	      = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
7530 		 * vect_get_place_in_interleaving_chain (stmt, first_stmt));
7531 	  group_size = 1;
7532 	  ref_type = reference_alias_ptr_type (DR_REF (dr));
7533 	}
7534 
7535       stride_base
7536 	= fold_build_pointer_plus
7537 	    (DR_BASE_ADDRESS (first_dr),
7538 	     size_binop (PLUS_EXPR,
7539 			 convert_to_ptrofftype (DR_OFFSET (first_dr)),
7540 			 convert_to_ptrofftype (DR_INIT (first_dr))));
7541       stride_step = fold_convert (sizetype, DR_STEP (first_dr));
7542 
7543       /* For a load with loop-invariant (but other than power-of-2)
7544          stride (i.e. not a grouped access) like so:
7545 
7546 	   for (i = 0; i < n; i += stride)
7547 	     ... = array[i];
7548 
7549 	 we generate a new induction variable and new accesses to
7550 	 form a new vector (or vectors, depending on ncopies):
7551 
7552 	   for (j = 0; ; j += VF*stride)
7553 	     tmp1 = array[j];
7554 	     tmp2 = array[j + stride];
7555 	     ...
7556 	     vectemp = {tmp1, tmp2, ...}
7557          */
7558 
7559       ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
7560 			    build_int_cst (TREE_TYPE (stride_step), vf));
7561 
7562       standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7563 
7564       stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
7565       ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
7566       create_iv (stride_base, ivstep, NULL,
7567 		 loop, &incr_gsi, insert_after,
7568 		 &offvar, NULL);
7569       incr = gsi_stmt (incr_gsi);
7570       set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
7571 
7572       stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
7573 
7574       prev_stmt_info = NULL;
7575       running_off = offvar;
7576       alias_off = build_int_cst (ref_type, 0);
7577       int nloads = const_nunits;
7578       int lnel = 1;
7579       tree ltype = TREE_TYPE (vectype);
7580       tree lvectype = vectype;
7581       auto_vec<tree> dr_chain;
7582       if (memory_access_type == VMAT_STRIDED_SLP)
7583 	{
7584 	  if (group_size < const_nunits)
7585 	    {
7586 	      /* First check if vec_init optab supports construction from
7587 		 vector elts directly.  */
7588 	      scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
7589 	      machine_mode vmode;
7590 	      if (mode_for_vector (elmode, group_size).exists (&vmode)
7591 		  && VECTOR_MODE_P (vmode)
7592 		  && targetm.vector_mode_supported_p (vmode)
7593 		  && (convert_optab_handler (vec_init_optab,
7594 					     TYPE_MODE (vectype), vmode)
7595 		      != CODE_FOR_nothing))
7596 		{
7597 		  nloads = const_nunits / group_size;
7598 		  lnel = group_size;
7599 		  ltype = build_vector_type (TREE_TYPE (vectype), group_size);
7600 		}
7601 	      else
7602 		{
7603 		  /* Otherwise avoid emitting a constructor of vector elements
7604 		     by performing the loads using an integer type of the same
7605 		     size, constructing a vector of those and then
7606 		     re-interpreting it as the original vector type.
7607 		     This avoids a huge runtime penalty due to the general
7608 		     inability to perform store forwarding from smaller stores
7609 		     to a larger load.  */
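		  /* E.g. (illustrative): for 16-bit elements with
		     group_size == 2 and const_nunits == 8 this means four
		     32-bit loads building a 4 x 32-bit vector that is then
		     view-converted back to the 8 x 16-bit vectype.  */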
7610 		  unsigned lsize
7611 		    = group_size * TYPE_PRECISION (TREE_TYPE (vectype));
7612 		  elmode = int_mode_for_size (lsize, 0).require ();
7613 		  unsigned int lnunits = const_nunits / group_size;
7614 		  /* If we can't construct such a vector fall back to
7615 		     element loads of the original vector type.  */
7616 		  if (mode_for_vector (elmode, lnunits).exists (&vmode)
7617 		      && VECTOR_MODE_P (vmode)
7618 		      && targetm.vector_mode_supported_p (vmode)
7619 		      && (convert_optab_handler (vec_init_optab, vmode, elmode)
7620 			  != CODE_FOR_nothing))
7621 		    {
7622 		      nloads = lnunits;
7623 		      lnel = group_size;
7624 		      ltype = build_nonstandard_integer_type (lsize, 1);
7625 		      lvectype = build_vector_type (ltype, nloads);
7626 		    }
7627 		}
7628 	    }
7629 	  else
7630 	    {
7631 	      nloads = 1;
7632 	      lnel = const_nunits;
7633 	      ltype = vectype;
7634 	    }
7635 	  ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
7636 	}
7637       if (slp)
7638 	{
7639 	  /* For SLP permutation support we need to load the whole group,
7640 	     not only the number of vector stmts the permutation result
7641 	     fits in.  */
7642 	  if (slp_perm)
7643 	    {
7644 	      /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7645 		 variable VF.  */
7646 	      unsigned int const_vf = vf.to_constant ();
7647 	      ncopies = CEIL (group_size * const_vf, const_nunits);
7648 	      dr_chain.create (ncopies);
7649 	    }
7650 	  else
7651 	    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7652 	}
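      /* For instance (illustrative): with GROUP_SIZE == 3, VF == 4 and
	 4-element vectors, the SLP-permutation case above loads
	 CEIL (3 * 4, 4) == 3 vectors, covering the whole group.  */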
7653       unsigned int group_el = 0;
7654       unsigned HOST_WIDE_INT
7655 	elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7656       for (j = 0; j < ncopies; j++)
7657 	{
7658 	  if (nloads > 1)
7659 	    vec_alloc (v, nloads);
7660 	  for (i = 0; i < nloads; i++)
7661 	    {
7662 	      tree this_off = build_int_cst (TREE_TYPE (alias_off),
7663 					     group_el * elsz + cst_offset);
7664 	      tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
7665 	      vect_copy_ref_info (data_ref, DR_REF (first_dr));
7666 	      new_stmt = gimple_build_assign (make_ssa_name (ltype), data_ref);
7667 	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
7668 	      if (nloads > 1)
7669 		CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
7670 					gimple_assign_lhs (new_stmt));
7671 
7672 	      group_el += lnel;
7673 	      if (! slp
7674 		  || group_el == group_size)
7675 		{
7676 		  tree newoff = copy_ssa_name (running_off);
7677 		  gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7678 						      running_off, stride_step);
7679 		  vect_finish_stmt_generation (stmt, incr, gsi);
7680 
7681 		  running_off = newoff;
7682 		  group_el = 0;
7683 		}
7684 	    }
7685 	  if (nloads > 1)
7686 	    {
7687 	      tree vec_inv = build_constructor (lvectype, v);
7688 	      new_temp = vect_init_vector (stmt, vec_inv, lvectype, gsi);
7689 	      new_stmt = SSA_NAME_DEF_STMT (new_temp);
7690 	      if (lvectype != vectype)
7691 		{
7692 		  new_stmt = gimple_build_assign (make_ssa_name (vectype),
7693 						  VIEW_CONVERT_EXPR,
7694 						  build1 (VIEW_CONVERT_EXPR,
7695 							  vectype, new_temp));
7696 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
7697 		}
7698 	    }
7699 
7700 	  if (slp)
7701 	    {
7702 	      if (slp_perm)
7703 		dr_chain.quick_push (gimple_assign_lhs (new_stmt));
7704 	      else
7705 		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7706 	    }
7707 	  else
7708 	    {
7709 	      if (j == 0)
7710 		STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7711 	      else
7712 		STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7713 	      prev_stmt_info = vinfo_for_stmt (new_stmt);
7714 	    }
7715 	}
7716       if (slp_perm)
7717 	{
7718 	  unsigned n_perms;
7719 	  vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7720 					slp_node_instance, false, &n_perms);
7721 	}
7722       return true;
7723     }
7724 
7725   if (memory_access_type == VMAT_GATHER_SCATTER
7726       || (!slp && memory_access_type == VMAT_CONTIGUOUS))
7727     grouped_load = false;
7728 
7729   if (grouped_load)
7730     {
7731       first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
7732       group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
7733       /* For SLP vectorization we directly vectorize a subchain
7734          without permutation.  */
7735       if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
7736 	first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7737       /* For BB vectorization always use the first stmt to base
7738 	 the data ref pointer on.  */
7739       if (bb_vinfo)
7740 	first_stmt_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7741 
7742       /* Check if the chain of loads is already vectorized.  */
7743       if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
7744 	  /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
7745 	     ???  But we can only do so if there is exactly one
7746 	     as we have no way to get at the rest.  Leave the CSE
7747 	     opportunity alone.
7748 	     ???  With the group load eventually participating
7749 	     in multiple different permutations (having multiple
7750 	     slp nodes which refer to the same group) the CSE
7751 	     is even wrong code.  See PR56270.  */
7752 	  && !slp)
7753 	{
7754 	  *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7755 	  return true;
7756 	}
7757       first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
7758       group_gap_adj = 0;
7759 
7760       /* VEC_NUM is the number of vect stmts to be created for this group.  */
7761       if (slp)
7762 	{
7763 	  grouped_load = false;
7764 	  /* For SLP permutation support we need to load the whole group,
7765 	     not only the number of vector stmts the permutation result
7766 	     fits in.  */
7767 	  if (slp_perm)
7768 	    {
7769 	      /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7770 		 variable VF.  */
7771 	      unsigned int const_vf = vf.to_constant ();
7772 	      unsigned int const_nunits = nunits.to_constant ();
7773 	      vec_num = CEIL (group_size * const_vf, const_nunits);
7774 	      group_gap_adj = vf * group_size - nunits * vec_num;
7775 	    }
7776 	  else
7777 	    {
7778 	      vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7779 	      group_gap_adj
7780 		= group_size - SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
7781 	    }
7782     	}
7783       else
7784 	vec_num = group_size;
7785 
7786       ref_type = get_group_alias_ptr_type (first_stmt);
7787     }
7788   else
7789     {
7790       first_stmt = stmt;
7791       first_dr = dr;
7792       group_size = vec_num = 1;
7793       group_gap_adj = 0;
7794       ref_type = reference_alias_ptr_type (DR_REF (first_dr));
7795     }
7796 
7797   alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
7798   gcc_assert (alignment_support_scheme);
7799   vec_loop_masks *loop_masks
7800     = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
7801        ? &LOOP_VINFO_MASKS (loop_vinfo)
7802        : NULL);
  /* Targets with load-lanes instructions must not require explicit
     realignment.  vect_supportable_dr_alignment always returns either
     dr_aligned or dr_unaligned_supported for masked operations.  */
7806   gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
7807 	       && !mask
7808 	       && !loop_masks)
7809 	      || alignment_support_scheme == dr_aligned
7810 	      || alignment_support_scheme == dr_unaligned_supported);
7811 
7812   /* In case the vectorization factor (VF) is bigger than the number
7813      of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e., we need to "unroll" the
7815      vector stmt by a factor VF/nunits.  In doing so, we record a pointer
7816      from one copy of the vector stmt to the next, in the field
7817      STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
7818      stages to find the correct vector defs to be used when vectorizing
7819      stmts that use the defs of the current stmt.  The example below
7820      illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
7821      need to create 4 vectorized stmts):
7822 
7823      before vectorization:
7824                                 RELATED_STMT    VEC_STMT
7825         S1:     x = memref      -               -
7826         S2:     z = x + 1       -               -
7827 
7828      step 1: vectorize stmt S1:
7829         We first create the vector stmt VS1_0, and, as usual, record a
7830         pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
7831         Next, we create the vector stmt VS1_1, and record a pointer to
7832         it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
7833         Similarly, for VS1_2 and VS1_3.  This is the resulting chain of
7834         stmts and pointers:
7835                                 RELATED_STMT    VEC_STMT
7836         VS1_0:  vx0 = memref0   VS1_1           -
7837         VS1_1:  vx1 = memref1   VS1_2           -
7838         VS1_2:  vx2 = memref2   VS1_3           -
7839         VS1_3:  vx3 = memref3   -               -
7840         S1:     x = load        -               VS1_0
7841         S2:     z = x + 1       -               -
7842 
     See the documentation of vect_get_vec_def_for_stmt_copy for how the
     information recorded in the RELATED_STMT field is used to vectorize
     stmt S2.  */
7846 
7847   /* In case of interleaving (non-unit grouped access):
7848 
7849      S1:  x2 = &base + 2
7850      S2:  x0 = &base
7851      S3:  x1 = &base + 1
7852      S4:  x3 = &base + 3
7853 
7854      Vectorized loads are created in the order of memory accesses
7855      starting from the access of the first stmt of the chain:
7856 
7857      VS1: vx0 = &base
7858      VS2: vx1 = &base + vec_size*1
     VS3: vx2 = &base + vec_size*2
     VS4: vx3 = &base + vec_size*3
7861 
7862      Then permutation statements are generated:
7863 
7864      VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
7865      VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
7866        ...
7867 
7868      And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7869      (the order of the data-refs in the output of vect_permute_load_chain
7870      corresponds to the order of scalar stmts in the interleaving chain - see
7871      the documentation of vect_permute_load_chain()).
7872      The generation of permutation stmts and recording them in
7873      STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
7874 
7875      In case of both multiple types and interleaving, the vector loads and
7876      permutation stmts above are created for every copy.  The result vector
7877      stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
7878      corresponding STMT_VINFO_RELATED_STMT for the next copies.  */
7879 
7880   /* If the data reference is aligned (dr_aligned) or potentially unaligned
7881      on a target that supports unaligned accesses (dr_unaligned_supported)
7882      we generate the following code:
7883          p = initial_addr;
7884          indx = 0;
7885          loop {
7886 	   p = p + indx * vectype_size;
7887            vec_dest = *(p);
7888            indx = indx + 1;
7889          }
7890 
7891      Otherwise, the data reference is potentially unaligned on a target that
7892      does not support unaligned accesses (dr_explicit_realign_optimized) -
7893      then generate the following code, in which the data in each iteration is
7894      obtained by two vector loads, one from the previous iteration, and one
7895      from the current iteration:
7896          p1 = initial_addr;
7897          msq_init = *(floor(p1))
7898          p2 = initial_addr + VS - 1;
7899          realignment_token = call target_builtin;
7900          indx = 0;
7901          loop {
7902            p2 = p2 + indx * vectype_size
7903            lsq = *(floor(p2))
7904            vec_dest = realign_load (msq, lsq, realignment_token)
7905            indx = indx + 1;
7906            msq = lsq;
7907          }   */
7908 
7909   /* If the misalignment remains the same throughout the execution of the
7910      loop, we can create the init_addr and permutation mask at the loop
7911      preheader.  Otherwise, it needs to be created inside the loop.
7912      This can only occur when vectorizing memory accesses in the inner-loop
7913      nested within an outer-loop that is being vectorized.  */
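  /* For example (illustration only), a data reference whose step is 12
     bytes accessed with a 16-byte vector type starts at a different
     misalignment in each outer-loop iteration, so any realignment has
     to be set up inside the loop.  */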
7914 
7915   if (nested_in_vect_loop
7916       && !multiple_p (DR_STEP_ALIGNMENT (dr),
7917 		      GET_MODE_SIZE (TYPE_MODE (vectype))))
7918     {
7919       gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
7920       compute_in_loop = true;
7921     }
7922 
7923   if ((alignment_support_scheme == dr_explicit_realign_optimized
7924        || alignment_support_scheme == dr_explicit_realign)
7925       && !compute_in_loop)
7926     {
7927       msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
7928 				    alignment_support_scheme, NULL_TREE,
7929 				    &at_loop);
7930       if (alignment_support_scheme == dr_explicit_realign_optimized)
7931 	{
7932 	  phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
7933 	  byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
7934 				    size_one_node);
7935 	}
7936     }
7937   else
7938     at_loop = loop;
7939 
7940   if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7941     offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
7942 
7943   tree bump;
7944   tree vec_offset = NULL_TREE;
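  /* Choose the aggregate type used to access memory and the pointer
     increment (BUMP) applied between copies: neither is needed when the
     statement itself is a gather/scatter, other VMAT_GATHER_SCATTER
     accesses get their bump from vect_get_strided_load_store_ops,
     load-lanes accesses use an array of VEC_NUM vectors, and everything
     else uses the vector type itself.  */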
7945   if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7946     {
7947       aggr_type = NULL_TREE;
7948       bump = NULL_TREE;
7949     }
7950   else if (memory_access_type == VMAT_GATHER_SCATTER)
7951     {
7952       aggr_type = elem_type;
7953       vect_get_strided_load_store_ops (stmt, loop_vinfo, &gs_info,
7954 				       &bump, &vec_offset);
7955     }
7956   else
7957     {
7958       if (memory_access_type == VMAT_LOAD_STORE_LANES)
7959 	aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
7960       else
7961 	aggr_type = vectype;
7962       bump = vect_get_data_ptr_increment (dr, aggr_type, memory_access_type);
7963     }
7964 
7965   tree vec_mask = NULL_TREE;
7966   prev_stmt_info = NULL;
7967   poly_uint64 group_elt = 0;
7968   for (j = 0; j < ncopies; j++)
7969     {
7970       /* 1. Create the vector or array pointer update chain.  */
7971       if (j == 0)
7972 	{
7973 	  bool simd_lane_access_p
7974 	    = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
7975 	  if (simd_lane_access_p
7976 	      && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
7977 	      && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
7978 	      && integer_zerop (DR_OFFSET (first_dr))
7979 	      && integer_zerop (DR_INIT (first_dr))
7980 	      && alias_sets_conflict_p (get_alias_set (aggr_type),
7981 					get_alias_set (TREE_TYPE (ref_type)))
7982 	      && (alignment_support_scheme == dr_aligned
7983 		  || alignment_support_scheme == dr_unaligned_supported))
7984 	    {
7985 	      dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
7986 	      dataref_offset = build_int_cst (ref_type, 0);
7987 	      inv_p = false;
7988 	    }
7989 	  else if (first_stmt_for_drptr
7990 		   && first_stmt != first_stmt_for_drptr)
7991 	    {
7992 	      dataref_ptr
7993 		= vect_create_data_ref_ptr (first_stmt_for_drptr, aggr_type,
7994 					    at_loop, offset, &dummy, gsi,
7995 					    &ptr_incr, simd_lane_access_p,
7996 					    &inv_p, byte_offset, bump);
7997 	      /* Adjust the pointer by the difference to first_stmt.  */
7998 	      data_reference_p ptrdr
7999 		= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr));
8000 	      tree diff = fold_convert (sizetype,
8001 					size_binop (MINUS_EXPR,
8002 						    DR_INIT (first_dr),
8003 						    DR_INIT (ptrdr)));
8004 	      dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8005 					     stmt, diff);
8006 	    }
8007 	  else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8008 	    {
8009 	      vect_get_gather_scatter_ops (loop, stmt, &gs_info,
8010 					   &dataref_ptr, &vec_offset);
8011 	      inv_p = false;
8012 	    }
8013 	  else
8014 	    dataref_ptr
8015 	      = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
8016 					  offset, &dummy, gsi, &ptr_incr,
8017 					  simd_lane_access_p, &inv_p,
8018 					  byte_offset, bump);
8019 	  if (mask)
8020 	    vec_mask = vect_get_vec_def_for_operand (mask, stmt,
8021 						     mask_vectype);
8022 	}
8023       else
8024 	{
8025 	  if (dataref_offset)
8026 	    dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
8027 					      bump);
8028 	  else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8029 	    vec_offset = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
8030 							 vec_offset);
8031 	  else
8032 	    dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8033 					   stmt, bump);
8034 	  if (mask)
8035 	    vec_mask = vect_get_vec_def_for_stmt_copy (mask_dt, vec_mask);
8036 	}
8037 
8038       if (grouped_load || slp_perm)
8039 	dr_chain.create (vec_num);
8040 
8041       if (memory_access_type == VMAT_LOAD_STORE_LANES)
8042 	{
8043 	  tree vec_array;
8044 
8045 	  vec_array = create_vector_array (vectype, vec_num);
8046 
8047 	  tree final_mask = NULL_TREE;
8048 	  if (loop_masks)
8049 	    final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
8050 					     vectype, j);
8051 	  if (vec_mask)
8052 	    final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8053 						  vec_mask, gsi);
8054 
8055 	  gcall *call;
8056 	  if (final_mask)
8057 	    {
8058 	      /* Emit:
8059 		   VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
8060 		                                VEC_MASK).  */
8061 	      unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
8062 	      tree alias_ptr = build_int_cst (ref_type, align);
8063 	      call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
8064 						 dataref_ptr, alias_ptr,
8065 						 final_mask);
8066 	    }
8067 	  else
8068 	    {
8069 	      /* Emit:
8070 		   VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]).  */
8071 	      data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8072 	      call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
8073 	    }
8074 	  gimple_call_set_lhs (call, vec_array);
8075 	  gimple_call_set_nothrow (call, true);
8076 	  new_stmt = call;
8077 	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
8078 
8079 	  /* Extract each vector into an SSA_NAME.  */
8080 	  for (i = 0; i < vec_num; i++)
8081 	    {
8082 	      new_temp = read_vector_array (stmt, gsi, scalar_dest,
8083 					    vec_array, i);
8084 	      dr_chain.quick_push (new_temp);
8085 	    }
8086 
8087 	  /* Record the mapping between SSA_NAMEs and statements.  */
8088 	  vect_record_grouped_load_vectors (stmt, dr_chain);
8089 	}
8090       else
8091 	{
8092 	  for (i = 0; i < vec_num; i++)
8093 	    {
8094 	      tree final_mask = NULL_TREE;
8095 	      if (loop_masks
8096 		  && memory_access_type != VMAT_INVARIANT)
8097 		final_mask = vect_get_loop_mask (gsi, loop_masks,
8098 						 vec_num * ncopies,
8099 						 vectype, vec_num * j + i);
8100 	      if (vec_mask)
8101 		final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8102 						      vec_mask, gsi);
8103 
8104 	      if (i > 0)
8105 		dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8106 					       stmt, bump);
8107 
8108 	      /* 2. Create the vector-load in the loop.  */
8109 	      switch (alignment_support_scheme)
8110 		{
8111 		case dr_aligned:
8112 		case dr_unaligned_supported:
8113 		  {
8114 		    unsigned int align, misalign;
8115 
8116 		    if (memory_access_type == VMAT_GATHER_SCATTER)
8117 		      {
8118 			tree scale = size_int (gs_info.scale);
8119 			gcall *call;
8120 			if (loop_masks)
8121 			  call = gimple_build_call_internal
8122 			    (IFN_MASK_GATHER_LOAD, 4, dataref_ptr,
8123 			     vec_offset, scale, final_mask);
8124 			else
8125 			  call = gimple_build_call_internal
8126 			    (IFN_GATHER_LOAD, 3, dataref_ptr,
8127 			     vec_offset, scale);
8128 			gimple_call_set_nothrow (call, true);
8129 			new_stmt = call;
8130 			data_ref = NULL_TREE;
8131 			break;
8132 		      }
8133 
8134 		    align = DR_TARGET_ALIGNMENT (dr);
8135 		    if (alignment_support_scheme == dr_aligned)
8136 		      {
8137 			gcc_assert (aligned_access_p (first_dr));
8138 			misalign = 0;
8139 		      }
8140 		    else if (DR_MISALIGNMENT (first_dr) == -1)
8141 		      {
8142 			align = dr_alignment (vect_dr_behavior (first_dr));
8143 			misalign = 0;
8144 		      }
8145 		    else
8146 		      misalign = DR_MISALIGNMENT (first_dr);
8147 		    if (dataref_offset == NULL_TREE
8148 			&& TREE_CODE (dataref_ptr) == SSA_NAME)
8149 		      set_ptr_info_alignment (get_ptr_info (dataref_ptr),
8150 					      align, misalign);
8151 
8152 		    if (final_mask)
8153 		      {
8154 			align = least_bit_hwi (misalign | align);
8155 			tree ptr = build_int_cst (ref_type, align);
8156 			gcall *call
8157 			  = gimple_build_call_internal (IFN_MASK_LOAD, 3,
8158 							dataref_ptr, ptr,
8159 							final_mask);
8160 			gimple_call_set_nothrow (call, true);
8161 			new_stmt = call;
8162 			data_ref = NULL_TREE;
8163 		      }
8164 		    else
8165 		      {
8166 			data_ref
8167 			  = fold_build2 (MEM_REF, vectype, dataref_ptr,
8168 					 dataref_offset
8169 					 ? dataref_offset
8170 					 : build_int_cst (ref_type, 0));
8171 			if (alignment_support_scheme == dr_aligned)
8172 			  ;
8173 			else if (DR_MISALIGNMENT (first_dr) == -1)
8174 			  TREE_TYPE (data_ref)
8175 			    = build_aligned_type (TREE_TYPE (data_ref),
8176 						  align * BITS_PER_UNIT);
8177 			else
8178 			  TREE_TYPE (data_ref)
8179 			    = build_aligned_type (TREE_TYPE (data_ref),
8180 						  TYPE_ALIGN (elem_type));
8181 		      }
8182 		    break;
8183 		  }
8184 		case dr_explicit_realign:
8185 		  {
8186 		    tree ptr, bump;
8187 
8188 		    tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
8189 
8190 		    if (compute_in_loop)
8191 		      msq = vect_setup_realignment (first_stmt, gsi,
8192 						    &realignment_token,
8193 						    dr_explicit_realign,
8194 						    dataref_ptr, NULL);
8195 
8196 		    if (TREE_CODE (dataref_ptr) == SSA_NAME)
8197 		      ptr = copy_ssa_name (dataref_ptr);
8198 		    else
8199 		      ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
8200 		    unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
8201 		    new_stmt = gimple_build_assign
8202 				 (ptr, BIT_AND_EXPR, dataref_ptr,
8203 				  build_int_cst
8204 				  (TREE_TYPE (dataref_ptr),
8205 				   -(HOST_WIDE_INT) align));
8206 		    vect_finish_stmt_generation (stmt, new_stmt, gsi);
8207 		    data_ref
8208 		      = build2 (MEM_REF, vectype, ptr,
8209 				build_int_cst (ref_type, 0));
8210 		    vect_copy_ref_info (data_ref, DR_REF (first_dr));
8211 		    vec_dest = vect_create_destination_var (scalar_dest,
8212 							    vectype);
8213 		    new_stmt = gimple_build_assign (vec_dest, data_ref);
8214 		    new_temp = make_ssa_name (vec_dest, new_stmt);
8215 		    gimple_assign_set_lhs (new_stmt, new_temp);
8216 		    gimple_set_vdef (new_stmt, gimple_vdef (stmt));
8217 		    gimple_set_vuse (new_stmt, gimple_vuse (stmt));
8218 		    vect_finish_stmt_generation (stmt, new_stmt, gsi);
8219 		    msq = new_temp;
8220 
8221 		    bump = size_binop (MULT_EXPR, vs,
8222 				       TYPE_SIZE_UNIT (elem_type));
8223 		    bump = size_binop (MINUS_EXPR, bump, size_one_node);
8224 		    ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
8225 		    new_stmt = gimple_build_assign
8226 				 (NULL_TREE, BIT_AND_EXPR, ptr,
8227 				  build_int_cst
8228 				  (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
8229 		    ptr = copy_ssa_name (ptr, new_stmt);
8230 		    gimple_assign_set_lhs (new_stmt, ptr);
8231 		    vect_finish_stmt_generation (stmt, new_stmt, gsi);
8232 		    data_ref
8233 		      = build2 (MEM_REF, vectype, ptr,
8234 				build_int_cst (ref_type, 0));
8235 		    break;
8236 		  }
8237 		case dr_explicit_realign_optimized:
8238 		  {
8239 		    if (TREE_CODE (dataref_ptr) == SSA_NAME)
8240 		      new_temp = copy_ssa_name (dataref_ptr);
8241 		    else
8242 		      new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
8243 		    unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
8244 		    new_stmt = gimple_build_assign
8245 		      (new_temp, BIT_AND_EXPR, dataref_ptr,
8246 		       build_int_cst (TREE_TYPE (dataref_ptr),
8247 				     -(HOST_WIDE_INT) align));
8248 		    vect_finish_stmt_generation (stmt, new_stmt, gsi);
8249 		    data_ref
8250 		      = build2 (MEM_REF, vectype, new_temp,
8251 				build_int_cst (ref_type, 0));
8252 		    break;
8253 		  }
8254 		default:
8255 		  gcc_unreachable ();
8256 		}
8257 	      vec_dest = vect_create_destination_var (scalar_dest, vectype);
8258 	      /* DATA_REF is null if we've already built the statement.  */
8259 	      if (data_ref)
8260 		{
8261 		  vect_copy_ref_info (data_ref, DR_REF (first_dr));
8262 		  new_stmt = gimple_build_assign (vec_dest, data_ref);
8263 		}
8264 	      new_temp = make_ssa_name (vec_dest, new_stmt);
8265 	      gimple_set_lhs (new_stmt, new_temp);
8266 	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
8267 
8268 	      /* 3. Handle explicit realignment if necessary/supported.
8269 		 Create in loop:
8270 		   vec_dest = realign_load (msq, lsq, realignment_token)  */
8271 	      if (alignment_support_scheme == dr_explicit_realign_optimized
8272 		  || alignment_support_scheme == dr_explicit_realign)
8273 		{
8274 		  lsq = gimple_assign_lhs (new_stmt);
8275 		  if (!realignment_token)
8276 		    realignment_token = dataref_ptr;
8277 		  vec_dest = vect_create_destination_var (scalar_dest, vectype);
8278 		  new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
8279 						  msq, lsq, realignment_token);
8280 		  new_temp = make_ssa_name (vec_dest, new_stmt);
8281 		  gimple_assign_set_lhs (new_stmt, new_temp);
8282 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
8283 
8284 		  if (alignment_support_scheme == dr_explicit_realign_optimized)
8285 		    {
8286 		      gcc_assert (phi);
8287 		      if (i == vec_num - 1 && j == ncopies - 1)
8288 			add_phi_arg (phi, lsq,
8289 				     loop_latch_edge (containing_loop),
8290 				     UNKNOWN_LOCATION);
8291 		      msq = lsq;
8292 		    }
8293 		}
8294 
8295 	      /* 4. Handle invariant-load.  */
8296 	      if (inv_p && !bb_vinfo)
8297 		{
8298 		  gcc_assert (!grouped_load);
8299 		  /* If we have versioned for aliasing or the loop doesn't
8300 		     have any data dependencies that would preclude this,
8301 		     then we are sure this is a loop invariant load and
8302 		     thus we can insert it on the preheader edge.  */
8303 		  if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
8304 		      && !nested_in_vect_loop
8305 		      && hoist_defs_of_uses (stmt, loop))
8306 		    {
8307 		      if (dump_enabled_p ())
8308 			{
8309 			  dump_printf_loc (MSG_NOTE, vect_location,
8310 					   "hoisting out of the vectorized "
8311 					   "loop: ");
8312 			  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8313 			}
8314 		      tree tem = copy_ssa_name (scalar_dest);
8315 		      gsi_insert_on_edge_immediate
8316 			(loop_preheader_edge (loop),
8317 			 gimple_build_assign (tem,
8318 					      unshare_expr
8319 					        (gimple_assign_rhs1 (stmt))));
8320 		      new_temp = vect_init_vector (stmt, tem, vectype, NULL);
8321 		      new_stmt = SSA_NAME_DEF_STMT (new_temp);
8322 		      set_vinfo_for_stmt (new_stmt,
8323 					  new_stmt_vec_info (new_stmt, vinfo));
8324 		    }
8325 		  else
8326 		    {
8327 		      gimple_stmt_iterator gsi2 = *gsi;
8328 		      gsi_next (&gsi2);
8329 		      new_temp = vect_init_vector (stmt, scalar_dest,
8330 						   vectype, &gsi2);
8331 		      new_stmt = SSA_NAME_DEF_STMT (new_temp);
8332 		    }
8333 		}
8334 
8335 	      if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8336 		{
8337 		  tree perm_mask = perm_mask_for_reverse (vectype);
8338 		  new_temp = permute_vec_elements (new_temp, new_temp,
8339 						   perm_mask, stmt, gsi);
8340 		  new_stmt = SSA_NAME_DEF_STMT (new_temp);
8341 		}
8342 
8343 	      /* Collect vector loads and later create their permutation in
8344 		 vect_transform_grouped_load ().  */
8345 	      if (grouped_load || slp_perm)
8346 		dr_chain.quick_push (new_temp);
8347 
8348 	      /* Store vector loads in the corresponding SLP_NODE.  */
8349 	      if (slp && !slp_perm)
8350 		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8351 
	      /* With SLP permutation we load the gaps as well; without
		 it we need to skip the gaps after we manage to fully
		 load all elements.  group_gap_adj is GROUP_SIZE here.  */
8355 	      group_elt += nunits;
8356 	      if (maybe_ne (group_gap_adj, 0U)
8357 		  && !slp_perm
8358 		  && known_eq (group_elt, group_size - group_gap_adj))
8359 		{
8360 		  poly_wide_int bump_val
8361 		    = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
8362 		       * group_gap_adj);
8363 		  tree bump = wide_int_to_tree (sizetype, bump_val);
8364 		  dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8365 						 stmt, bump);
8366 		  group_elt = 0;
8367 		}
8368 	    }
8369 	  /* Bump the vector pointer to account for a gap or for excess
8370 	     elements loaded for a permuted SLP load.  */
8371 	  if (maybe_ne (group_gap_adj, 0U) && slp_perm)
8372 	    {
8373 	      poly_wide_int bump_val
8374 		= (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
8375 		   * group_gap_adj);
8376 	      tree bump = wide_int_to_tree (sizetype, bump_val);
8377 	      dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8378 					     stmt, bump);
8379 	    }
8380 	}
8381 
8382       if (slp && !slp_perm)
8383 	continue;
8384 
8385       if (slp_perm)
8386         {
8387 	  unsigned n_perms;
8388           if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
8389                                              slp_node_instance, false,
8390 					     &n_perms))
8391             {
8392               dr_chain.release ();
8393               return false;
8394             }
8395         }
8396       else
8397         {
8398           if (grouped_load)
8399   	    {
8400 	      if (memory_access_type != VMAT_LOAD_STORE_LANES)
8401 		vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
8402 	      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
8403 	    }
8404           else
8405 	    {
8406 	      if (j == 0)
8407 	        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8408 	      else
8409 	        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8410 	      prev_stmt_info = vinfo_for_stmt (new_stmt);
8411 	    }
8412         }
8413       dr_chain.release ();
8414     }
8415 
8416   return true;
8417 }
8418 
8419 /* Function vect_is_simple_cond.
8420 
8421    Input:
8422    LOOP - the loop that is being vectorized.
8423    COND - Condition that is checked for simple use.
8424 
8425    Output:
8426    *COMP_VECTYPE - the vector type for the comparison.
8427    *DTS - The def types for the arguments of the comparison
8428 
   Returns whether COND can be vectorized.  Checks whether the
   condition operands are supportable using vect_is_simple_use.  */
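/* For illustration only: both a comparison such as  a_1 != b_2  with
   SSA-name or constant operands and a scalar boolean SSA name produced
   by an earlier comparison (the "mask case" below) count as simple
   conds; anything else is rejected.  */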
8431 
8432 static bool
8433 vect_is_simple_cond (tree cond, vec_info *vinfo,
8434 		     tree *comp_vectype, enum vect_def_type *dts,
8435 		     tree vectype)
8436 {
8437   tree lhs, rhs;
8438   tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8439 
8440   /* Mask case.  */
8441   if (TREE_CODE (cond) == SSA_NAME
8442       && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
8443     {
8444       gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (cond);
8445       if (!vect_is_simple_use (cond, vinfo, &lhs_def_stmt,
8446 			       &dts[0], comp_vectype)
8447 	  || !*comp_vectype
8448 	  || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
8449 	return false;
8450       return true;
8451     }
8452 
8453   if (!COMPARISON_CLASS_P (cond))
8454     return false;
8455 
8456   lhs = TREE_OPERAND (cond, 0);
8457   rhs = TREE_OPERAND (cond, 1);
8458 
8459   if (TREE_CODE (lhs) == SSA_NAME)
8460     {
8461       gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
8462       if (!vect_is_simple_use (lhs, vinfo, &lhs_def_stmt, &dts[0], &vectype1))
8463 	return false;
8464     }
8465   else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
8466 	   || TREE_CODE (lhs) == FIXED_CST)
8467     dts[0] = vect_constant_def;
8468   else
8469     return false;
8470 
8471   if (TREE_CODE (rhs) == SSA_NAME)
8472     {
8473       gimple *rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
8474       if (!vect_is_simple_use (rhs, vinfo, &rhs_def_stmt, &dts[1], &vectype2))
8475 	return false;
8476     }
8477   else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
8478 	   || TREE_CODE (rhs) == FIXED_CST)
8479     dts[1] = vect_constant_def;
8480   else
8481     return false;
8482 
8483   if (vectype1 && vectype2
8484       && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
8485 		   TYPE_VECTOR_SUBPARTS (vectype2)))
8486     return false;
8487 
8488   *comp_vectype = vectype1 ? vectype1 : vectype2;
8489   /* Invariant comparison.  */
8490   if (! *comp_vectype)
8491     {
8492       tree scalar_type = TREE_TYPE (lhs);
8493       /* If we can widen the comparison to match vectype do so.  */
8494       if (INTEGRAL_TYPE_P (scalar_type)
8495 	  && tree_int_cst_lt (TYPE_SIZE (scalar_type),
8496 			      TYPE_SIZE (TREE_TYPE (vectype))))
8497 	scalar_type = build_nonstandard_integer_type
8498 	  (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))),
8499 	   TYPE_UNSIGNED (scalar_type));
8500       *comp_vectype = get_vectype_for_scalar_type (scalar_type);
8501     }
8502 
8503   return true;
8504 }
8505 
8506 /* vectorizable_condition.
8507 
   Check if STMT is a conditional modify expression that can be vectorized.
8509    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
8510    stmt using VEC_COND_EXPR  to replace it, put it in VEC_STMT, and insert it
8511    at GSI.
8512 
   When STMT is vectorized as a nested cycle, REDUC_DEF is the vector
   variable to be used at REDUC_INDEX (in the then clause if REDUC_INDEX
   is 1, and in the else clause if it is 2).
8516 
8517    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
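/* For illustration only (the SSA names are made up): a scalar statement
   such as

       x_5 = a_1 < b_2 ? c_3 : d_4;

   is replaced by a vector statement of the form

       vx = VEC_COND_EXPR <va < vb, vc, vd>;

   except for EXTRACT_LAST_REDUCTIONs, which are emitted as calls to
   IFN_FOLD_EXTRACT_LAST instead.  */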
8518 
8519 bool
8520 vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
8521 			gimple **vec_stmt, tree reduc_def, int reduc_index,
8522 			slp_tree slp_node)
8523 {
8524   tree scalar_dest = NULL_TREE;
8525   tree vec_dest = NULL_TREE;
8526   tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
8527   tree then_clause, else_clause;
8528   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8529   tree comp_vectype = NULL_TREE;
8530   tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
8531   tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
8532   tree vec_compare;
8533   tree new_temp;
8534   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
8535   enum vect_def_type dts[4]
8536     = {vect_unknown_def_type, vect_unknown_def_type,
8537        vect_unknown_def_type, vect_unknown_def_type};
8538   int ndts = 4;
8539   int ncopies;
8540   enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
8541   stmt_vec_info prev_stmt_info = NULL;
8542   int i, j;
8543   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8544   vec<tree> vec_oprnds0 = vNULL;
8545   vec<tree> vec_oprnds1 = vNULL;
8546   vec<tree> vec_oprnds2 = vNULL;
8547   vec<tree> vec_oprnds3 = vNULL;
8548   tree vec_cmp_type;
8549   bool masked = false;
8550 
8551   if (reduc_index && STMT_SLP_TYPE (stmt_info))
8552     return false;
8553 
8554   vect_reduction_type reduction_type
8555     = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info);
8556   if (reduction_type == TREE_CODE_REDUCTION)
8557     {
8558       if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8559 	return false;
8560 
8561       if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8562 	  && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
8563 	       && reduc_def))
8564 	return false;
8565 
8566       /* FORNOW: not yet supported.  */
8567       if (STMT_VINFO_LIVE_P (stmt_info))
8568 	{
8569 	  if (dump_enabled_p ())
8570 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8571 			     "value used after loop.\n");
8572 	  return false;
8573 	}
8574     }
8575 
8576   /* Is vectorizable conditional operation?  */
8577   if (!is_gimple_assign (stmt))
8578     return false;
8579 
8580   code = gimple_assign_rhs_code (stmt);
8581 
8582   if (code != COND_EXPR)
8583     return false;
8584 
8585   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8586   tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8587 
8588   if (slp_node)
8589     ncopies = 1;
8590   else
8591     ncopies = vect_get_num_copies (loop_vinfo, vectype);
8592 
8593   gcc_assert (ncopies >= 1);
8594   if (reduc_index && ncopies > 1)
8595     return false; /* FORNOW */
8596 
8597   cond_expr = gimple_assign_rhs1 (stmt);
8598   then_clause = gimple_assign_rhs2 (stmt);
8599   else_clause = gimple_assign_rhs3 (stmt);
8600 
8601   if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo,
8602 			    &comp_vectype, &dts[0], vectype)
8603       || !comp_vectype)
8604     return false;
8605 
8606   gimple *def_stmt;
8607   if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &def_stmt, &dts[2],
8608 			   &vectype1))
8609     return false;
8610   if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &def_stmt, &dts[3],
8611 			   &vectype2))
8612     return false;
8613 
8614   if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
8615     return false;
8616 
8617   if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
8618     return false;
8619 
8620   masked = !COMPARISON_CLASS_P (cond_expr);
8621   vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
8622 
8623   if (vec_cmp_type == NULL_TREE)
8624     return false;
8625 
8626   cond_code = TREE_CODE (cond_expr);
8627   if (!masked)
8628     {
8629       cond_expr0 = TREE_OPERAND (cond_expr, 0);
8630       cond_expr1 = TREE_OPERAND (cond_expr, 1);
8631     }
8632 
8633   if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
8634     {
8635       /* Boolean values may have another representation in vectors
8636 	 and therefore we prefer bit operations over comparison for
8637 	 them (which also works for scalar masks).  We store opcodes
8638 	 to use in bitop1 and bitop2.  Statement is vectorized as
8639 	 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
8640 	 depending on bitop1 and bitop2 arity.  */
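      /* For example, with boolean operands (illustration only): a > b
	 is computed as a & ~b (bitop1 = BIT_NOT_EXPR applied to the
	 second operand, bitop2 = BIT_AND_EXPR), and a == b as ~(a ^ b);
	 for the latter the final BIT_NOT_EXPR is avoided below by
	 swapping the then and else clauses instead.  */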
8641       switch (cond_code)
8642 	{
8643 	case GT_EXPR:
8644 	  bitop1 = BIT_NOT_EXPR;
8645 	  bitop2 = BIT_AND_EXPR;
8646 	  break;
8647 	case GE_EXPR:
8648 	  bitop1 = BIT_NOT_EXPR;
8649 	  bitop2 = BIT_IOR_EXPR;
8650 	  break;
8651 	case LT_EXPR:
8652 	  bitop1 = BIT_NOT_EXPR;
8653 	  bitop2 = BIT_AND_EXPR;
8654 	  std::swap (cond_expr0, cond_expr1);
8655 	  break;
8656 	case LE_EXPR:
8657 	  bitop1 = BIT_NOT_EXPR;
8658 	  bitop2 = BIT_IOR_EXPR;
8659 	  std::swap (cond_expr0, cond_expr1);
8660 	  break;
8661 	case NE_EXPR:
8662 	  bitop1 = BIT_XOR_EXPR;
8663 	  break;
8664 	case EQ_EXPR:
8665 	  bitop1 = BIT_XOR_EXPR;
8666 	  bitop2 = BIT_NOT_EXPR;
8667 	  break;
8668 	default:
8669 	  return false;
8670 	}
8671       cond_code = SSA_NAME;
8672     }
8673 
8674   if (!vec_stmt)
8675     {
8676       STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
8677       if (bitop1 != NOP_EXPR)
8678 	{
8679 	  machine_mode mode = TYPE_MODE (comp_vectype);
8680 	  optab optab;
8681 
8682 	  optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
8683 	  if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8684 	    return false;
8685 
8686 	  if (bitop2 != NOP_EXPR)
8687 	    {
8688 	      optab = optab_for_tree_code (bitop2, comp_vectype,
8689 					   optab_default);
8690 	      if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8691 		return false;
8692 	    }
8693 	}
8694       if (expand_vec_cond_expr_p (vectype, comp_vectype,
8695 				     cond_code))
8696 	{
8697 	  if (!slp_node)
8698 	    vect_model_simple_cost (stmt_info, ncopies, dts, ndts, NULL, NULL);
8699 	  return true;
8700 	}
8701       return false;
8702     }
8703 
8704   /* Transform.  */
8705 
8706   if (!slp_node)
8707     {
8708       vec_oprnds0.create (1);
8709       vec_oprnds1.create (1);
8710       vec_oprnds2.create (1);
8711       vec_oprnds3.create (1);
8712     }
8713 
8714   /* Handle def.  */
8715   scalar_dest = gimple_assign_lhs (stmt);
8716   if (reduction_type != EXTRACT_LAST_REDUCTION)
8717     vec_dest = vect_create_destination_var (scalar_dest, vectype);
8718 
8719   /* Handle cond expr.  */
8720   for (j = 0; j < ncopies; j++)
8721     {
8722       gimple *new_stmt = NULL;
8723       if (j == 0)
8724 	{
8725           if (slp_node)
8726             {
8727               auto_vec<tree, 4> ops;
8728 	      auto_vec<vec<tree>, 4> vec_defs;
8729 
8730 	      if (masked)
8731 		ops.safe_push (cond_expr);
8732 	      else
8733 		{
8734 		  ops.safe_push (cond_expr0);
8735 		  ops.safe_push (cond_expr1);
8736 		}
8737               ops.safe_push (then_clause);
8738               ops.safe_push (else_clause);
8739               vect_get_slp_defs (ops, slp_node, &vec_defs);
8740 	      vec_oprnds3 = vec_defs.pop ();
8741 	      vec_oprnds2 = vec_defs.pop ();
8742 	      if (!masked)
8743 		vec_oprnds1 = vec_defs.pop ();
8744 	      vec_oprnds0 = vec_defs.pop ();
8745             }
8746           else
8747             {
8748 	      gimple *gtemp;
8749 	      if (masked)
8750 		{
8751 		  vec_cond_lhs
8752 		    = vect_get_vec_def_for_operand (cond_expr, stmt,
8753 						    comp_vectype);
8754 		  vect_is_simple_use (cond_expr, stmt_info->vinfo,
8755 				      &gtemp, &dts[0]);
8756 		}
8757 	      else
8758 		{
8759 		  vec_cond_lhs
8760 		    = vect_get_vec_def_for_operand (cond_expr0,
8761 						    stmt, comp_vectype);
8762 		  vect_is_simple_use (cond_expr0, loop_vinfo, &gtemp, &dts[0]);
8763 
8764 		  vec_cond_rhs
8765 		    = vect_get_vec_def_for_operand (cond_expr1,
8766 						    stmt, comp_vectype);
8767 		  vect_is_simple_use (cond_expr1, loop_vinfo, &gtemp, &dts[1]);
8768 		}
8769 	      if (reduc_index == 1)
8770 		vec_then_clause = reduc_def;
8771 	      else
8772 		{
8773 		  vec_then_clause = vect_get_vec_def_for_operand (then_clause,
8774 								  stmt);
8775 	          vect_is_simple_use (then_clause, loop_vinfo,
8776 				      &gtemp, &dts[2]);
8777 		}
8778 	      if (reduc_index == 2)
8779 		vec_else_clause = reduc_def;
8780 	      else
8781 		{
8782 		  vec_else_clause = vect_get_vec_def_for_operand (else_clause,
8783 								  stmt);
8784 		  vect_is_simple_use (else_clause, loop_vinfo, &gtemp, &dts[3]);
8785 		}
8786 	    }
8787 	}
8788       else
8789 	{
8790 	  vec_cond_lhs
8791 	    = vect_get_vec_def_for_stmt_copy (dts[0],
8792 					      vec_oprnds0.pop ());
8793 	  if (!masked)
8794 	    vec_cond_rhs
8795 	      = vect_get_vec_def_for_stmt_copy (dts[1],
8796 						vec_oprnds1.pop ());
8797 
8798 	  vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
8799 							    vec_oprnds2.pop ());
8800 	  vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
8801 							    vec_oprnds3.pop ());
8802 	}
8803 
8804       if (!slp_node)
8805         {
8806 	  vec_oprnds0.quick_push (vec_cond_lhs);
8807 	  if (!masked)
8808 	    vec_oprnds1.quick_push (vec_cond_rhs);
8809 	  vec_oprnds2.quick_push (vec_then_clause);
8810 	  vec_oprnds3.quick_push (vec_else_clause);
8811 	}
8812 
8813       /* Arguments are ready.  Create the new vector stmt.  */
8814       FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
8815         {
8816           vec_then_clause = vec_oprnds2[i];
8817           vec_else_clause = vec_oprnds3[i];
8818 
8819 	  if (masked)
8820 	    vec_compare = vec_cond_lhs;
8821 	  else
8822 	    {
8823 	      vec_cond_rhs = vec_oprnds1[i];
8824 	      if (bitop1 == NOP_EXPR)
8825 		vec_compare = build2 (cond_code, vec_cmp_type,
8826 				      vec_cond_lhs, vec_cond_rhs);
8827 	      else
8828 		{
8829 		  new_temp = make_ssa_name (vec_cmp_type);
8830 		  if (bitop1 == BIT_NOT_EXPR)
8831 		    new_stmt = gimple_build_assign (new_temp, bitop1,
8832 						    vec_cond_rhs);
8833 		  else
8834 		    new_stmt
8835 		      = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
8836 					     vec_cond_rhs);
8837 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
8838 		  if (bitop2 == NOP_EXPR)
8839 		    vec_compare = new_temp;
8840 		  else if (bitop2 == BIT_NOT_EXPR)
8841 		    {
8842 		      /* Instead of doing ~x ? y : z do x ? z : y.  */
8843 		      vec_compare = new_temp;
8844 		      std::swap (vec_then_clause, vec_else_clause);
8845 		    }
8846 		  else
8847 		    {
8848 		      vec_compare = make_ssa_name (vec_cmp_type);
8849 		      new_stmt
8850 			= gimple_build_assign (vec_compare, bitop2,
8851 					       vec_cond_lhs, new_temp);
8852 		      vect_finish_stmt_generation (stmt, new_stmt, gsi);
8853 		    }
8854 		}
8855 	    }
8856 	  if (reduction_type == EXTRACT_LAST_REDUCTION)
8857 	    {
8858 	      if (!is_gimple_val (vec_compare))
8859 		{
8860 		  tree vec_compare_name = make_ssa_name (vec_cmp_type);
8861 		  new_stmt = gimple_build_assign (vec_compare_name,
8862 						  vec_compare);
8863 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
8864 		  vec_compare = vec_compare_name;
8865 		}
8866 	      gcc_assert (reduc_index == 2);
8867 	      new_stmt = gimple_build_call_internal
8868 		(IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
8869 		 vec_then_clause);
8870 	      gimple_call_set_lhs (new_stmt, scalar_dest);
8871 	      SSA_NAME_DEF_STMT (scalar_dest) = new_stmt;
8872 	      if (stmt == gsi_stmt (*gsi))
8873 		vect_finish_replace_stmt (stmt, new_stmt);
8874 	      else
8875 		{
8876 		  /* In this case we're moving the definition to later in the
8877 		     block.  That doesn't matter because the only uses of the
8878 		     lhs are in phi statements.  */
8879 		  gimple_stmt_iterator old_gsi = gsi_for_stmt (stmt);
8880 		  gsi_remove (&old_gsi, true);
8881 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
8882 		}
8883 	    }
8884 	  else
8885 	    {
8886 	      new_temp = make_ssa_name (vec_dest);
8887 	      new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR,
8888 					      vec_compare, vec_then_clause,
8889 					      vec_else_clause);
8890 	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
8891 	    }
8892           if (slp_node)
8893             SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8894         }
8895 
8896         if (slp_node)
8897           continue;
8898 
8899         if (j == 0)
8900           STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8901         else
8902           STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8903 
8904         prev_stmt_info = vinfo_for_stmt (new_stmt);
8905     }
8906 
8907   vec_oprnds0.release ();
8908   vec_oprnds1.release ();
8909   vec_oprnds2.release ();
8910   vec_oprnds3.release ();
8911 
8912   return true;
8913 }
8914 
8915 /* vectorizable_comparison.
8916 
   Check if STMT is a comparison expression that can be vectorized.
8918    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
8919    comparison, put it in VEC_STMT, and insert it at GSI.
8920 
8921    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
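/* For illustration only (the SSA names are made up): a scalar statement
   such as  mask_5 = a_1 < b_2  whose boolean result is used as a mask
   is vectorized into a vector comparison  vmask = va < vb  producing a
   boolean vector of type MASK_TYPE.  */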
8922 
8923 static bool
8924 vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
8925 			 gimple **vec_stmt, tree reduc_def,
8926 			 slp_tree slp_node)
8927 {
8928   tree lhs, rhs1, rhs2;
8929   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8930   tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8931   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8932   tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
8933   tree new_temp;
8934   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
8935   enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
8936   int ndts = 2;
8937   poly_uint64 nunits;
8938   int ncopies;
8939   enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
8940   stmt_vec_info prev_stmt_info = NULL;
8941   int i, j;
8942   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8943   vec<tree> vec_oprnds0 = vNULL;
8944   vec<tree> vec_oprnds1 = vNULL;
8945   gimple *def_stmt;
8946   tree mask_type;
8947   tree mask;
8948 
8949   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8950     return false;
8951 
8952   if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
8953     return false;
8954 
8955   mask_type = vectype;
8956   nunits = TYPE_VECTOR_SUBPARTS (vectype);
8957 
8958   if (slp_node)
8959     ncopies = 1;
8960   else
8961     ncopies = vect_get_num_copies (loop_vinfo, vectype);
8962 
8963   gcc_assert (ncopies >= 1);
8964   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8965       && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
8966 	   && reduc_def))
8967     return false;
8968 
8969   if (STMT_VINFO_LIVE_P (stmt_info))
8970     {
8971       if (dump_enabled_p ())
8972 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8973 			 "value used after loop.\n");
8974       return false;
8975     }
8976 
8977   if (!is_gimple_assign (stmt))
8978     return false;
8979 
8980   code = gimple_assign_rhs_code (stmt);
8981 
8982   if (TREE_CODE_CLASS (code) != tcc_comparison)
8983     return false;
8984 
8985   rhs1 = gimple_assign_rhs1 (stmt);
8986   rhs2 = gimple_assign_rhs2 (stmt);
8987 
8988   if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &def_stmt,
8989 			   &dts[0], &vectype1))
8990     return false;
8991 
8992   if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &def_stmt,
8993 			   &dts[1], &vectype2))
8994     return false;
8995 
8996   if (vectype1 && vectype2
8997       && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
8998 		   TYPE_VECTOR_SUBPARTS (vectype2)))
8999     return false;
9000 
9001   vectype = vectype1 ? vectype1 : vectype2;
9002 
9003   /* Invariant comparison.  */
9004   if (!vectype)
9005     {
9006       vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
9007       if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
9008 	return false;
9009     }
9010   else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
9011     return false;
9012 
9013   /* Can't compare mask and non-mask types.  */
9014   if (vectype1 && vectype2
9015       && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
9016     return false;
9017 
9018   /* Boolean values may have another representation in vectors
9019      and therefore we prefer bit operations over comparison for
9020      them (which also works for scalar masks).  We store opcodes
9021      to use in bitop1 and bitop2.  Statement is vectorized as
9022        BITOP2 (rhs1 BITOP1 rhs2) or
9023        rhs1 BITOP2 (BITOP1 rhs2)
9024      depending on bitop1 and bitop2 arity.  */
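  /* For example, with boolean operands (illustration only): a >= b is
     computed as a | ~b, and a < b is handled by swapping the operands
     and computing b & ~a.  */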
9025   if (VECTOR_BOOLEAN_TYPE_P (vectype))
9026     {
9027       if (code == GT_EXPR)
9028 	{
9029 	  bitop1 = BIT_NOT_EXPR;
9030 	  bitop2 = BIT_AND_EXPR;
9031 	}
9032       else if (code == GE_EXPR)
9033 	{
9034 	  bitop1 = BIT_NOT_EXPR;
9035 	  bitop2 = BIT_IOR_EXPR;
9036 	}
9037       else if (code == LT_EXPR)
9038 	{
9039 	  bitop1 = BIT_NOT_EXPR;
9040 	  bitop2 = BIT_AND_EXPR;
9041 	  std::swap (rhs1, rhs2);
9042 	  std::swap (dts[0], dts[1]);
9043 	}
9044       else if (code == LE_EXPR)
9045 	{
9046 	  bitop1 = BIT_NOT_EXPR;
9047 	  bitop2 = BIT_IOR_EXPR;
9048 	  std::swap (rhs1, rhs2);
9049 	  std::swap (dts[0], dts[1]);
9050 	}
9051       else
9052 	{
9053 	  bitop1 = BIT_XOR_EXPR;
9054 	  if (code == EQ_EXPR)
9055 	    bitop2 = BIT_NOT_EXPR;
9056 	}
9057     }
9058 
9059   if (!vec_stmt)
9060     {
9061       STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
9062       if (!slp_node)
9063 	vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
9064 				dts, ndts, NULL, NULL);
9065       if (bitop1 == NOP_EXPR)
9066 	return expand_vec_cmp_expr_p (vectype, mask_type, code);
9067       else
9068 	{
9069 	  machine_mode mode = TYPE_MODE (vectype);
9070 	  optab optab;
9071 
9072 	  optab = optab_for_tree_code (bitop1, vectype, optab_default);
9073 	  if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9074 	    return false;
9075 
9076 	  if (bitop2 != NOP_EXPR)
9077 	    {
9078 	      optab = optab_for_tree_code (bitop2, vectype, optab_default);
9079 	      if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9080 		return false;
9081 	    }
9082 	  return true;
9083 	}
9084     }
9085 
9086   /* Transform.  */
9087   if (!slp_node)
9088     {
9089       vec_oprnds0.create (1);
9090       vec_oprnds1.create (1);
9091     }
9092 
9093   /* Handle def.  */
9094   lhs = gimple_assign_lhs (stmt);
9095   mask = vect_create_destination_var (lhs, mask_type);
9096 
9097   /* Handle cmp expr.  */
9098   for (j = 0; j < ncopies; j++)
9099     {
9100       gassign *new_stmt = NULL;
9101       if (j == 0)
9102 	{
9103 	  if (slp_node)
9104 	    {
9105 	      auto_vec<tree, 2> ops;
9106 	      auto_vec<vec<tree>, 2> vec_defs;
9107 
9108 	      ops.safe_push (rhs1);
9109 	      ops.safe_push (rhs2);
9110 	      vect_get_slp_defs (ops, slp_node, &vec_defs);
9111 	      vec_oprnds1 = vec_defs.pop ();
9112 	      vec_oprnds0 = vec_defs.pop ();
9113 	    }
9114 	  else
9115 	    {
9116 	      vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt, vectype);
9117 	      vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt, vectype);
9118 	    }
9119 	}
9120       else
9121 	{
9122 	  vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0],
9123 						     vec_oprnds0.pop ());
9124 	  vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1],
9125 						     vec_oprnds1.pop ());
9126 	}
9127 
9128       if (!slp_node)
9129 	{
9130 	  vec_oprnds0.quick_push (vec_rhs1);
9131 	  vec_oprnds1.quick_push (vec_rhs2);
9132 	}
9133 
9134       /* Arguments are ready.  Create the new vector stmt.  */
9135       FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
9136 	{
9137 	  vec_rhs2 = vec_oprnds1[i];
9138 
9139 	  new_temp = make_ssa_name (mask);
9140 	  if (bitop1 == NOP_EXPR)
9141 	    {
9142 	      new_stmt = gimple_build_assign (new_temp, code,
9143 					      vec_rhs1, vec_rhs2);
9144 	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
9145 	    }
9146 	  else
9147 	    {
9148 	      if (bitop1 == BIT_NOT_EXPR)
9149 		new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
9150 	      else
9151 		new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
9152 						vec_rhs2);
9153 	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
9154 	      if (bitop2 != NOP_EXPR)
9155 		{
9156 		  tree res = make_ssa_name (mask);
9157 		  if (bitop2 == BIT_NOT_EXPR)
9158 		    new_stmt = gimple_build_assign (res, bitop2, new_temp);
9159 		  else
9160 		    new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
9161 						    new_temp);
9162 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
9163 		}
9164 	    }
9165 	  if (slp_node)
9166 	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9167 	}
9168 
9169       if (slp_node)
9170 	continue;
9171 
9172       if (j == 0)
9173 	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
9174       else
9175 	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
9176 
9177       prev_stmt_info = vinfo_for_stmt (new_stmt);
9178     }
9179 
9180   vec_oprnds0.release ();
9181   vec_oprnds1.release ();
9182 
9183   return true;
9184 }
9185 
9186 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
9187    can handle all live statements in the node.  Otherwise return true
9188    if STMT is not live or if vectorizable_live_operation can handle it.
9189    GSI and VEC_STMT are as for vectorizable_live_operation.  */
9190 
9191 static bool
9192 can_vectorize_live_stmts (gimple *stmt, gimple_stmt_iterator *gsi,
9193 			  slp_tree slp_node, gimple **vec_stmt)
9194 {
9195   if (slp_node)
9196     {
9197       gimple *slp_stmt;
9198       unsigned int i;
9199       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt)
9200 	{
9201 	  stmt_vec_info slp_stmt_info = vinfo_for_stmt (slp_stmt);
9202 	  if (STMT_VINFO_LIVE_P (slp_stmt_info)
9203 	      && !vectorizable_live_operation (slp_stmt, gsi, slp_node, i,
9204 					       vec_stmt))
9205 	    return false;
9206 	}
9207     }
9208   else if (STMT_VINFO_LIVE_P (vinfo_for_stmt (stmt))
9209 	   && !vectorizable_live_operation (stmt, gsi, slp_node, -1, vec_stmt))
9210     return false;
9211 
9212   return true;
9213 }
9214 
9215 /* Make sure the statement is vectorizable.  */
9216 
9217 bool
9218 vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node,
9219 		   slp_instance node_instance)
9220 {
9221   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9222   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
9223   enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
9224   bool ok;
9225   gimple *pattern_stmt;
9226   gimple_seq pattern_def_seq;
9227 
9228   if (dump_enabled_p ())
9229     {
9230       dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
9231       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
9232     }
9233 
9234   if (gimple_has_volatile_ops (stmt))
9235     {
9236       if (dump_enabled_p ())
9237         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9238                          "not vectorized: stmt has volatile operands\n");
9239 
9240       return false;
9241     }
9242 
9243   /* Skip stmts that do not need to be vectorized. In loops this is expected
9244      to include:
9245      - the COND_EXPR which is the loop exit condition
9246      - any LABEL_EXPRs in the loop
9247      - computations that are used only for array indexing or loop control.
9248      In basic blocks we only analyze statements that are a part of some SLP
9249      instance, therefore, all the statements are relevant.
9250 
     A pattern statement needs to be analyzed instead of the original
     statement if the original statement is not relevant.  Otherwise, we
     analyze both statements.  In basic blocks we are called from some SLP
     instance traversal; there we don't analyze pattern stmts instead,
     because the pattern stmts are already part of the SLP instance.  */
9256 
9257   pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
9258   if (!STMT_VINFO_RELEVANT_P (stmt_info)
9259       && !STMT_VINFO_LIVE_P (stmt_info))
9260     {
9261       if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9262           && pattern_stmt
9263           && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
9264               || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
9265         {
9266           /* Analyze PATTERN_STMT instead of the original stmt.  */
9267           stmt = pattern_stmt;
9268           stmt_info = vinfo_for_stmt (pattern_stmt);
9269           if (dump_enabled_p ())
9270             {
9271               dump_printf_loc (MSG_NOTE, vect_location,
9272                                "==> examining pattern statement: ");
9273               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
9274             }
9275         }
9276       else
9277         {
9278           if (dump_enabled_p ())
9279             dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
9280 
9281           return true;
9282         }
9283     }
9284   else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9285 	   && node == NULL
9286            && pattern_stmt
9287            && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
9288                || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
9289     {
9290       /* Analyze PATTERN_STMT too.  */
9291       if (dump_enabled_p ())
9292         {
9293           dump_printf_loc (MSG_NOTE, vect_location,
9294                            "==> examining pattern statement: ");
9295           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
9296         }
9297 
9298       if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node,
9299 			      node_instance))
9300         return false;
9301    }
9302 
9303   if (is_pattern_stmt_p (stmt_info)
9304       && node == NULL
9305       && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
9306     {
9307       gimple_stmt_iterator si;
9308 
9309       for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
9310 	{
9311 	  gimple *pattern_def_stmt = gsi_stmt (si);
9312 	  if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
9313 	      || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
9314 	    {
9315 	      /* Analyze def stmt of STMT if it's a pattern stmt.  */
9316 	      if (dump_enabled_p ())
9317 		{
9318 		  dump_printf_loc (MSG_NOTE, vect_location,
9319                                    "==> examining pattern def statement: ");
9320 		  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
9321 		}
9322 
9323 	      if (!vect_analyze_stmt (pattern_def_stmt,
9324 				      need_to_vectorize, node, node_instance))
9325 		return false;
9326 	    }
9327 	}
9328     }
9329 
9330   switch (STMT_VINFO_DEF_TYPE (stmt_info))
9331     {
9332       case vect_internal_def:
9333         break;
9334 
9335       case vect_reduction_def:
9336       case vect_nested_cycle:
9337          gcc_assert (!bb_vinfo
9338 		     && (relevance == vect_used_in_outer
9339 			 || relevance == vect_used_in_outer_by_reduction
9340 			 || relevance == vect_used_by_reduction
9341 			 || relevance == vect_unused_in_scope
9342 			 || relevance == vect_used_only_live));
9343          break;
9344 
9345       case vect_induction_def:
9346 	gcc_assert (!bb_vinfo);
9347 	break;
9348 
9349       case vect_constant_def:
9350       case vect_external_def:
9351       case vect_unknown_def_type:
9352       default:
9353         gcc_unreachable ();
9354     }
9355 
9356   if (STMT_VINFO_RELEVANT_P (stmt_info))
9357     {
9358       gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
9359       gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
9360 		  || (is_gimple_call (stmt)
9361 		      && gimple_call_lhs (stmt) == NULL_TREE));
9362       *need_to_vectorize = true;
9363     }
9364 
9365   if (PURE_SLP_STMT (stmt_info) && !node)
9366     {
9367       dump_printf_loc (MSG_NOTE, vect_location,
9368 		       "handled only by SLP analysis\n");
9369       return true;
9370     }
9371 
9372   ok = true;
9373   if (!bb_vinfo
9374       && (STMT_VINFO_RELEVANT_P (stmt_info)
9375 	  || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
9376     ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
9377 	  || vectorizable_conversion (stmt, NULL, NULL, node)
9378 	  || vectorizable_shift (stmt, NULL, NULL, node)
9379 	  || vectorizable_operation (stmt, NULL, NULL, node)
9380 	  || vectorizable_assignment (stmt, NULL, NULL, node)
9381 	  || vectorizable_load (stmt, NULL, NULL, node, NULL)
9382 	  || vectorizable_call (stmt, NULL, NULL, node)
9383 	  || vectorizable_store (stmt, NULL, NULL, node)
9384 	  || vectorizable_reduction (stmt, NULL, NULL, node, node_instance)
9385 	  || vectorizable_induction (stmt, NULL, NULL, node)
9386 	  || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
9387 	  || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
9388   else
9389     {
9390       if (bb_vinfo)
9391 	ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
9392 	      || vectorizable_conversion (stmt, NULL, NULL, node)
9393 	      || vectorizable_shift (stmt, NULL, NULL, node)
9394 	      || vectorizable_operation (stmt, NULL, NULL, node)
9395 	      || vectorizable_assignment (stmt, NULL, NULL, node)
9396 	      || vectorizable_load (stmt, NULL, NULL, node, NULL)
9397 	      || vectorizable_call (stmt, NULL, NULL, node)
9398 	      || vectorizable_store (stmt, NULL, NULL, node)
9399 	      || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
9400 	      || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
9401     }
9402 
9403   if (!ok)
9404     {
9405       if (dump_enabled_p ())
9406         {
9407           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9408                            "not vectorized: relevant stmt not ");
9409           dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
9410           dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
9411         }
9412 
9413       return false;
9414     }
9415 
9416   if (bb_vinfo)
9417     return true;
9418 
9419   /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
9420       need extra handling, except for vectorizable reductions.  */
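
  /* For example, in

       for (i = 0; i < n; i++)
         last = a[i];
       ... = last;

     the statement computing LAST is live: its value from the final
     iteration is used after the loop and must be extracted from the
     vectorized result.  */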
9421   if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
9422       && !can_vectorize_live_stmts (stmt, NULL, node, NULL))
9423     {
9424       if (dump_enabled_p ())
9425         {
9426           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9427                            "not vectorized: live stmt not supported: ");
9428           dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
9429         }
9430 
9431        return false;
9432     }
9433 
9434   return true;
9435 }
9436 
9437 
9438 /* Function vect_transform_stmt.
9439 
   Create a vectorized stmt to replace STMT, and insert it at GSI.  */
9441 
9442 bool
9443 vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
9444 		     bool *grouped_store, slp_tree slp_node,
9445                      slp_instance slp_node_instance)
9446 {
9447   bool is_store = false;
9448   gimple *vec_stmt = NULL;
9449   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9450   bool done;
9451 
9452   gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
9453   gimple *old_vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
9454 
9455   bool nested_p = (STMT_VINFO_LOOP_VINFO (stmt_info)
9456 		   && nested_in_vect_loop_p
9457 		        (LOOP_VINFO_LOOP (STMT_VINFO_LOOP_VINFO (stmt_info)),
9458 			 stmt));
9459 
9460   switch (STMT_VINFO_TYPE (stmt_info))
9461     {
9462     case type_demotion_vec_info_type:
9463     case type_promotion_vec_info_type:
9464     case type_conversion_vec_info_type:
9465       done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
9466       gcc_assert (done);
9467       break;
9468 
9469     case induc_vec_info_type:
9470       done = vectorizable_induction (stmt, gsi, &vec_stmt, slp_node);
9471       gcc_assert (done);
9472       break;
9473 
9474     case shift_vec_info_type:
9475       done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
9476       gcc_assert (done);
9477       break;
9478 
9479     case op_vec_info_type:
9480       done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
9481       gcc_assert (done);
9482       break;
9483 
9484     case assignment_vec_info_type:
9485       done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
9486       gcc_assert (done);
9487       break;
9488 
9489     case load_vec_info_type:
9490       done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
9491                                 slp_node_instance);
9492       gcc_assert (done);
9493       break;
9494 
9495     case store_vec_info_type:
9496       done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
9497       gcc_assert (done);
9498       if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
9499 	{
	  /* In case of interleaving, the whole chain is vectorized when the
	     last store in the chain is reached.  Store stmts before the last
	     one are skipped, and their stmt_vec_info shouldn't be freed
	     meanwhile.  */
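	  /* For example, for the interleaved store group

	       a[2*i]   = x;
	       a[2*i+1] = y;

	     no vector code is emitted when the first store is reached; the
	     vector stores for the whole group are generated once the last
	     store in the group is transformed.  */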
9504 	  *grouped_store = true;
9505 	  stmt_vec_info group_info
9506 	    = vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info));
9507 	  if (GROUP_STORE_COUNT (group_info) == GROUP_SIZE (group_info))
9508 	    is_store = true;
9509 	}
9510       else
9511 	is_store = true;
9512       break;
9513 
9514     case condition_vec_info_type:
9515       done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
9516       gcc_assert (done);
9517       break;
9518 
9519     case comparison_vec_info_type:
9520       done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node);
9521       gcc_assert (done);
9522       break;
9523 
9524     case call_vec_info_type:
9525       done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
9526       stmt = gsi_stmt (*gsi);
9527       break;
9528 
9529     case call_simd_clone_vec_info_type:
9530       done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
9531       stmt = gsi_stmt (*gsi);
9532       break;
9533 
9534     case reduc_vec_info_type:
9535       done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node,
9536 				     slp_node_instance);
9537       gcc_assert (done);
9538       break;
9539 
9540     default:
9541       if (!STMT_VINFO_LIVE_P (stmt_info))
9542 	{
9543 	  if (dump_enabled_p ())
9544 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9545                              "stmt not supported.\n");
9546 	  gcc_unreachable ();
9547 	}
9548     }
9549 
9550   /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
9551      This would break hybrid SLP vectorization.  */
9552   if (slp_node)
9553     gcc_assert (!vec_stmt
9554 		&& STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt);
9555 
9556   /* Handle inner-loop stmts whose DEF is used in the loop-nest that
9557      is being vectorized, but outside the immediately enclosing loop.  */
9558   if (vec_stmt
9559       && nested_p
9560       && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
9561       && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
9562           || STMT_VINFO_RELEVANT (stmt_info) ==
9563                                            vect_used_in_outer_by_reduction))
9564     {
9565       struct loop *innerloop = LOOP_VINFO_LOOP (
9566                                 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
9567       imm_use_iterator imm_iter;
9568       use_operand_p use_p;
9569       tree scalar_dest;
9570       gimple *exit_phi;
9571 
9572       if (dump_enabled_p ())
9573         dump_printf_loc (MSG_NOTE, vect_location,
9574                          "Record the vdef for outer-loop vectorization.\n");
9575 
      /* Find the relevant loop-exit phi-node, and record the vec_stmt there
	 (to be used when vectorizing outer-loop stmts that use the DEF of
	 STMT).  */
9579       if (gimple_code (stmt) == GIMPLE_PHI)
9580         scalar_dest = PHI_RESULT (stmt);
9581       else
9582         scalar_dest = gimple_assign_lhs (stmt);
9583 
9584       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
9585        {
9586          if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
9587            {
9588              exit_phi = USE_STMT (use_p);
9589              STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
9590            }
9591        }
9592     }
9593 
9594   /* Handle stmts whose DEF is used outside the loop-nest that is
9595      being vectorized.  */
9596   if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
9597     {
9598       done = can_vectorize_live_stmts (stmt, gsi, slp_node, &vec_stmt);
9599       gcc_assert (done);
9600     }
9601 
9602   if (vec_stmt)
9603     STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
9604 
9605   return is_store;
9606 }
9607 
9608 
/* Remove a group of stores (for SLP or interleaving) and free their
   stmt_vec_info.  */
9611 
9612 void
9613 vect_remove_stores (gimple *first_stmt)
9614 {
9615   gimple *next = first_stmt;
9616   gimple *tmp;
9617   gimple_stmt_iterator next_si;
9618 
9619   while (next)
9620     {
9621       stmt_vec_info stmt_info = vinfo_for_stmt (next);
9622 
9623       tmp = GROUP_NEXT_ELEMENT (stmt_info);
9624       if (is_pattern_stmt_p (stmt_info))
9625 	next = STMT_VINFO_RELATED_STMT (stmt_info);
9626       /* Free the attached stmt_vec_info and remove the stmt.  */
9627       next_si = gsi_for_stmt (next);
9628       unlink_stmt_vdef (next);
9629       gsi_remove (&next_si, true);
9630       release_defs (next);
9631       free_stmt_vec_info (next);
9632       next = tmp;
9633     }
9634 }
9635 
9636 
9637 /* Function new_stmt_vec_info.
9638 
9639    Create and initialize a new stmt_vec_info struct for STMT.  */
9640 
9641 stmt_vec_info
9642 new_stmt_vec_info (gimple *stmt, vec_info *vinfo)
9643 {
9644   stmt_vec_info res;
9645   res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
9646 
9647   STMT_VINFO_TYPE (res) = undef_vec_info_type;
9648   STMT_VINFO_STMT (res) = stmt;
9649   res->vinfo = vinfo;
9650   STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
9651   STMT_VINFO_LIVE_P (res) = false;
9652   STMT_VINFO_VECTYPE (res) = NULL;
9653   STMT_VINFO_VEC_STMT (res) = NULL;
9654   STMT_VINFO_VECTORIZABLE (res) = true;
9655   STMT_VINFO_IN_PATTERN_P (res) = false;
9656   STMT_VINFO_RELATED_STMT (res) = NULL;
9657   STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
9658   STMT_VINFO_DATA_REF (res) = NULL;
9659   STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;
9660   STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res) = ERROR_MARK;
9661 
9662   if (gimple_code (stmt) == GIMPLE_PHI
9663       && is_loop_header_bb_p (gimple_bb (stmt)))
9664     STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
9665   else
9666     STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
9667 
9668   STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
9669   STMT_SLP_TYPE (res) = loop_vect;
9670   STMT_VINFO_NUM_SLP_USES (res) = 0;
9671 
9672   GROUP_FIRST_ELEMENT (res) = NULL;
9673   GROUP_NEXT_ELEMENT (res) = NULL;
9674   GROUP_SIZE (res) = 0;
9675   GROUP_STORE_COUNT (res) = 0;
9676   GROUP_GAP (res) = 0;
9677   GROUP_SAME_DR_STMT (res) = NULL;
9678 
9679   return res;
9680 }
9681 
9682 
/* Create the vector holding the stmt_vec_info structs.  */
9684 
9685 void
9686 init_stmt_vec_info_vec (void)
9687 {
9688   gcc_assert (!stmt_vec_info_vec.exists ());
9689   stmt_vec_info_vec.create (50);
9690 }
9691 
9692 
/* Free the vector holding the stmt_vec_info structs.  */
9694 
9695 void
9696 free_stmt_vec_info_vec (void)
9697 {
9698   unsigned int i;
9699   stmt_vec_info info;
9700   FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
9701     if (info != NULL)
9702       free_stmt_vec_info (STMT_VINFO_STMT (info));
9703   gcc_assert (stmt_vec_info_vec.exists ());
9704   stmt_vec_info_vec.release ();
9705 }
9706 
9707 
9708 /* Free stmt vectorization related info.  */
9709 
9710 void
9711 free_stmt_vec_info (gimple *stmt)
9712 {
9713   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9714 
9715   if (!stmt_info)
9716     return;
9717 
9718   /* Check if this statement has a related "pattern stmt"
9719      (introduced by the vectorizer during the pattern recognition
9720      pass).  Free pattern's stmt_vec_info and def stmt's stmt_vec_info
9721      too.  */
9722   if (STMT_VINFO_IN_PATTERN_P (stmt_info))
9723     {
9724       stmt_vec_info patt_info
9725 	= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
9726       if (patt_info)
9727 	{
9728 	  gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
9729 	  gimple *patt_stmt = STMT_VINFO_STMT (patt_info);
9730 	  gimple_set_bb (patt_stmt, NULL);
9731 	  tree lhs = gimple_get_lhs (patt_stmt);
9732 	  if (lhs && TREE_CODE (lhs) == SSA_NAME)
9733 	    release_ssa_name (lhs);
9734 	  if (seq)
9735 	    {
9736 	      gimple_stmt_iterator si;
9737 	      for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
9738 		{
9739 		  gimple *seq_stmt = gsi_stmt (si);
9740 		  gimple_set_bb (seq_stmt, NULL);
9741 		  lhs = gimple_get_lhs (seq_stmt);
9742 		  if (lhs && TREE_CODE (lhs) == SSA_NAME)
9743 		    release_ssa_name (lhs);
9744 		  free_stmt_vec_info (seq_stmt);
9745 		}
9746 	    }
9747 	  free_stmt_vec_info (patt_stmt);
9748 	}
9749     }
9750 
9751   STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
9752   STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
9753   set_vinfo_for_stmt (stmt, NULL);
9754   free (stmt_info);
9755 }
9756 
9757 
9758 /* Function get_vectype_for_scalar_type_and_size.
9759 
   Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
9761    by the target.  */
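
/* For example, on a target with 128-bit vectors, SCALAR_TYPE 'int'
   (32 bits) and SIZE 16 (bytes) yield a 4-element integer vector type,
   while a SIZE of 0 requests the target's preferred SIMD width for
   'int'.  */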
9762 
9763 tree
9764 get_vectype_for_scalar_type_and_size (tree scalar_type, poly_uint64 size)
9765 {
9766   tree orig_scalar_type = scalar_type;
9767   scalar_mode inner_mode;
9768   machine_mode simd_mode;
9769   poly_uint64 nunits;
9770   tree vectype;
9771 
9772   if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
9773       && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
9774     return NULL_TREE;
9775 
9776   unsigned int nbytes = GET_MODE_SIZE (inner_mode);
9777 
9778   /* For vector types of elements whose mode precision doesn't
     match their type's precision we use an element type of mode
9780      precision.  The vectorization routines will have to make sure
9781      they support the proper result truncation/extension.
9782      We also make sure to build vector types with INTEGER_TYPE
9783      component type only.  */
9784   if (INTEGRAL_TYPE_P (scalar_type)
9785       && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
9786 	  || TREE_CODE (scalar_type) != INTEGER_TYPE))
9787     scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
9788 						  TYPE_UNSIGNED (scalar_type));
9789 
9790   /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
9791      When the component mode passes the above test simply use a type
9792      corresponding to that mode.  The theory is that any use that
9793      would cause problems with this will disable vectorization anyway.  */
9794   else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
9795 	   && !INTEGRAL_TYPE_P (scalar_type))
9796     scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
9797 
9798   /* We can't build a vector type of elements with alignment bigger than
9799      their size.  */
9800   else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
9801     scalar_type = lang_hooks.types.type_for_mode (inner_mode,
9802 						  TYPE_UNSIGNED (scalar_type));
9803 
  /* If we fell back to using the mode, fail if there was
     no scalar type for it.  */
9806   if (scalar_type == NULL_TREE)
9807     return NULL_TREE;
9808 
  /* If no size was supplied use the mode the target prefers.  Otherwise
     look up a vector mode of the specified size.  */
9811   if (known_eq (size, 0U))
9812     simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
9813   else if (!multiple_p (size, nbytes, &nunits)
9814 	   || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
9815     return NULL_TREE;
9816   /* NOTE: nunits == 1 is allowed to support single element vector types.  */
9817   if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits))
9818     return NULL_TREE;
9819 
9820   vectype = build_vector_type (scalar_type, nunits);
9821 
9822   if (!VECTOR_MODE_P (TYPE_MODE (vectype))
9823       && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
9824     return NULL_TREE;
9825 
9826   /* Re-attach the address-space qualifier if we canonicalized the scalar
9827      type.  */
9828   if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
9829     return build_qualified_type
9830 	     (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
9831 
9832   return vectype;
9833 }
9834 
9835 poly_uint64 current_vector_size;
9836 
9837 /* Function get_vectype_for_scalar_type.
9838 
9839    Returns the vector type corresponding to SCALAR_TYPE as supported
9840    by the target.  */
9841 
9842 tree
9843 get_vectype_for_scalar_type (tree scalar_type)
9844 {
9845   tree vectype;
9846   vectype = get_vectype_for_scalar_type_and_size (scalar_type,
9847 						  current_vector_size);
9848   if (vectype
9849       && known_eq (current_vector_size, 0U))
9850     current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
9851   return vectype;
9852 }
9853 
9854 /* Function get_mask_type_for_scalar_type.
9855 
   Returns the mask type corresponding to the result of a comparison of
   vectors of the specified SCALAR_TYPE, as supported by the target.  */
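
/* For example, with a current vector size of 16 bytes and SCALAR_TYPE
   'int', the result is a 4-element boolean vector type; whether its
   elements are integer lanes holding all-zeros/all-ones or bits of a
   scalar mask mode depends on the target.  */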
9858 
9859 tree
9860 get_mask_type_for_scalar_type (tree scalar_type)
9861 {
9862   tree vectype = get_vectype_for_scalar_type (scalar_type);
9863 
9864   if (!vectype)
9865     return NULL;
9866 
9867   return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
9868 				  current_vector_size);
9869 }
9870 
9871 /* Function get_same_sized_vectype
9872 
   Returns a vector type corresponding to SCALAR_TYPE with the same size
   as VECTOR_TYPE, if supported by the target.  */
9875 
9876 tree
9877 get_same_sized_vectype (tree scalar_type, tree vector_type)
9878 {
9879   if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
9880     return build_same_sized_truth_vector_type (vector_type);
9881 
9882   return get_vectype_for_scalar_type_and_size
9883 	   (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
9884 }
9885 
9886 /* Function vect_is_simple_use.
9887 
9888    Input:
9889    VINFO - the vect info of the loop or basic block that is being vectorized.
9890    OPERAND - operand in the loop or bb.
9891    Output:
9892    DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
9893    DT - the type of definition
9894 
9895    Returns whether a stmt with OPERAND can be vectorized.
9896    For loops, supportable operands are constants, loop invariants, and operands
9897    that are defined by the current iteration of the loop.  Unsupportable
9898    operands are those that are defined by a previous iteration of the loop (as
9899    is the case in reduction/induction computations).
9900    For basic blocks, supportable operands are constants and bb invariants.
9901    For now, operands defined outside the basic block are not supported.  */
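
/* For example, in

     for (i = 0; i < n; i++)
       a[i] = b[i] * x + 1;

   the constant 1 has definition type vect_constant_def, X (defined
   before the loop) is vect_external_def, and the SSA name holding
   b[i] * x is vect_internal_def.  */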
9902 
9903 bool
9904 vect_is_simple_use (tree operand, vec_info *vinfo,
9905                     gimple **def_stmt, enum vect_def_type *dt)
9906 {
9907   *def_stmt = NULL;
9908   *dt = vect_unknown_def_type;
9909 
9910   if (dump_enabled_p ())
9911     {
9912       dump_printf_loc (MSG_NOTE, vect_location,
9913                        "vect_is_simple_use: operand ");
9914       dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
9915       dump_printf (MSG_NOTE, "\n");
9916     }
9917 
9918   if (CONSTANT_CLASS_P (operand))
9919     {
9920       *dt = vect_constant_def;
9921       return true;
9922     }
9923 
9924   if (is_gimple_min_invariant (operand))
9925     {
9926       *dt = vect_external_def;
9927       return true;
9928     }
9929 
9930   if (TREE_CODE (operand) != SSA_NAME)
9931     {
9932       if (dump_enabled_p ())
9933 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9934 			 "not ssa-name.\n");
9935       return false;
9936     }
9937 
9938   if (SSA_NAME_IS_DEFAULT_DEF (operand))
9939     {
9940       *dt = vect_external_def;
9941       return true;
9942     }
9943 
9944   *def_stmt = SSA_NAME_DEF_STMT (operand);
9945   if (dump_enabled_p ())
9946     {
9947       dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
9948       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
9949     }
9950 
9951   if (! vect_stmt_in_region_p (vinfo, *def_stmt))
9952     *dt = vect_external_def;
9953   else
9954     {
9955       stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
9956       *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
9957     }
9958 
9959   if (dump_enabled_p ())
9960     {
9961       dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
9962       switch (*dt)
9963 	{
9964 	case vect_uninitialized_def:
9965 	  dump_printf (MSG_NOTE, "uninitialized\n");
9966 	  break;
9967 	case vect_constant_def:
9968 	  dump_printf (MSG_NOTE, "constant\n");
9969 	  break;
9970 	case vect_external_def:
9971 	  dump_printf (MSG_NOTE, "external\n");
9972 	  break;
9973 	case vect_internal_def:
9974 	  dump_printf (MSG_NOTE, "internal\n");
9975 	  break;
9976 	case vect_induction_def:
9977 	  dump_printf (MSG_NOTE, "induction\n");
9978 	  break;
9979 	case vect_reduction_def:
9980 	  dump_printf (MSG_NOTE, "reduction\n");
9981 	  break;
9982 	case vect_double_reduction_def:
9983 	  dump_printf (MSG_NOTE, "double reduction\n");
9984 	  break;
9985 	case vect_nested_cycle:
9986 	  dump_printf (MSG_NOTE, "nested cycle\n");
9987 	  break;
9988 	case vect_unknown_def_type:
9989 	  dump_printf (MSG_NOTE, "unknown\n");
9990 	  break;
9991 	}
9992     }
9993 
9994   if (*dt == vect_unknown_def_type)
9995     {
9996       if (dump_enabled_p ())
9997         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9998                          "Unsupported pattern.\n");
9999       return false;
10000     }
10001 
10002   switch (gimple_code (*def_stmt))
10003     {
10004     case GIMPLE_PHI:
10005     case GIMPLE_ASSIGN:
10006     case GIMPLE_CALL:
10007       break;
10008     default:
10009       if (dump_enabled_p ())
10010         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10011                          "unsupported defining stmt:\n");
10012       return false;
10013     }
10014 
10015   return true;
10016 }
10017 
10018 /* Function vect_is_simple_use.
10019 
10020    Same as vect_is_simple_use but also determines the vector operand
10021    type of OPERAND and stores it to *VECTYPE.  If the definition of
10022    OPERAND is vect_uninitialized_def, vect_constant_def or
   vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
   is responsible for computing the best suited vector type for the
10025    scalar operand.  */
10026 
10027 bool
10028 vect_is_simple_use (tree operand, vec_info *vinfo,
10029 		    gimple **def_stmt, enum vect_def_type *dt, tree *vectype)
10030 {
10031   if (!vect_is_simple_use (operand, vinfo, def_stmt, dt))
10032     return false;
10033 
10034   /* Now get a vector type if the def is internal, otherwise supply
10035      NULL_TREE and leave it up to the caller to figure out a proper
10036      type for the use stmt.  */
10037   if (*dt == vect_internal_def
10038       || *dt == vect_induction_def
10039       || *dt == vect_reduction_def
10040       || *dt == vect_double_reduction_def
10041       || *dt == vect_nested_cycle)
10042     {
10043       stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
10044 
10045       if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10046           && !STMT_VINFO_RELEVANT (stmt_info)
10047           && !STMT_VINFO_LIVE_P (stmt_info))
10048 	stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
10049 
10050       *vectype = STMT_VINFO_VECTYPE (stmt_info);
10051       gcc_assert (*vectype != NULL_TREE);
10052     }
10053   else if (*dt == vect_uninitialized_def
10054 	   || *dt == vect_constant_def
10055 	   || *dt == vect_external_def)
10056     *vectype = NULL_TREE;
10057   else
10058     gcc_unreachable ();
10059 
10060   return true;
10061 }
10062 
10063 
10064 /* Function supportable_widening_operation
10065 
10066    Check whether an operation represented by the code CODE is a
10067    widening operation that is supported by the target platform in
10068    vector form (i.e., when operating on arguments of type VECTYPE_IN
10069    producing a result of type VECTYPE_OUT).
10070 
   Widening operations we currently support are NOP (CONVERT), FLOAT,
   WIDEN_MULT, WIDEN_LSHIFT, DOT_PROD and SAD.  This function checks if
   these operations are supported by the target platform either directly
   (via vector tree-codes), or via target builtins.
10075 
10076    Output:
10077    - CODE1 and CODE2 are codes of vector operations to be used when
10078    vectorizing the operation, if available.
10079    - MULTI_STEP_CVT determines the number of required intermediate steps in
10080    case of multi-step conversion (like char->short->int - in that case
10081    MULTI_STEP_CVT will be 1).
10082    - INTERM_TYPES contains the intermediate type required to perform the
10083    widening operation (short in the above example).  */
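
/* For example, a WIDEN_MULT from 'short' to 'int' with VECTYPE_IN V8HI
   and VECTYPE_OUT V4SI is typically supported as a
   VEC_WIDEN_MULT_LO_EXPR / VEC_WIDEN_MULT_HI_EXPR pair, each taking the
   V8HI operands and producing one V4SI result; in that case
   *MULTI_STEP_CVT is 0 and INTERM_TYPES stays empty.  */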
10084 
10085 bool
10086 supportable_widening_operation (enum tree_code code, gimple *stmt,
10087 				tree vectype_out, tree vectype_in,
10088                                 enum tree_code *code1, enum tree_code *code2,
10089                                 int *multi_step_cvt,
10090                                 vec<tree> *interm_types)
10091 {
10092   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
10093   loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
10094   struct loop *vect_loop = NULL;
10095   machine_mode vec_mode;
10096   enum insn_code icode1, icode2;
10097   optab optab1, optab2;
10098   tree vectype = vectype_in;
10099   tree wide_vectype = vectype_out;
10100   enum tree_code c1, c2;
10101   int i;
10102   tree prev_type, intermediate_type;
10103   machine_mode intermediate_mode, prev_mode;
10104   optab optab3, optab4;
10105 
10106   *multi_step_cvt = 0;
10107   if (loop_info)
10108     vect_loop = LOOP_VINFO_LOOP (loop_info);
10109 
10110   switch (code)
10111     {
10112     case WIDEN_MULT_EXPR:
10113       /* The result of a vectorized widening operation usually requires
10114 	 two vectors (because the widened results do not fit into one vector).
	 The vector results would normally be expected to be generated in
	 the same order as in the original scalar computation,
10117 	 i.e. if 8 results are generated in each vector iteration, they are
10118 	 to be organized as follows:
10119 		vect1: [res1,res2,res3,res4],
10120 		vect2: [res5,res6,res7,res8].
10121 
10122 	 However, in the special case that the result of the widening
10123 	 operation is used in a reduction computation only, the order doesn't
10124 	 matter (because when vectorizing a reduction we change the order of
10125 	 the computation).  Some targets can take advantage of this and
10126 	 generate more efficient code.  For example, targets like Altivec,
10127 	 that support widen_mult using a sequence of {mult_even,mult_odd}
10128 	 generate the following vectors:
10129 		vect1: [res1,res3,res5,res7],
10130 		vect2: [res2,res4,res6,res8].
10131 
10132 	 When vectorizing outer-loops, we execute the inner-loop sequentially
10133 	 (each vectorized inner-loop iteration contributes to VF outer-loop
	 iterations in parallel).  We therefore don't allow changing the
	 order of the computation in the inner-loop during outer-loop
	 vectorization.  */
10137       /* TODO: Another case in which order doesn't *really* matter is when we
10138 	 widen and then contract again, e.g. (short)((int)x * y >> 8).
10139 	 Normally, pack_trunc performs an even/odd permute, whereas the
10140 	 repack from an even/odd expansion would be an interleave, which
10141 	 would be significantly simpler for e.g. AVX2.  */
10142       /* In any case, in order to avoid duplicating the code below, recurse
10143 	 on VEC_WIDEN_MULT_EVEN_EXPR.  If it succeeds, all the return values
10144 	 are properly set up for the caller.  If we fail, we'll continue with
10145 	 a VEC_WIDEN_MULT_LO/HI_EXPR check.  */
10146       if (vect_loop
10147 	  && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
10148 	  && !nested_in_vect_loop_p (vect_loop, stmt)
10149 	  && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
10150 					     stmt, vectype_out, vectype_in,
10151 					     code1, code2, multi_step_cvt,
10152 					     interm_types))
10153         {
10154           /* Elements in a vector with vect_used_by_reduction property cannot
10155              be reordered if the use chain with this property does not have the
             same operation.  One such example is s += a * b, where elements
10157              in a and b cannot be reordered.  Here we check if the vector defined
10158              by STMT is only directly used in the reduction statement.  */
10159           tree lhs = gimple_assign_lhs (stmt);
10160           use_operand_p dummy;
10161           gimple *use_stmt;
10162           stmt_vec_info use_stmt_info = NULL;
10163           if (single_imm_use (lhs, &dummy, &use_stmt)
10164               && (use_stmt_info = vinfo_for_stmt (use_stmt))
10165               && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
10166             return true;
10167         }
10168       c1 = VEC_WIDEN_MULT_LO_EXPR;
10169       c2 = VEC_WIDEN_MULT_HI_EXPR;
10170       break;
10171 
10172     case DOT_PROD_EXPR:
10173       c1 = DOT_PROD_EXPR;
10174       c2 = DOT_PROD_EXPR;
10175       break;
10176 
10177     case SAD_EXPR:
10178       c1 = SAD_EXPR;
10179       c2 = SAD_EXPR;
10180       break;
10181 
10182     case VEC_WIDEN_MULT_EVEN_EXPR:
10183       /* Support the recursion induced just above.  */
10184       c1 = VEC_WIDEN_MULT_EVEN_EXPR;
10185       c2 = VEC_WIDEN_MULT_ODD_EXPR;
10186       break;
10187 
10188     case WIDEN_LSHIFT_EXPR:
10189       c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
10190       c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
10191       break;
10192 
10193     CASE_CONVERT:
10194       c1 = VEC_UNPACK_LO_EXPR;
10195       c2 = VEC_UNPACK_HI_EXPR;
10196       break;
10197 
10198     case FLOAT_EXPR:
10199       c1 = VEC_UNPACK_FLOAT_LO_EXPR;
10200       c2 = VEC_UNPACK_FLOAT_HI_EXPR;
10201       break;
10202 
10203     case FIX_TRUNC_EXPR:
10204       /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
10205 	 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
10206 	 computing the operation.  */
10207       return false;
10208 
10209     default:
10210       gcc_unreachable ();
10211     }
10212 
10213   if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
10214     std::swap (c1, c2);
10215 
10216   if (code == FIX_TRUNC_EXPR)
10217     {
      /* The signedness is determined from the output operand.  */
10219       optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
10220       optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
10221     }
10222   else
10223     {
10224       optab1 = optab_for_tree_code (c1, vectype, optab_default);
10225       optab2 = optab_for_tree_code (c2, vectype, optab_default);
10226     }
10227 
10228   if (!optab1 || !optab2)
10229     return false;
10230 
10231   vec_mode = TYPE_MODE (vectype);
10232   if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
10233        || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
10234     return false;
10235 
10236   *code1 = c1;
10237   *code2 = c2;
10238 
10239   if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
10240       && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
      /* For scalar masks we may have different boolean
	 vector types having the same QImode.  Thus we
	 add an additional check for the number of elements.  */
10244     return (!VECTOR_BOOLEAN_TYPE_P (vectype)
10245 	    || known_eq (TYPE_VECTOR_SUBPARTS (vectype),
10246 			 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));
10247 
10248   /* Check if it's a multi-step conversion that can be done using intermediate
10249      types.  */
10250 
10251   prev_type = vectype;
10252   prev_mode = vec_mode;
10253 
10254   if (!CONVERT_EXPR_CODE_P (code))
10255     return false;
10256 
  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
     not.  */
10261   interm_types->create (MAX_INTERM_CVT_STEPS);
10262   for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
10263     {
10264       intermediate_mode = insn_data[icode1].operand[0].mode;
10265       if (VECTOR_BOOLEAN_TYPE_P (prev_type))
10266 	{
10267 	  intermediate_type = vect_halve_mask_nunits (prev_type);
10268 	  if (intermediate_mode != TYPE_MODE (intermediate_type))
10269 	    return false;
10270 	}
10271       else
10272 	intermediate_type
10273 	  = lang_hooks.types.type_for_mode (intermediate_mode,
10274 					    TYPE_UNSIGNED (prev_type));
10275 
10276       optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
10277       optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
10278 
10279       if (!optab3 || !optab4
10280           || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
10281 	  || insn_data[icode1].operand[0].mode != intermediate_mode
10282 	  || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
10283 	  || insn_data[icode2].operand[0].mode != intermediate_mode
10284 	  || ((icode1 = optab_handler (optab3, intermediate_mode))
10285 	      == CODE_FOR_nothing)
10286 	  || ((icode2 = optab_handler (optab4, intermediate_mode))
10287 	      == CODE_FOR_nothing))
10288 	break;
10289 
10290       interm_types->quick_push (intermediate_type);
10291       (*multi_step_cvt)++;
10292 
10293       if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
10294 	  && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
10295 	return (!VECTOR_BOOLEAN_TYPE_P (vectype)
10296 		|| known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
10297 			     TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));
10298 
10299       prev_type = intermediate_type;
10300       prev_mode = intermediate_mode;
10301     }
10302 
10303   interm_types->release ();
10304   return false;
10305 }
10306 
10307 
10308 /* Function supportable_narrowing_operation
10309 
10310    Check whether an operation represented by the code CODE is a
10311    narrowing operation that is supported by the target platform in
10312    vector form (i.e., when operating on arguments of type VECTYPE_IN
10313    and producing a result of type VECTYPE_OUT).
10314 
10315    Narrowing operations we currently support are NOP (CONVERT) and
10316    FIX_TRUNC.  This function checks if these operations are supported by
10317    the target platform directly via vector tree-codes.
10318 
10319    Output:
10320    - CODE1 is the code of a vector operation to be used when
10321    vectorizing the operation, if available.
10322    - MULTI_STEP_CVT determines the number of required intermediate steps in
10323    case of multi-step conversion (like int->short->char - in that case
10324    MULTI_STEP_CVT will be 1).
10325    - INTERM_TYPES contains the intermediate type required to perform the
10326    narrowing operation (short in the above example).   */
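
/* For example, narrowing 'int' to 'short' with VECTYPE_IN V4SI and
   VECTYPE_OUT V8HI is typically a single VEC_PACK_TRUNC_EXPR combining
   two V4SI inputs into one V8HI result (*MULTI_STEP_CVT 0), while
   narrowing 'int' to 'char' goes through an intermediate 8 x short
   vector type (*MULTI_STEP_CVT 1, recorded in INTERM_TYPES).  */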
10327 
10328 bool
10329 supportable_narrowing_operation (enum tree_code code,
10330 				 tree vectype_out, tree vectype_in,
10331 				 enum tree_code *code1, int *multi_step_cvt,
10332                                  vec<tree> *interm_types)
10333 {
10334   machine_mode vec_mode;
10335   enum insn_code icode1;
10336   optab optab1, interm_optab;
10337   tree vectype = vectype_in;
10338   tree narrow_vectype = vectype_out;
10339   enum tree_code c1;
10340   tree intermediate_type, prev_type;
10341   machine_mode intermediate_mode, prev_mode;
10342   int i;
10343   bool uns;
10344 
10345   *multi_step_cvt = 0;
10346   switch (code)
10347     {
10348     CASE_CONVERT:
10349       c1 = VEC_PACK_TRUNC_EXPR;
10350       break;
10351 
10352     case FIX_TRUNC_EXPR:
10353       c1 = VEC_PACK_FIX_TRUNC_EXPR;
10354       break;
10355 
10356     case FLOAT_EXPR:
10357       /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
10358 	 tree code and optabs used for computing the operation.  */
10359       return false;
10360 
10361     default:
10362       gcc_unreachable ();
10363     }
10364 
10365   if (code == FIX_TRUNC_EXPR)
    /* The signedness is determined from the output operand.  */
10367     optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
10368   else
10369     optab1 = optab_for_tree_code (c1, vectype, optab_default);
10370 
10371   if (!optab1)
10372     return false;
10373 
10374   vec_mode = TYPE_MODE (vectype);
10375   if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
10376     return false;
10377 
10378   *code1 = c1;
10379 
10380   if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
    /* For scalar masks we may have different boolean
       vector types having the same QImode.  Thus we
       add an additional check for the number of elements.  */
10384     return (!VECTOR_BOOLEAN_TYPE_P (vectype)
10385 	    || known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
10386 			 TYPE_VECTOR_SUBPARTS (narrow_vectype)));
10387 
10388   /* Check if it's a multi-step conversion that can be done using intermediate
10389      types.  */
10390   prev_mode = vec_mode;
10391   prev_type = vectype;
10392   if (code == FIX_TRUNC_EXPR)
10393     uns = TYPE_UNSIGNED (vectype_out);
10394   else
10395     uns = TYPE_UNSIGNED (vectype);
10396 
10397   /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
10398      conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
10399      costly than signed.  */
10400   if (code == FIX_TRUNC_EXPR && uns)
10401     {
10402       enum insn_code icode2;
10403 
10404       intermediate_type
10405 	= lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
10406       interm_optab
10407 	= optab_for_tree_code (c1, intermediate_type, optab_default);
10408       if (interm_optab != unknown_optab
10409 	  && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
10410 	  && insn_data[icode1].operand[0].mode
10411 	     == insn_data[icode2].operand[0].mode)
10412 	{
10413 	  uns = false;
10414 	  optab1 = interm_optab;
10415 	  icode1 = icode2;
10416 	}
10417     }
10418 
  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the narrowing sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
10422   interm_types->create (MAX_INTERM_CVT_STEPS);
10423   for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
10424     {
10425       intermediate_mode = insn_data[icode1].operand[0].mode;
10426       if (VECTOR_BOOLEAN_TYPE_P (prev_type))
10427 	{
10428 	  intermediate_type = vect_double_mask_nunits (prev_type);
10429 	  if (intermediate_mode != TYPE_MODE (intermediate_type))
10430 	    return false;
10431 	}
10432       else
10433 	intermediate_type
10434 	  = lang_hooks.types.type_for_mode (intermediate_mode, uns);
10435       interm_optab
10436 	= optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
10437 			       optab_default);
10438       if (!interm_optab
10439 	  || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
10440 	  || insn_data[icode1].operand[0].mode != intermediate_mode
10441 	  || ((icode1 = optab_handler (interm_optab, intermediate_mode))
10442 	      == CODE_FOR_nothing))
10443 	break;
10444 
10445       interm_types->quick_push (intermediate_type);
10446       (*multi_step_cvt)++;
10447 
10448       if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
10449 	return (!VECTOR_BOOLEAN_TYPE_P (vectype)
10450 		|| known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
10451 			     TYPE_VECTOR_SUBPARTS (narrow_vectype)));
10452 
10453       prev_mode = intermediate_mode;
10454       prev_type = intermediate_type;
10455       optab1 = interm_optab;
10456     }
10457 
10458   interm_types->release ();
10459   return false;
10460 }
10461 
10462 /* Generate and return a statement that sets vector mask MASK such that
10463    MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I.  */
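
/* For example, vect_gen_while (mask, i, n) emits

     mask = .WHILE_ULT (i, n, { 0, ... });

   which sets MASK[j] to true exactly when i + j < n, i.e. roughly the
   first MIN (n - i, number of elements) lanes of MASK.  */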
10464 
10465 gcall *
10466 vect_gen_while (tree mask, tree start_index, tree end_index)
10467 {
10468   tree cmp_type = TREE_TYPE (start_index);
10469   tree mask_type = TREE_TYPE (mask);
10470   gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
10471 						       cmp_type, mask_type,
10472 						       OPTIMIZE_FOR_SPEED));
10473   gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
10474 					    start_index, end_index,
10475 					    build_zero_cst (mask_type));
10476   gimple_call_set_lhs (call, mask);
10477   return call;
10478 }
10479 
10480 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
10481    J + START_INDEX < END_INDEX for all J <= I.  Add the statements to SEQ.  */
10482 
10483 tree
10484 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
10485 		    tree end_index)
10486 {
10487   tree tmp = make_ssa_name (mask_type);
10488   gcall *call = vect_gen_while (tmp, start_index, end_index);
10489   gimple_seq_add_stmt (seq, call);
10490   return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
10491 }
10492