1 /* Statement Analysis and Transformation for Vectorization
2    Copyright (C) 2003-2020 Free Software Foundation, Inc.
3    Contributed by Dorit Naishlos <dorit@il.ibm.com>
4    and Ira Rosen <irar@il.ibm.com>
5 
6 This file is part of GCC.
7 
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12 
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
16 for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3.  If not see
20 <http://www.gnu.org/licenses/>.  */
21 
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h"		/* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "explow.h"
47 #include "tree-ssa-loop.h"
48 #include "tree-scalar-evolution.h"
49 #include "tree-vectorizer.h"
50 #include "builtins.h"
51 #include "internal-fn.h"
52 #include "tree-vector-builder.h"
53 #include "vec-perm-indices.h"
54 #include "tree-ssa-loop-niter.h"
55 #include "gimple-fold.h"
56 #include "regs.h"
57 #include "attribs.h"
58 
59 /* For lang_hooks.types.type_for_mode.  */
60 #include "langhooks.h"
61 
62 /* Return the vectorized type for the given statement.  */
63 
64 tree
65 stmt_vectype (class _stmt_vec_info *stmt_info)
66 {
67   return STMT_VINFO_VECTYPE (stmt_info);
68 }
69 
70 /* Return TRUE iff the given statement is in an inner loop relative to
71    the loop being vectorized.  */
72 bool
73 stmt_in_inner_loop_p (class _stmt_vec_info *stmt_info)
74 {
75   gimple *stmt = STMT_VINFO_STMT (stmt_info);
76   basic_block bb = gimple_bb (stmt);
77   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
78   class loop* loop;
79 
80   if (!loop_vinfo)
81     return false;
82 
83   loop = LOOP_VINFO_LOOP (loop_vinfo);
84 
85   return (bb->loop_father == loop->inner);
86 }
87 
88 /* Record the cost of a statement, either by directly informing the
89    target model or by saving it in a vector for later processing.
90    Return a preliminary estimate of the statement's cost.  */
91 
92 unsigned
93 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
94 		  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
95 		  int misalign, enum vect_cost_model_location where)
96 {
97   if ((kind == vector_load || kind == unaligned_load)
98       && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
99     kind = vector_gather_load;
100   if ((kind == vector_store || kind == unaligned_store)
101       && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
102     kind = vector_scatter_store;
103 
104   stmt_info_for_cost si = { count, kind, where, stmt_info, misalign };
105   body_cost_vec->safe_push (si);
106 
107   tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
108   return (unsigned)
109       (builtin_vectorization_cost (kind, vectype, misalign) * count);
110 }
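
/* Illustrative use (a sketch only; COST_VEC and DR_INFO are placeholder
   names):

     record_stmt_cost (&cost_vec, 2, unaligned_load, stmt_info,
		       DR_MISALIGNMENT (dr_info), vect_body);

   for a stmt with STMT_VINFO_GATHER_SCATTER_P set records two entries of
   kind vector_gather_load because of the kind adjustment above, and
   returns twice the target's builtin_vectorization_cost for that kind.  */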
111 
112 /* Return a variable of type ELEM_TYPE[NELEMS].  */
113 
114 static tree
115 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
116 {
117   return create_tmp_var (build_array_type_nelts (elem_type, nelems),
118 			 "vect_array");
119 }
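
/* For example (V4SF is used purely for illustration), calling this with a
   V4SF element type and NELEMS == 2 yields a temporary "vect_array" of
   type V4SF[2]; read_vector_array and write_vector_array below then
   access its individual vectors.  */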
120 
121 /* ARRAY is an array of vectors created by create_vector_array.
122    Return an SSA_NAME for the vector in index N.  The reference
123    is part of the vectorization of STMT_INFO and the vector is associated
124    with scalar destination SCALAR_DEST.  */
125 
126 static tree
127 read_vector_array (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
128 		   tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
129 {
130   tree vect_type, vect, vect_name, array_ref;
131   gimple *new_stmt;
132 
133   gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
134   vect_type = TREE_TYPE (TREE_TYPE (array));
135   vect = vect_create_destination_var (scalar_dest, vect_type);
136   array_ref = build4 (ARRAY_REF, vect_type, array,
137 		      build_int_cst (size_type_node, n),
138 		      NULL_TREE, NULL_TREE);
139 
140   new_stmt = gimple_build_assign (vect, array_ref);
141   vect_name = make_ssa_name (vect, new_stmt);
142   gimple_assign_set_lhs (new_stmt, vect_name);
143   vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
144 
145   return vect_name;
146 }
147 
148 /* ARRAY is an array of vectors created by create_vector_array.
149    Emit code to store SSA_NAME VECT in index N of the array.
150    The store is part of the vectorization of STMT_INFO.  */
151 
152 static void
153 write_vector_array (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
154 		    tree vect, tree array, unsigned HOST_WIDE_INT n)
155 {
156   tree array_ref;
157   gimple *new_stmt;
158 
159   array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
160 		      build_int_cst (size_type_node, n),
161 		      NULL_TREE, NULL_TREE);
162 
163   new_stmt = gimple_build_assign (array_ref, vect);
164   vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
165 }
166 
167 /* PTR is a pointer to an array of type TYPE.  Return a representation
168    of *PTR.  The memory reference replaces the scalar references of the
169    data-reference group being vectorized.  */
170 
171 static tree
172 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
173 {
174   tree mem_ref;
175 
176   mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
177   /* Arrays have the same alignment as their type.  */
178   set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
179   return mem_ref;
180 }
181 
182 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
183    Emit the clobber before *GSI.  */
184 
185 static void
186 vect_clobber_variable (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
187 		       tree var)
188 {
189   tree clobber = build_clobber (TREE_TYPE (var));
190   gimple *new_stmt = gimple_build_assign (var, clobber);
191   vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
192 }
193 
194 /* Utility functions used by vect_mark_stmts_to_be_vectorized.  */
195 
196 /* Function vect_mark_relevant.
197 
198    Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST.  */
199 
200 static void
201 vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
202 		    enum vect_relevant relevant, bool live_p)
203 {
204   enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
205   bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
206 
207   if (dump_enabled_p ())
208     dump_printf_loc (MSG_NOTE, vect_location,
209 		     "mark relevant %d, live %d: %G", relevant, live_p,
210 		     stmt_info->stmt);
211 
212   /* If this stmt is an original stmt in a pattern, we might need to mark its
213      related pattern stmt instead of the original stmt.  However, such stmts
214      may have their own uses that are not in any pattern; in such cases the
215      stmt itself should be marked.  */
216   if (STMT_VINFO_IN_PATTERN_P (stmt_info))
217     {
218       /* This is the last stmt in a sequence that was detected as a
219 	 pattern that can potentially be vectorized.  Don't mark the stmt
220 	 as relevant/live because it's not going to be vectorized.
221 	 Instead mark the pattern-stmt that replaces it.  */
222 
223       if (dump_enabled_p ())
224 	dump_printf_loc (MSG_NOTE, vect_location,
225 			 "last stmt in pattern. don't mark"
226 			 " relevant/live.\n");
227       stmt_vec_info old_stmt_info = stmt_info;
228       stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
229       gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
230       save_relevant = STMT_VINFO_RELEVANT (stmt_info);
231       save_live_p = STMT_VINFO_LIVE_P (stmt_info);
232     }
233 
234   STMT_VINFO_LIVE_P (stmt_info) |= live_p;
235   if (relevant > STMT_VINFO_RELEVANT (stmt_info))
236     STMT_VINFO_RELEVANT (stmt_info) = relevant;
237 
238   if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
239       && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
240     {
241       if (dump_enabled_p ())
242         dump_printf_loc (MSG_NOTE, vect_location,
243                          "already marked relevant/live.\n");
244       return;
245     }
246 
247   worklist->safe_push (stmt_info);
248 }
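
/* Summary of the logic above: STMT_VINFO_RELEVANT only ever increases and
   STMT_VINFO_LIVE_P is only ever set, so a stmt is re-queued only when one
   of them changes, which bounds the number of worklist iterations in
   vect_mark_stmts_to_be_vectorized.  */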
249 
250 
251 /* Function is_simple_and_all_uses_invariant
252 
253    Return true if STMT_INFO is simple and all uses of it are invariant.  */
254 
255 bool
256 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
257 				  loop_vec_info loop_vinfo)
258 {
259   tree op;
260   ssa_op_iter iter;
261 
262   gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
263   if (!stmt)
264     return false;
265 
266   FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
267     {
268       enum vect_def_type dt = vect_uninitialized_def;
269 
270       if (!vect_is_simple_use (op, loop_vinfo, &dt))
271 	{
272 	  if (dump_enabled_p ())
273 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
274 			     "use not simple.\n");
275 	  return false;
276 	}
277 
278       if (dt != vect_external_def && dt != vect_constant_def)
279 	return false;
280     }
281   return true;
282 }
283 
284 /* Function vect_stmt_relevant_p.
285 
286    Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
287    is "relevant for vectorization".
288 
289    A stmt is considered "relevant for vectorization" if:
290    - it has uses outside the loop.
291    - it has vdefs (it alters memory).
292    - it is a control stmt in the loop (except for the exit condition).
293 
294    CHECKME: what other side effects would the vectorizer allow?  */
295 
296 static bool
297 vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
298 		      enum vect_relevant *relevant, bool *live_p)
299 {
300   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
301   ssa_op_iter op_iter;
302   imm_use_iterator imm_iter;
303   use_operand_p use_p;
304   def_operand_p def_p;
305 
306   *relevant = vect_unused_in_scope;
307   *live_p = false;
308 
309   /* cond stmt other than loop exit cond.  */
310   if (is_ctrl_stmt (stmt_info->stmt)
311       && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
312     *relevant = vect_used_in_scope;
313 
314   /* changing memory.  */
315   if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
316     if (gimple_vdef (stmt_info->stmt)
317 	&& !gimple_clobber_p (stmt_info->stmt))
318       {
319 	if (dump_enabled_p ())
320 	  dump_printf_loc (MSG_NOTE, vect_location,
321                            "vec_stmt_relevant_p: stmt has vdefs.\n");
322 	*relevant = vect_used_in_scope;
323       }
324 
325   /* uses outside the loop.  */
326   FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
327     {
328       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
329 	{
330 	  basic_block bb = gimple_bb (USE_STMT (use_p));
331 	  if (!flow_bb_inside_loop_p (loop, bb))
332 	    {
333 	      if (is_gimple_debug (USE_STMT (use_p)))
334 		continue;
335 
336 	      if (dump_enabled_p ())
337 		dump_printf_loc (MSG_NOTE, vect_location,
338                                  "vec_stmt_relevant_p: used out of loop.\n");
339 
340 	      /* We expect all such uses to be in the loop exit phis
341 		 (because of loop closed form)   */
342 	      gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
343 	      gcc_assert (bb == single_exit (loop)->dest);
344 
345               *live_p = true;
346 	    }
347 	}
348     }
349 
350   if (*live_p && *relevant == vect_unused_in_scope
351       && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
352     {
353       if (dump_enabled_p ())
354 	dump_printf_loc (MSG_NOTE, vect_location,
355 			 "vec_stmt_relevant_p: stmt live but not relevant.\n");
356       *relevant = vect_used_only_live;
357     }
358 
359   return (*live_p || *relevant);
360 }
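
/* For example, a stmt whose only use outside the loop is in an exit phi is
   marked *LIVE_P = true; if it is not otherwise relevant and is not a
   simple assignment with only invariant operands, it is also promoted to
   vect_used_only_live.  */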
361 
362 
363 /* Function exist_non_indexing_operands_for_use_p
364 
365    USE is one of the uses attached to STMT_INFO.  Check if USE is
366    used in STMT_INFO for anything other than indexing an array.  */
367 
368 static bool
369 exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
370 {
371   tree operand;
372 
373   /* USE corresponds to some operand in STMT.  If there is no data
374      reference in STMT, then any operand that corresponds to USE
375      is not indexing an array.  */
376   if (!STMT_VINFO_DATA_REF (stmt_info))
377     return true;
378 
379   /* STMT has a data_ref.  FORNOW this means that it is of one of
380      the following forms:
381      -1- ARRAY_REF = var
382      -2- var = ARRAY_REF
383      (This should have been verified in analyze_data_refs).
384 
385      'var' in the second case corresponds to a def, not a use,
386      so USE cannot correspond to any operands that are not used
387      for array indexing.
388 
389      Therefore, all we need to check is if STMT falls into the
390      first case, and whether var corresponds to USE.  */
391 
392   gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
393   if (!assign || !gimple_assign_copy_p (assign))
394     {
395       gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
396       if (call && gimple_call_internal_p (call))
397 	{
398 	  internal_fn ifn = gimple_call_internal_fn (call);
399 	  int mask_index = internal_fn_mask_index (ifn);
400 	  if (mask_index >= 0
401 	      && use == gimple_call_arg (call, mask_index))
402 	    return true;
403 	  int stored_value_index = internal_fn_stored_value_index (ifn);
404 	  if (stored_value_index >= 0
405 	      && use == gimple_call_arg (call, stored_value_index))
406 	    return true;
407 	  if (internal_gather_scatter_fn_p (ifn)
408 	      && use == gimple_call_arg (call, 1))
409 	    return true;
410 	}
411       return false;
412     }
413 
414   if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
415     return false;
416   operand = gimple_assign_rhs1 (assign);
417   if (TREE_CODE (operand) != SSA_NAME)
418     return false;
419 
420   if (operand == use)
421     return true;
422 
423   return false;
424 }
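
/* For instance, for the store "a[i_1] = x_2" the stored value x_2 is a
   non-indexing use (the function returns true for it), while for the load
   "x_3 = a[i_1]" every use only feeds the address computation and the
   function returns false.  */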
425 
426 
427 /*
428    Function process_use.
429 
430    Inputs:
431    - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
432    - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
433      that defined USE.  This is done by calling mark_relevant and passing it
434      the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
435    - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
436      be performed.
437 
438    Outputs:
439    Generally, LIVE_P and RELEVANT are used to define the liveness and
440    relevance info of the DEF_STMT of this USE:
441        STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
442        STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
443    Exceptions:
444    - case 1: If USE is used only for address computations (e.g. array indexing),
445    which does not need to be directly vectorized, then the liveness/relevance
446    of the respective DEF_STMT is left unchanged.
447    - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
448    we skip DEF_STMT because it has already been processed.
449    - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
450    "relevant" will be modified accordingly.
451 
452    Return true if everything is as expected. Return false otherwise.  */
453 
454 static opt_result
455 process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
456 	     enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
457 	     bool force)
458 {
459   stmt_vec_info dstmt_vinfo;
460   enum vect_def_type dt;
461 
462   /* case 1: we are only interested in uses that need to be vectorized.  Uses
463      that are used for address computation are not considered relevant.  */
464   if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
465     return opt_result::success ();
466 
467   if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
468     return opt_result::failure_at (stmt_vinfo->stmt,
469 				   "not vectorized:"
470 				   " unsupported use in stmt.\n");
471 
472   if (!dstmt_vinfo)
473     return opt_result::success ();
474 
475   basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
476   basic_block bb = gimple_bb (stmt_vinfo->stmt);
477 
478   /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
479      We have to force the stmt live since the epilogue loop needs it to
480      continue computing the reduction.  */
481   if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
482       && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
483       && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
484       && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
485       && bb->loop_father == def_bb->loop_father)
486     {
487       if (dump_enabled_p ())
488 	dump_printf_loc (MSG_NOTE, vect_location,
489 			 "reduc-stmt defining reduc-phi in the same nest.\n");
490       vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
491       return opt_result::success ();
492     }
493 
494   /* case 3a: outer-loop stmt defining an inner-loop stmt:
495 	outer-loop-header-bb:
496 		d = dstmt_vinfo
497 	inner-loop:
498 		stmt # use (d)
499 	outer-loop-tail-bb:
500 		...		  */
501   if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
502     {
503       if (dump_enabled_p ())
504 	dump_printf_loc (MSG_NOTE, vect_location,
505                          "outer-loop def-stmt defining inner-loop stmt.\n");
506 
507       switch (relevant)
508 	{
509 	case vect_unused_in_scope:
510 	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
511 		      vect_used_in_scope : vect_unused_in_scope;
512 	  break;
513 
514 	case vect_used_in_outer_by_reduction:
515           gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
516 	  relevant = vect_used_by_reduction;
517 	  break;
518 
519 	case vect_used_in_outer:
520           gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
521 	  relevant = vect_used_in_scope;
522 	  break;
523 
524 	case vect_used_in_scope:
525 	  break;
526 
527 	default:
528 	  gcc_unreachable ();
529 	}
530     }
531 
532   /* case 3b: inner-loop stmt defining an outer-loop stmt:
533 	outer-loop-header-bb:
534 		...
535 	inner-loop:
536 		d = dstmt_vinfo
537 	outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
538 		stmt # use (d)		*/
539   else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
540     {
541       if (dump_enabled_p ())
542 	dump_printf_loc (MSG_NOTE, vect_location,
543                          "inner-loop def-stmt defining outer-loop stmt.\n");
544 
545       switch (relevant)
546         {
547         case vect_unused_in_scope:
548           relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
549             || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
550                       vect_used_in_outer_by_reduction : vect_unused_in_scope;
551           break;
552 
553         case vect_used_by_reduction:
554 	case vect_used_only_live:
555           relevant = vect_used_in_outer_by_reduction;
556           break;
557 
558         case vect_used_in_scope:
559           relevant = vect_used_in_outer;
560           break;
561 
562         default:
563           gcc_unreachable ();
564         }
565     }
566   /* We are also not interested in uses on loop PHI backedges that are
567      inductions.  Otherwise we'll needlessly vectorize the IV increment
568      and cause hybrid SLP for SLP inductions.  Unless the PHI is live
569      of course.  */
570   else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
571 	   && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
572 	   && ! STMT_VINFO_LIVE_P (stmt_vinfo)
573 	   && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
574 				      loop_latch_edge (bb->loop_father))
575 	       == use))
576     {
577       if (dump_enabled_p ())
578 	dump_printf_loc (MSG_NOTE, vect_location,
579                          "induction value on backedge.\n");
580       return opt_result::success ();
581     }
582 
583 
584   vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
585   return opt_result::success ();
586 }
587 
588 
589 /* Function vect_mark_stmts_to_be_vectorized.
590 
591    Not all stmts in the loop need to be vectorized. For example:
592 
593      for i...
594        for j...
595    1.    T0 = i + j
596    2.	 T1 = a[T0]
597 
598    3.    j = j + 1
599 
600    Stmts 1 and 3 do not need to be vectorized, because loop control and
601    addressing of vectorized data-refs are handled differently.
602 
603    This pass detects such stmts.  */
604 
605 opt_result
606 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
607 {
608   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
609   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
610   unsigned int nbbs = loop->num_nodes;
611   gimple_stmt_iterator si;
612   unsigned int i;
613   basic_block bb;
614   bool live_p;
615   enum vect_relevant relevant;
616 
617   DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
618 
619   auto_vec<stmt_vec_info, 64> worklist;
620 
621   /* 1. Init worklist.  */
622   for (i = 0; i < nbbs; i++)
623     {
624       bb = bbs[i];
625       for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
626 	{
627 	  stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
628 	  if (dump_enabled_p ())
629 	    dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
630 			     phi_info->stmt);
631 
632 	  if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
633 	    vect_mark_relevant (&worklist, phi_info, relevant, live_p);
634 	}
635       for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
636 	{
637 	  stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
638 	  if (dump_enabled_p ())
639 	      dump_printf_loc (MSG_NOTE, vect_location,
640 			       "init: stmt relevant? %G", stmt_info->stmt);
641 
642 	  if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
643 	    vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
644 	}
645     }
646 
647   /* 2. Process_worklist */
648   while (worklist.length () > 0)
649     {
650       use_operand_p use_p;
651       ssa_op_iter iter;
652 
653       stmt_vec_info stmt_vinfo = worklist.pop ();
654       if (dump_enabled_p ())
655 	dump_printf_loc (MSG_NOTE, vect_location,
656 			 "worklist: examine stmt: %G", stmt_vinfo->stmt);
657 
658       /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
659 	 (DEF_STMT) as relevant/irrelevant according to the relevance property
660 	 of STMT.  */
661       relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
662 
663       /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
664 	 propagated as is to the DEF_STMTs of its USEs.
665 
666 	 One exception is when STMT has been identified as defining a reduction
667 	 variable; in this case we set the relevance to vect_used_by_reduction.
668 	 This is because we distinguish between two kinds of relevant stmts -
669 	 those that are used by a reduction computation, and those that are
670 	 (also) used by a regular computation.  This allows us later on to
671 	 identify stmts that are used solely by a reduction, and therefore the
672 	 order of the results that they produce does not have to be kept.  */
673 
674       switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
675         {
676           case vect_reduction_def:
677 	    gcc_assert (relevant != vect_unused_in_scope);
678 	    if (relevant != vect_unused_in_scope
679 		&& relevant != vect_used_in_scope
680 		&& relevant != vect_used_by_reduction
681 		&& relevant != vect_used_only_live)
682 	      return opt_result::failure_at
683 		(stmt_vinfo->stmt, "unsupported use of reduction.\n");
684 	    break;
685 
686           case vect_nested_cycle:
687 	    if (relevant != vect_unused_in_scope
688 		&& relevant != vect_used_in_outer_by_reduction
689 		&& relevant != vect_used_in_outer)
690 	      return opt_result::failure_at
691 		(stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
692             break;
693 
694           case vect_double_reduction_def:
695 	    if (relevant != vect_unused_in_scope
696 		&& relevant != vect_used_by_reduction
697 		&& relevant != vect_used_only_live)
698 	      return opt_result::failure_at
699 		(stmt_vinfo->stmt, "unsupported use of double reduction.\n");
700             break;
701 
702           default:
703             break;
704         }
705 
706       if (is_pattern_stmt_p (stmt_vinfo))
707         {
708           /* Pattern statements are not inserted into the code, so
709              FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
710              have to scan the RHS or function arguments instead.  */
711 	  if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
712 	    {
713 	      enum tree_code rhs_code = gimple_assign_rhs_code (assign);
714 	      tree op = gimple_assign_rhs1 (assign);
715 
716 	      i = 1;
717 	      if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
718 		{
719 		  opt_result res
720 		    = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
721 				   loop_vinfo, relevant, &worklist, false);
722 		  if (!res)
723 		    return res;
724 		  res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
725 				     loop_vinfo, relevant, &worklist, false);
726 		  if (!res)
727 		    return res;
728 		  i = 2;
729 		}
730 	      for (; i < gimple_num_ops (assign); i++)
731 		{
732 		  op = gimple_op (assign, i);
733                   if (TREE_CODE (op) == SSA_NAME)
734 		    {
735 		      opt_result res
736 			= process_use (stmt_vinfo, op, loop_vinfo, relevant,
737 				       &worklist, false);
738 		      if (!res)
739 			return res;
740 		    }
741                  }
742             }
743 	  else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
744 	    {
745 	      for (i = 0; i < gimple_call_num_args (call); i++)
746 		{
747 		  tree arg = gimple_call_arg (call, i);
748 		  opt_result res
749 		    = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
750 				   &worklist, false);
751 		  if (!res)
752 		    return res;
753 		}
754 	    }
755         }
756       else
757 	FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
758           {
759             tree op = USE_FROM_PTR (use_p);
760 	    opt_result res
761 	      = process_use (stmt_vinfo, op, loop_vinfo, relevant,
762 			     &worklist, false);
763 	    if (!res)
764 	      return res;
765           }
766 
767       if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
768 	{
769 	  gather_scatter_info gs_info;
770 	  if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
771 	    gcc_unreachable ();
772 	  opt_result res
773 	    = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
774 			   &worklist, true);
775 	  if (!res)
776 	    {
777 	      if (fatal)
778 		*fatal = false;
779 	      return res;
780 	    }
781 	}
782     } /* while worklist */
783 
784   return opt_result::success ();
785 }
786 
787 /* Compute the prologue cost for invariant or constant operands.  */
788 
789 static unsigned
790 vect_prologue_cost_for_slp_op (slp_tree node, stmt_vec_info stmt_info,
791 			       unsigned opno, enum vect_def_type dt,
792 			       stmt_vector_for_cost *cost_vec)
793 {
794   vec_info *vinfo = stmt_info->vinfo;
795   gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
796   tree op = gimple_op (stmt, opno);
797   unsigned prologue_cost = 0;
798 
799   /* Without looking at the actual initializer a vector of
800      constants can be implemented as a load from the constant pool.
801      When all elements are the same we can use a splat.  */
802   tree vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), node);
803   unsigned group_size = SLP_TREE_SCALAR_STMTS (node).length ();
804   unsigned num_vects_to_check;
805   unsigned HOST_WIDE_INT const_nunits;
806   unsigned nelt_limit;
807   if (TYPE_VECTOR_SUBPARTS (vectype).is_constant (&const_nunits)
808       && ! multiple_p (const_nunits, group_size))
809     {
810       num_vects_to_check = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
811       nelt_limit = const_nunits;
812     }
813   else
814     {
815       /* If either the vector has variable length or the vectors
816 	 are composed of repeated whole groups we only need to
817 	 cost construction once.  All vectors will be the same.  */
818       num_vects_to_check = 1;
819       nelt_limit = group_size;
820     }
821   tree elt = NULL_TREE;
822   unsigned nelt = 0;
823   for (unsigned j = 0; j < num_vects_to_check * nelt_limit; ++j)
824     {
825       unsigned si = j % group_size;
826       if (nelt == 0)
827 	elt = gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt, opno);
828       /* ???  We're just tracking whether all operands of a single
829 	 vector initializer are the same, ideally we'd check if
830 	 we emitted the same one already.  */
831       else if (elt != gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt,
832 				 opno))
833 	elt = NULL_TREE;
834       nelt++;
835       if (nelt == nelt_limit)
836 	{
837 	  /* ???  We need to pass down stmt_info for a vector type
838 	     even if it points to the wrong stmt.  */
839 	  prologue_cost += record_stmt_cost
840 	      (cost_vec, 1,
841 	       dt == vect_external_def
842 	       ? (elt ? scalar_to_vec : vec_construct)
843 	       : vector_load,
844 	       stmt_info, 0, vect_prologue);
845 	  nelt = 0;
846 	}
847     }
848 
849   return prologue_cost;
850 }
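
/* In short: external operands whose elements are all equal are costed as a
   scalar_to_vec splat, externals with differing elements as a
   vec_construct, and constant operands as a vector_load from the constant
   pool, once per vector that has to be built.  */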
851 
852 /* Function vect_model_simple_cost.
853 
854    Models cost for simple operations, i.e. those that only emit ncopies of a
855    single op.  Right now, this does not account for multiple insns that could
856    be generated for the single vector op.  We will handle that shortly.  */
857 
858 static void
859 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
860 			enum vect_def_type *dt,
861 			int ndts,
862 			slp_tree node,
863 			stmt_vector_for_cost *cost_vec,
864 			vect_cost_for_stmt kind = vector_stmt)
865 {
866   int inside_cost = 0, prologue_cost = 0;
867 
868   gcc_assert (cost_vec != NULL);
869 
870   /* ???  Somehow we need to fix this at the callers.  */
871   if (node)
872     ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
873 
874   if (node)
875     {
876       /* Scan operands and account for prologue cost of constants/externals.
877 	 ???  This over-estimates cost for multiple uses and should be
878 	 re-engineered.  */
879       gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
880       tree lhs = gimple_get_lhs (stmt);
881       for (unsigned i = 0; i < gimple_num_ops (stmt); ++i)
882 	{
883 	  tree op = gimple_op (stmt, i);
884 	  enum vect_def_type dt;
885 	  if (!op || op == lhs)
886 	    continue;
887 	  if (vect_is_simple_use (op, stmt_info->vinfo, &dt)
888 	      && (dt == vect_constant_def || dt == vect_external_def))
889 	    prologue_cost += vect_prologue_cost_for_slp_op (node, stmt_info,
890 							    i, dt, cost_vec);
891 	}
892     }
893   else
894     /* Cost the "broadcast" of a scalar operand into a vector operand.
895        Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
896        cost model.  */
897     for (int i = 0; i < ndts; i++)
898       if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
899 	prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
900 					   stmt_info, 0, vect_prologue);
901 
902   /* Adjust for two-operator SLP nodes.  */
903   if (node && SLP_TREE_TWO_OPERATORS (node))
904     {
905       ncopies *= 2;
906       inside_cost += record_stmt_cost (cost_vec, ncopies, vec_perm,
907 				       stmt_info, 0, vect_body);
908     }
909 
910   /* Pass the inside-of-loop statements to the target-specific cost model.  */
911   inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
912 				   stmt_info, 0, vect_body);
913 
914   if (dump_enabled_p ())
915     dump_printf_loc (MSG_NOTE, vect_location,
916                      "vect_model_simple_cost: inside_cost = %d, "
917                      "prologue_cost = %d .\n", inside_cost, prologue_cost);
918 }
919 
920 
921 /* Model cost for type demotion and promotion operations.  PWR is
922    normally zero for single-step promotions and demotions.  It will be
923    one if two-step promotion/demotion is required, and so on.  NCOPIES
924    is the number of vector results (and thus number of instructions)
925    for the narrowest end of the operation chain.  Each additional
926    step doubles the number of instructions required.  */
927 
928 static void
929 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
930 				    enum vect_def_type *dt,
931 				    unsigned int ncopies, int pwr,
932 				    stmt_vector_for_cost *cost_vec)
933 {
934   int i;
935   int inside_cost = 0, prologue_cost = 0;
936 
937   for (i = 0; i < pwr + 1; i++)
938     {
939       inside_cost += record_stmt_cost (cost_vec, ncopies, vec_promote_demote,
940 				       stmt_info, 0, vect_body);
941       ncopies *= 2;
942     }
943 
944   /* FORNOW: Assuming maximum 2 args per stmts.  */
945   for (i = 0; i < 2; i++)
946     if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
947       prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
948 					 stmt_info, 0, vect_prologue);
949 
950   if (dump_enabled_p ())
951     dump_printf_loc (MSG_NOTE, vect_location,
952                      "vect_model_promotion_demotion_cost: inside_cost = %d, "
953                      "prologue_cost = %d .\n", inside_cost, prologue_cost);
954 }
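
/* For example, a two-step promotion (PWR == 1) with NCOPIES == 1 records
   1 + 2 = 3 vec_promote_demote operations in the loop body, since each
   additional step doubles the instruction count.  */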
955 
956 /* Returns true if the current function returns DECL.  */
957 
958 static bool
959 cfun_returns (tree decl)
960 {
961   edge_iterator ei;
962   edge e;
963   FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
964     {
965       greturn *ret = safe_dyn_cast <greturn *> (last_stmt (e->src));
966       if (!ret)
967 	continue;
968       if (gimple_return_retval (ret) == decl)
969 	return true;
970       /* We often end up with an aggregate copy to the result decl,
971          handle that case as well.  First skip intermediate clobbers
972 	 though.  */
973       gimple *def = ret;
974       do
975 	{
976 	  def = SSA_NAME_DEF_STMT (gimple_vuse (def));
977 	}
978       while (gimple_clobber_p (def));
979       if (is_a <gassign *> (def)
980 	  && gimple_assign_lhs (def) == gimple_return_retval (ret)
981 	  && gimple_assign_rhs1 (def) == decl)
982 	return true;
983     }
984   return false;
985 }
986 
987 /* Function vect_model_store_cost
988 
989    Models cost for stores.  In the case of grouped accesses, one access
990    has the overhead of the grouped access attributed to it.  */
991 
992 static void
993 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
994 		       enum vect_def_type dt,
995 		       vect_memory_access_type memory_access_type,
996 		       vec_load_store_type vls_type, slp_tree slp_node,
997 		       stmt_vector_for_cost *cost_vec)
998 {
999   unsigned int inside_cost = 0, prologue_cost = 0;
1000   stmt_vec_info first_stmt_info = stmt_info;
1001   bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1002 
1003   /* ???  Somehow we need to fix this at the callers.  */
1004   if (slp_node)
1005     ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1006 
1007   if (vls_type == VLS_STORE_INVARIANT)
1008     {
1009       if (slp_node)
1010 	prologue_cost += vect_prologue_cost_for_slp_op (slp_node, stmt_info,
1011 							1, dt, cost_vec);
1012       else
1013 	prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
1014 					   stmt_info, 0, vect_prologue);
1015     }
1016 
1017   /* Grouped stores update all elements in the group at once,
1018      so we want the DR for the first statement.  */
1019   if (!slp_node && grouped_access_p)
1020     first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1021 
1022   /* True if we should include any once-per-group costs as well as
1023      the cost of the statement itself.  For SLP we only get called
1024      once per group anyhow.  */
1025   bool first_stmt_p = (first_stmt_info == stmt_info);
1026 
1027   /* We assume that the cost of a single store-lanes instruction is
1028      equivalent to the cost of DR_GROUP_SIZE separate stores.  If a grouped
1029      access is instead being provided by a permute-and-store operation,
1030      include the cost of the permutes.  */
1031   if (first_stmt_p
1032       && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1033     {
1034       /* Uses high and low interleave or shuffle operations for each
1035 	 needed permute.  */
1036       int group_size = DR_GROUP_SIZE (first_stmt_info);
1037       int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1038       inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
1039 				      stmt_info, 0, vect_body);
1040 
1041       if (dump_enabled_p ())
1042         dump_printf_loc (MSG_NOTE, vect_location,
1043                          "vect_model_store_cost: strided group_size = %d .\n",
1044                          group_size);
1045     }
1046 
1047   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1048   /* Costs of the stores.  */
1049   if (memory_access_type == VMAT_ELEMENTWISE
1050       || memory_access_type == VMAT_GATHER_SCATTER)
1051     {
1052       /* N scalar stores plus extracting the elements.  */
1053       unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1054       inside_cost += record_stmt_cost (cost_vec,
1055 				       ncopies * assumed_nunits,
1056 				       scalar_store, stmt_info, 0, vect_body);
1057     }
1058   else
1059     vect_get_store_cost (stmt_info, ncopies, &inside_cost, cost_vec);
1060 
1061   if (memory_access_type == VMAT_ELEMENTWISE
1062       || memory_access_type == VMAT_STRIDED_SLP)
1063     {
1064       /* N scalar stores plus extracting the elements.  */
1065       unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1066       inside_cost += record_stmt_cost (cost_vec,
1067 				       ncopies * assumed_nunits,
1068 				       vec_to_scalar, stmt_info, 0, vect_body);
1069     }
1070 
1071   /* When vectorizing a store into the function result assign
1072      a penalty if the function returns in a multi-register location.
1073      In this case we assume we'll end up with having to spill the
1074      vector result and do piecewise loads as a conservative estimate.  */
1075   tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
1076   if (base
1077       && (TREE_CODE (base) == RESULT_DECL
1078 	  || (DECL_P (base) && cfun_returns (base)))
1079       && !aggregate_value_p (base, cfun->decl))
1080     {
1081       rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
1082       /* ???  Handle PARALLEL in some way.  */
1083       if (REG_P (reg))
1084 	{
1085 	  int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
1086 	  /* Assume that a single reg-reg move is possible and cheap,
1087 	     do not account for vector to gp register move cost.  */
1088 	  if (nregs > 1)
1089 	    {
1090 	      /* Spill.  */
1091 	      prologue_cost += record_stmt_cost (cost_vec, ncopies,
1092 						 vector_store,
1093 						 stmt_info, 0, vect_epilogue);
1094 	      /* Loads.  */
1095 	      prologue_cost += record_stmt_cost (cost_vec, ncopies * nregs,
1096 						 scalar_load,
1097 						 stmt_info, 0, vect_epilogue);
1098 	    }
1099 	}
1100     }
1101 
1102   if (dump_enabled_p ())
1103     dump_printf_loc (MSG_NOTE, vect_location,
1104                      "vect_model_store_cost: inside_cost = %d, "
1105                      "prologue_cost = %d .\n", inside_cost, prologue_cost);
1106 }
1107 
1108 
1109 /* Calculate cost of DR's memory access.  */
1110 void
1111 vect_get_store_cost (stmt_vec_info stmt_info, int ncopies,
1112 		     unsigned int *inside_cost,
1113 		     stmt_vector_for_cost *body_cost_vec)
1114 {
1115   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1116   int alignment_support_scheme
1117     = vect_supportable_dr_alignment (dr_info, false);
1118 
1119   switch (alignment_support_scheme)
1120     {
1121     case dr_aligned:
1122       {
1123 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1124 					  vector_store, stmt_info, 0,
1125 					  vect_body);
1126 
1127         if (dump_enabled_p ())
1128           dump_printf_loc (MSG_NOTE, vect_location,
1129                            "vect_model_store_cost: aligned.\n");
1130         break;
1131       }
1132 
1133     case dr_unaligned_supported:
1134       {
1135         /* Here, we assign an additional cost for the unaligned store.  */
1136 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1137 					  unaligned_store, stmt_info,
1138 					  DR_MISALIGNMENT (dr_info),
1139 					  vect_body);
1140         if (dump_enabled_p ())
1141           dump_printf_loc (MSG_NOTE, vect_location,
1142                            "vect_model_store_cost: unaligned supported by "
1143                            "hardware.\n");
1144         break;
1145       }
1146 
1147     case dr_unaligned_unsupported:
1148       {
1149         *inside_cost = VECT_MAX_COST;
1150 
1151         if (dump_enabled_p ())
1152           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1153                            "vect_model_store_cost: unsupported access.\n");
1154         break;
1155       }
1156 
1157     default:
1158       gcc_unreachable ();
1159     }
1160 }
1161 
1162 
1163 /* Function vect_model_load_cost
1164 
1165    Models cost for loads.  In the case of grouped accesses, one access has
1166    the overhead of the grouped access attributed to it.  Since unaligned
1167    accesses are supported for loads, we also account for the costs of the
1168    access scheme chosen.  */
1169 
1170 static void
1171 vect_model_load_cost (stmt_vec_info stmt_info, unsigned ncopies,
1172 		      vect_memory_access_type memory_access_type,
1173 		      slp_instance instance,
1174 		      slp_tree slp_node,
1175 		      stmt_vector_for_cost *cost_vec)
1176 {
1177   unsigned int inside_cost = 0, prologue_cost = 0;
1178   bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1179 
1180   gcc_assert (cost_vec);
1181 
1182   /* ???  Somehow we need to fix this at the callers.  */
1183   if (slp_node)
1184     ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1185 
1186   if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
1187     {
1188       /* If the load is permuted then the alignment is determined by
1189 	 the first group element not by the first scalar stmt DR.  */
1190       stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1191       /* Record the cost for the permutation.  */
1192       unsigned n_perms;
1193       unsigned assumed_nunits
1194 	= vect_nunits_for_cost (STMT_VINFO_VECTYPE (first_stmt_info));
1195       unsigned slp_vf = (ncopies * assumed_nunits) / instance->group_size;
1196       vect_transform_slp_perm_load (slp_node, vNULL, NULL,
1197 				    slp_vf, instance, true,
1198 				    &n_perms);
1199       inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
1200 				       first_stmt_info, 0, vect_body);
1201       /* And adjust the number of loads performed.  This handles
1202 	 redundancies as well as loads that are later dead.  */
1203       auto_sbitmap perm (DR_GROUP_SIZE (first_stmt_info));
1204       bitmap_clear (perm);
1205       for (unsigned i = 0;
1206 	   i < SLP_TREE_LOAD_PERMUTATION (slp_node).length (); ++i)
1207 	bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (slp_node)[i]);
1208       ncopies = 0;
1209       bool load_seen = false;
1210       for (unsigned i = 0; i < DR_GROUP_SIZE (first_stmt_info); ++i)
1211 	{
1212 	  if (i % assumed_nunits == 0)
1213 	    {
1214 	      if (load_seen)
1215 		ncopies++;
1216 	      load_seen = false;
1217 	    }
1218 	  if (bitmap_bit_p (perm, i))
1219 	    load_seen = true;
1220 	}
1221       if (load_seen)
1222 	ncopies++;
1223       gcc_assert (ncopies
1224 		  <= (DR_GROUP_SIZE (first_stmt_info)
1225 		      - DR_GROUP_GAP (first_stmt_info)
1226 		      + assumed_nunits - 1) / assumed_nunits);
1227     }
1228 
1229   /* Grouped loads read all elements in the group at once,
1230      so we want the DR for the first statement.  */
1231   stmt_vec_info first_stmt_info = stmt_info;
1232   if (!slp_node && grouped_access_p)
1233     first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1234 
1235   /* True if we should include any once-per-group costs as well as
1236      the cost of the statement itself.  For SLP we only get called
1237      once per group anyhow.  */
1238   bool first_stmt_p = (first_stmt_info == stmt_info);
1239 
1240   /* We assume that the cost of a single load-lanes instruction is
1241      equivalent to the cost of DR_GROUP_SIZE separate loads.  If a grouped
1242      access is instead being provided by a load-and-permute operation,
1243      include the cost of the permutes.  */
1244   if (first_stmt_p
1245       && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1246     {
1247       /* Uses even and odd extract operations or shuffle operations
1248 	 for each needed permute.  */
1249       int group_size = DR_GROUP_SIZE (first_stmt_info);
1250       int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1251       inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
1252 				       stmt_info, 0, vect_body);
1253 
1254       if (dump_enabled_p ())
1255         dump_printf_loc (MSG_NOTE, vect_location,
1256                          "vect_model_load_cost: strided group_size = %d .\n",
1257                          group_size);
1258     }
1259 
1260   /* The loads themselves.  */
1261   if (memory_access_type == VMAT_ELEMENTWISE
1262       || memory_access_type == VMAT_GATHER_SCATTER)
1263     {
1264       /* N scalar loads plus gathering them into a vector.  */
1265       tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1266       unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1267       inside_cost += record_stmt_cost (cost_vec,
1268 				       ncopies * assumed_nunits,
1269 				       scalar_load, stmt_info, 0, vect_body);
1270     }
1271   else
1272     vect_get_load_cost (stmt_info, ncopies, first_stmt_p,
1273 			&inside_cost, &prologue_cost,
1274 			cost_vec, cost_vec, true);
1275   if (memory_access_type == VMAT_ELEMENTWISE
1276       || memory_access_type == VMAT_STRIDED_SLP)
1277     inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
1278 				     stmt_info, 0, vect_body);
1279 
1280   if (dump_enabled_p ())
1281     dump_printf_loc (MSG_NOTE, vect_location,
1282                      "vect_model_load_cost: inside_cost = %d, "
1283                      "prologue_cost = %d .\n", inside_cost, prologue_cost);
1284 }
1285 
1286 
1287 /* Calculate cost of DR's memory access.  */
1288 void
1289 vect_get_load_cost (stmt_vec_info stmt_info, int ncopies,
1290 		    bool add_realign_cost, unsigned int *inside_cost,
1291 		    unsigned int *prologue_cost,
1292 		    stmt_vector_for_cost *prologue_cost_vec,
1293 		    stmt_vector_for_cost *body_cost_vec,
1294 		    bool record_prologue_costs)
1295 {
1296   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1297   int alignment_support_scheme
1298     = vect_supportable_dr_alignment (dr_info, false);
1299 
1300   switch (alignment_support_scheme)
1301     {
1302     case dr_aligned:
1303       {
1304 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1305 					  stmt_info, 0, vect_body);
1306 
1307         if (dump_enabled_p ())
1308           dump_printf_loc (MSG_NOTE, vect_location,
1309                            "vect_model_load_cost: aligned.\n");
1310 
1311         break;
1312       }
1313     case dr_unaligned_supported:
1314       {
1315         /* Here, we assign an additional cost for the unaligned load.  */
1316 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1317 					  unaligned_load, stmt_info,
1318 					  DR_MISALIGNMENT (dr_info),
1319 					  vect_body);
1320 
1321         if (dump_enabled_p ())
1322           dump_printf_loc (MSG_NOTE, vect_location,
1323                            "vect_model_load_cost: unaligned supported by "
1324                            "hardware.\n");
1325 
1326         break;
1327       }
1328     case dr_explicit_realign:
1329       {
1330 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1331 					  vector_load, stmt_info, 0, vect_body);
1332 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1333 					  vec_perm, stmt_info, 0, vect_body);
1334 
1335         /* FIXME: If the misalignment remains fixed across the iterations of
1336            the containing loop, the following cost should be added to the
1337            prologue costs.  */
1338         if (targetm.vectorize.builtin_mask_for_load)
1339 	  *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1340 					    stmt_info, 0, vect_body);
1341 
1342         if (dump_enabled_p ())
1343           dump_printf_loc (MSG_NOTE, vect_location,
1344                            "vect_model_load_cost: explicit realign\n");
1345 
1346         break;
1347       }
1348     case dr_explicit_realign_optimized:
1349       {
1350         if (dump_enabled_p ())
1351           dump_printf_loc (MSG_NOTE, vect_location,
1352                            "vect_model_load_cost: unaligned software "
1353                            "pipelined.\n");
1354 
1355         /* Unaligned software pipeline has a load of an address, an initial
1356            load, and possibly a mask operation to "prime" the loop.  However,
1357            if this is an access in a group of loads, which provide grouped
1358            access, then the above cost should only be considered for one
1359            access in the group.  Inside the loop, there is a load op
1360            and a realignment op.  */
1361 
1362         if (add_realign_cost && record_prologue_costs)
1363           {
1364 	    *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1365 						vector_stmt, stmt_info,
1366 						0, vect_prologue);
1367             if (targetm.vectorize.builtin_mask_for_load)
1368 	      *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1369 						  vector_stmt, stmt_info,
1370 						  0, vect_prologue);
1371           }
1372 
1373 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1374 					  stmt_info, 0, vect_body);
1375 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1376 					  stmt_info, 0, vect_body);
1377 
1378         if (dump_enabled_p ())
1379           dump_printf_loc (MSG_NOTE, vect_location,
1380                            "vect_model_load_cost: explicit realign optimized"
1381                            "\n");
1382 
1383         break;
1384       }
1385 
1386     case dr_unaligned_unsupported:
1387       {
1388         *inside_cost = VECT_MAX_COST;
1389 
1390         if (dump_enabled_p ())
1391           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1392                            "vect_model_load_cost: unsupported access.\n");
1393         break;
1394       }
1395 
1396     default:
1397       gcc_unreachable ();
1398     }
1399 }
1400 
1401 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1402    the loop preheader for the vectorized stmt STMT_VINFO.  */
1403 
1404 static void
1405 vect_init_vector_1 (stmt_vec_info stmt_vinfo, gimple *new_stmt,
1406 		    gimple_stmt_iterator *gsi)
1407 {
1408   if (gsi)
1409     vect_finish_stmt_generation (stmt_vinfo, new_stmt, gsi);
1410   else
1411     {
1412       loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1413 
1414       if (loop_vinfo)
1415         {
1416 	  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1417 	  basic_block new_bb;
1418 	  edge pe;
1419 
1420 	  if (nested_in_vect_loop_p (loop, stmt_vinfo))
1421 	    loop = loop->inner;
1422 
1423 	  pe = loop_preheader_edge (loop);
1424           new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1425           gcc_assert (!new_bb);
1426 	}
1427       else
1428        {
1429           bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1430           basic_block bb;
1431           gimple_stmt_iterator gsi_bb_start;
1432 
1433           gcc_assert (bb_vinfo);
1434           bb = BB_VINFO_BB (bb_vinfo);
1435           gsi_bb_start = gsi_after_labels (bb);
1436           gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1437        }
1438     }
1439 
1440   if (dump_enabled_p ())
1441     dump_printf_loc (MSG_NOTE, vect_location,
1442 		     "created new init_stmt: %G", new_stmt);
1443 }
1444 
1445 /* Function vect_init_vector.
1446 
1447    Insert a new stmt (INIT_STMT) that initializes a new variable of type
1448    TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
1449    vector type a vector with all elements equal to VAL is created first.
1450    Place the initialization at GSI if it is not NULL.  Otherwise, place the
1451    initialization at the loop preheader.
1452    Return the DEF of INIT_STMT.
1453    It will be used in the vectorization of STMT_INFO.  */
1454 
1455 tree
1456 vect_init_vector (stmt_vec_info stmt_info, tree val, tree type,
1457 		  gimple_stmt_iterator *gsi)
1458 {
1459   gimple *init_stmt;
1460   tree new_temp;
1461 
1462   /* We abuse this function to push something to an SSA name with initial 'val'.  */
1463   if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1464     {
1465       gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1466       if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1467 	{
1468 	  /* Scalar boolean value should be transformed into
1469 	     all zeros or all ones value before building a vector.  */
1470 	  if (VECTOR_BOOLEAN_TYPE_P (type))
1471 	    {
1472 	      tree true_val = build_all_ones_cst (TREE_TYPE (type));
1473 	      tree false_val = build_zero_cst (TREE_TYPE (type));
1474 
1475 	      if (CONSTANT_CLASS_P (val))
1476 		val = integer_zerop (val) ? false_val : true_val;
1477 	      else
1478 		{
1479 		  new_temp = make_ssa_name (TREE_TYPE (type));
1480 		  init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1481 						   val, true_val, false_val);
1482 		  vect_init_vector_1 (stmt_info, init_stmt, gsi);
1483 		  val = new_temp;
1484 		}
1485 	    }
1486 	  else
1487 	    {
1488 	      gimple_seq stmts = NULL;
1489 	      if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1490 		val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
1491 				    TREE_TYPE (type), val);
1492 	      else
1493 		/* ???  Condition vectorization expects us to do
1494 		   promotion of invariant/external defs.  */
1495 		val = gimple_convert (&stmts, TREE_TYPE (type), val);
1496 	      for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
1497 		   !gsi_end_p (gsi2); )
1498 		{
1499 		  init_stmt = gsi_stmt (gsi2);
1500 		  gsi_remove (&gsi2, false);
1501 		  vect_init_vector_1 (stmt_info, init_stmt, gsi);
1502 		}
1503 	    }
1504 	}
1505       val = build_vector_from_val (type, val);
1506     }
1507 
1508   new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1509   init_stmt = gimple_build_assign (new_temp, val);
1510   vect_init_vector_1 (stmt_info, init_stmt, gsi);
1511   return new_temp;
1512 }
1513 
1514 /* Function vect_get_vec_def_for_operand_1.
1515 
1516    For a defining stmt DEF_STMT_INFO of a scalar stmt, return a vector def
1517    with type DT that will be used in the vectorized stmt.  */
1518 
1519 tree
1520 vect_get_vec_def_for_operand_1 (stmt_vec_info def_stmt_info,
1521 				enum vect_def_type dt)
1522 {
1523   tree vec_oprnd;
1524   stmt_vec_info vec_stmt_info;
1525 
1526   switch (dt)
1527     {
1528     /* operand is a constant or a loop invariant.  */
1529     case vect_constant_def:
1530     case vect_external_def:
1531       /* Code should use vect_get_vec_def_for_operand.  */
1532       gcc_unreachable ();
1533 
1534     /* Operand is defined by a loop header phi.  In case of nested
1535        cycles we also may have uses of the backedge def.  */
1536     case vect_reduction_def:
1537     case vect_double_reduction_def:
1538     case vect_nested_cycle:
1539     case vect_induction_def:
1540       gcc_assert (gimple_code (def_stmt_info->stmt) == GIMPLE_PHI
1541 		  || dt == vect_nested_cycle);
1542       /* Fallthru.  */
1543 
1544     /* operand is defined inside the loop.  */
1545     case vect_internal_def:
1546       {
1547         /* Get the def from the vectorized stmt.  */
1548 	vec_stmt_info = STMT_VINFO_VEC_STMT (def_stmt_info);
1549 	/* Get vectorized pattern statement.  */
1550 	if (!vec_stmt_info
1551 	    && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1552 	    && !STMT_VINFO_RELEVANT (def_stmt_info))
1553 	  vec_stmt_info = (STMT_VINFO_VEC_STMT
1554 			   (STMT_VINFO_RELATED_STMT (def_stmt_info)));
1555 	gcc_assert (vec_stmt_info);
1556 	if (gphi *phi = dyn_cast <gphi *> (vec_stmt_info->stmt))
1557 	  vec_oprnd = PHI_RESULT (phi);
1558 	else
1559 	  vec_oprnd = gimple_get_lhs (vec_stmt_info->stmt);
1560 	return vec_oprnd;
1561       }
1562 
1563     default:
1564       gcc_unreachable ();
1565     }
1566 }
1567 
1568 
1569 /* Function vect_get_vec_def_for_operand.
1570 
1571    OP is an operand in STMT_VINFO.  This function returns a (vector) def
1572    that will be used in the vectorized stmt for STMT_VINFO.
1573 
1574    In the case that OP is an SSA_NAME which is defined in the loop, then
1575    STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1576 
1577    In case OP is an invariant or constant, a new stmt that creates a vector def
1578    needs to be introduced.  VECTYPE may be used to specify a required type for
1579    vector invariant.  */
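
/* A typical call (illustrative only) when vectorizing an operand OP of
   STMT_INFO is simply

	tree vec_op = vect_get_vec_def_for_operand (op, stmt_info, NULL_TREE);

   which either reuses the vector def recorded for OP's defining stmt or,
   for constants and invariants, materializes one via vect_init_vector.  */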
1580 
1581 tree
1582 vect_get_vec_def_for_operand (tree op, stmt_vec_info stmt_vinfo, tree vectype)
1583 {
1584   gimple *def_stmt;
1585   enum vect_def_type dt;
1586   bool is_simple_use;
1587   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1588 
1589   if (dump_enabled_p ())
1590     dump_printf_loc (MSG_NOTE, vect_location,
1591 		     "vect_get_vec_def_for_operand: %T\n", op);
1592 
1593   stmt_vec_info def_stmt_info;
1594   is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
1595 				      &def_stmt_info, &def_stmt);
1596   gcc_assert (is_simple_use);
1597   if (def_stmt && dump_enabled_p ())
1598     dump_printf_loc (MSG_NOTE, vect_location, "  def_stmt =  %G", def_stmt);
1599 
1600   if (dt == vect_constant_def || dt == vect_external_def)
1601     {
1602       tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1603       tree vector_type;
1604 
1605       if (vectype)
1606 	vector_type = vectype;
1607       else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1608 	       && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1609 	vector_type = truth_type_for (stmt_vectype);
1610       else
1611 	vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));
1612 
1613       gcc_assert (vector_type);
1614       return vect_init_vector (stmt_vinfo, op, vector_type, NULL);
1615     }
1616   else
1617     return vect_get_vec_def_for_operand_1 (def_stmt_info, dt);
1618 }
1619 
1620 
1621 /* Function vect_get_vec_def_for_stmt_copy
1622 
1623    Return a vector-def for an operand.  This function is used when the
1624    vectorized stmt to be created (by the caller to this function) is a "copy"
1625    created in case the vectorized result cannot fit in one vector, and several
1626    copies of the vector-stmt are required.  In this case the vector-def is
1627    retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1628    of the stmt that defines VEC_OPRND.  VINFO describes the vectorization.
1629 
1630    Context:
1631         In case the vectorization factor (VF) is bigger than the number
1632    of elements that can fit in a vectype (nunits), we have to generate
1633    more than one vector stmt to vectorize the scalar stmt.  This situation
1634    arises when there are multiple data-types operated upon in the loop; the
1635    smallest data-type determines the VF, and as a result, when vectorizing
1636    stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1637    vector stmt (each computing a vector of 'nunits' results, and together
1638    computing 'VF' results in each iteration).  This function is called when
1639    vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1640    which VF=16 and nunits=4, so the number of copies required is 4):
1641 
1642    scalar stmt:         vectorized into:        STMT_VINFO_RELATED_STMT
1643 
1644    S1: x = load         VS1.0:  vx.0 = memref0      VS1.1
1645                         VS1.1:  vx.1 = memref1      VS1.2
1646                         VS1.2:  vx.2 = memref2      VS1.3
1647                         VS1.3:  vx.3 = memref3
1648 
1649    S2: z = x + ...      VSnew.0:  vz0 = vx.0 + ...  VSnew.1
1650                         VSnew.1:  vz1 = vx.1 + ...  VSnew.2
1651                         VSnew.2:  vz2 = vx.2 + ...  VSnew.3
1652                         VSnew.3:  vz3 = vx.3 + ...
1653 
1654    The vectorization of S1 is explained in vectorizable_load.
1655    The vectorization of S2:
1656         To create the first vector-stmt out of the 4 copies - VSnew.0 -
1657    the function 'vect_get_vec_def_for_operand' is called to
1658    get the relevant vector-def for each operand of S2.  For operand x it
1659    returns  the vector-def 'vx.0'.
1660 
1661         To create the remaining copies of the vector-stmt (VSnew.j), this
1662    function is called to get the relevant vector-def for each operand.  It is
1663    obtained from the respective VS1.j stmt, which is recorded in the
1664    STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1665 
1666         For example, to obtain the vector-def 'vx.1' in order to create the
1667    vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1668    Given 'vx.0' we obtain the stmt that defines it ('VS1.0'); from the
1669    STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1670    and return its def ('vx.1').
1671    Overall, to create the above sequence this function will be called 3 times:
1672 	vx.1 = vect_get_vec_def_for_stmt_copy (vinfo, vx.0);
1673 	vx.2 = vect_get_vec_def_for_stmt_copy (vinfo, vx.1);
1674 	vx.3 = vect_get_vec_def_for_stmt_copy (vinfo, vx.2);  */
1675 
1676 tree
1677 vect_get_vec_def_for_stmt_copy (vec_info *vinfo, tree vec_oprnd)
1678 {
1679   stmt_vec_info def_stmt_info = vinfo->lookup_def (vec_oprnd);
1680   if (!def_stmt_info)
1681     /* Do nothing; can reuse same def.  */
1682     return vec_oprnd;
1683 
1684   def_stmt_info = STMT_VINFO_RELATED_STMT (def_stmt_info);
1685   gcc_assert (def_stmt_info);
1686   if (gphi *phi = dyn_cast <gphi *> (def_stmt_info->stmt))
1687     vec_oprnd = PHI_RESULT (phi);
1688   else
1689     vec_oprnd = gimple_get_lhs (def_stmt_info->stmt);
1690   return vec_oprnd;
1691 }
1692 
1693 
1694 /* Get vectorized definitions for the operands to create a copy of an original
1695    stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */
1696 
1697 void
1698 vect_get_vec_defs_for_stmt_copy (vec_info *vinfo,
1699 				 vec<tree> *vec_oprnds0,
1700 				 vec<tree> *vec_oprnds1)
1701 {
1702   tree vec_oprnd = vec_oprnds0->pop ();
1703 
1704   vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
1705   vec_oprnds0->quick_push (vec_oprnd);
1706 
1707   if (vec_oprnds1 && vec_oprnds1->length ())
1708     {
1709       vec_oprnd = vec_oprnds1->pop ();
1710       vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
1711       vec_oprnds1->quick_push (vec_oprnd);
1712     }
1713 }
1714 
1715 
1716 /* Get vectorized definitions for OP0 and OP1.  */
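
/* A hypothetical caller sketch: when vectorizing a two-operand statement one
   would typically write

	vec<tree> vec_oprnds0 = vNULL, vec_oprnds1 = vNULL;
	vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0, &vec_oprnds1,
			   slp_node);

   after which the vectors hold the initial vector defs for OP0 and OP1
   (all SLP defs at once, or a single def each in the non-SLP case).  */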
1717 
1718 void
1719 vect_get_vec_defs (tree op0, tree op1, stmt_vec_info stmt_info,
1720 		   vec<tree> *vec_oprnds0,
1721 		   vec<tree> *vec_oprnds1,
1722 		   slp_tree slp_node)
1723 {
1724   if (slp_node)
1725     {
1726       auto_vec<vec<tree> > vec_defs (SLP_TREE_CHILDREN (slp_node).length ());
1727       vect_get_slp_defs (slp_node, &vec_defs, op1 ? 2 : 1);
1728       *vec_oprnds0 = vec_defs[0];
1729       if (op1)
1730 	*vec_oprnds1 = vec_defs[1];
1731     }
1732   else
1733     {
1734       tree vec_oprnd;
1735 
1736       vec_oprnds0->create (1);
1737       vec_oprnd = vect_get_vec_def_for_operand (op0, stmt_info);
1738       vec_oprnds0->quick_push (vec_oprnd);
1739 
1740       if (op1)
1741 	{
1742 	  vec_oprnds1->create (1);
1743 	  vec_oprnd = vect_get_vec_def_for_operand (op1, stmt_info);
1744 	  vec_oprnds1->quick_push (vec_oprnd);
1745 	}
1746     }
1747 }
1748 
1749 /* Helper function called by vect_finish_replace_stmt and
1750    vect_finish_stmt_generation.  Set the location of the new
1751    statement and create and return a stmt_vec_info for it.  */
1752 
1753 static stmt_vec_info
1754 vect_finish_stmt_generation_1 (stmt_vec_info stmt_info, gimple *vec_stmt)
1755 {
1756   vec_info *vinfo = stmt_info->vinfo;
1757 
1758   stmt_vec_info vec_stmt_info = vinfo->add_stmt (vec_stmt);
1759 
1760   if (dump_enabled_p ())
1761     dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);
1762 
1763   gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));
1764 
1765   /* While EH edges will generally prevent vectorization, stmt might
1766      e.g. be in a must-not-throw region.  Ensure newly created stmts
1767      that could throw are part of the same region.  */
1768   int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
1769   if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
1770     add_stmt_to_eh_lp (vec_stmt, lp_nr);
1771 
1772   return vec_stmt_info;
1773 }
1774 
1775 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1776    which sets the same scalar result as STMT_INFO did.  Create and return a
1777    stmt_vec_info for VEC_STMT.  */
1778 
1779 stmt_vec_info
1780 vect_finish_replace_stmt (stmt_vec_info stmt_info, gimple *vec_stmt)
1781 {
1782   gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
1783   gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));
1784 
1785   gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
1786   gsi_replace (&gsi, vec_stmt, true);
1787 
1788   return vect_finish_stmt_generation_1 (stmt_info, vec_stmt);
1789 }
1790 
1791 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1792    before *GSI.  Create and return a stmt_vec_info for VEC_STMT.  */
1793 
1794 stmt_vec_info
1795 vect_finish_stmt_generation (stmt_vec_info stmt_info, gimple *vec_stmt,
1796 			     gimple_stmt_iterator *gsi)
1797 {
1798   gcc_assert (gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
1799 
1800   if (!gsi_end_p (*gsi)
1801       && gimple_has_mem_ops (vec_stmt))
1802     {
1803       gimple *at_stmt = gsi_stmt (*gsi);
1804       tree vuse = gimple_vuse (at_stmt);
1805       if (vuse && TREE_CODE (vuse) == SSA_NAME)
1806 	{
1807 	  tree vdef = gimple_vdef (at_stmt);
1808 	  gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1809 	  /* If we have an SSA vuse and insert a store, update virtual
1810 	     SSA form to avoid triggering the renamer.  Do so only
1811 	     if we can easily see all uses - which is what almost always
1812 	     happens with the way vectorized stmts are inserted.  */
1813 	  if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1814 	      && ((is_gimple_assign (vec_stmt)
1815 		   && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1816 		  || (is_gimple_call (vec_stmt)
1817 		      && !(gimple_call_flags (vec_stmt)
1818 			   & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1819 	    {
1820 	      tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1821 	      gimple_set_vdef (vec_stmt, new_vdef);
1822 	      SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1823 	    }
1824 	}
1825     }
1826   gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1827   return vect_finish_stmt_generation_1 (stmt_info, vec_stmt);
1828 }
1829 
1830 /* We want to vectorize a call to combined function CFN with function
1831    decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1832    as the types of all inputs.  Check whether this is possible using
1833    an internal function, returning its code if so or IFN_LAST if not.  */
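
/* A hypothetical query (the type and decl names are placeholders, not from
   this file):

	internal_fn ifn
	  = vectorizable_internal_function (CFN_SQRT, fndecl, v2df_type,
					    v2df_type);

   would return IFN_SQRT when the target provides a vector sqrt for that
   mode and IFN_LAST otherwise.  */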
1834 
1835 static internal_fn
1836 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1837 				tree vectype_out, tree vectype_in)
1838 {
1839   internal_fn ifn;
1840   if (internal_fn_p (cfn))
1841     ifn = as_internal_fn (cfn);
1842   else
1843     ifn = associated_internal_fn (fndecl);
1844   if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1845     {
1846       const direct_internal_fn_info &info = direct_internal_fn (ifn);
1847       if (info.vectorizable)
1848 	{
1849 	  tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1850 	  tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1851 	  if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1852 					      OPTIMIZE_FOR_SPEED))
1853 	    return ifn;
1854 	}
1855     }
1856   return IFN_LAST;
1857 }
1858 
1859 
1860 static tree permute_vec_elements (tree, tree, tree, stmt_vec_info,
1861 				  gimple_stmt_iterator *);
1862 
1863 /* Check whether a load or store statement in the loop described by
1864    LOOP_VINFO is possible in a fully-masked loop.  This is testing
1865    whether the vectorizer pass has the appropriate support, as well as
1866    whether the target does.
1867 
1868    VLS_TYPE says whether the statement is a load or store and VECTYPE
1869    is the type of the vector being loaded or stored.  MEMORY_ACCESS_TYPE
1870    says how the load or store is going to be implemented and GROUP_SIZE
1871    is the number of load or store statements in the containing group.
1872    If the access is a gather load or scatter store, GS_INFO describes
1873    its arguments.  If the load or store is conditional, SCALAR_MASK is the
1874    condition under which it occurs.
1875 
1876    Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
1877    supported, otherwise record the required mask types.  */
1878 
1879 static void
1880 check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
1881 			  vec_load_store_type vls_type, int group_size,
1882 			  vect_memory_access_type memory_access_type,
1883 			  gather_scatter_info *gs_info, tree scalar_mask)
1884 {
1885   /* Invariant loads need no special support.  */
1886   if (memory_access_type == VMAT_INVARIANT)
1887     return;
1888 
1889   vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1890   machine_mode vecmode = TYPE_MODE (vectype);
1891   bool is_load = (vls_type == VLS_LOAD);
1892   if (memory_access_type == VMAT_LOAD_STORE_LANES)
1893     {
1894       if (is_load
1895 	  ? !vect_load_lanes_supported (vectype, group_size, true)
1896 	  : !vect_store_lanes_supported (vectype, group_size, true))
1897 	{
1898 	  if (dump_enabled_p ())
1899 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1900 			     "can't use a fully-masked loop because the"
1901 			     " target doesn't have an appropriate masked"
1902 			     " load/store-lanes instruction.\n");
1903 	  LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1904 	  return;
1905 	}
1906       unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1907       vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
1908       return;
1909     }
1910 
1911   if (memory_access_type == VMAT_GATHER_SCATTER)
1912     {
1913       internal_fn ifn = (is_load
1914 			 ? IFN_MASK_GATHER_LOAD
1915 			 : IFN_MASK_SCATTER_STORE);
1916       if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
1917 						   gs_info->memory_type,
1918 						   gs_info->offset_vectype,
1919 						   gs_info->scale))
1920 	{
1921 	  if (dump_enabled_p ())
1922 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1923 			     "can't use a fully-masked loop because the"
1924 			     " target doesn't have an appropriate masked"
1925 			     " gather load or scatter store instruction.\n");
1926 	  LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1927 	  return;
1928 	}
1929       unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1930       vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
1931       return;
1932     }
1933 
1934   if (memory_access_type != VMAT_CONTIGUOUS
1935       && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1936     {
1937       /* Element X of the data must come from iteration i * VF + X of the
1938 	 scalar loop.  We need more work to support other mappings.  */
1939       if (dump_enabled_p ())
1940 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1941 			 "can't use a fully-masked loop because an access"
1942 			 " isn't contiguous.\n");
1943       LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1944       return;
1945     }
1946 
1947   machine_mode mask_mode;
1948   if (!VECTOR_MODE_P (vecmode)
1949       || !targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
1950       || !can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1951     {
1952       if (dump_enabled_p ())
1953 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1954 			 "can't use a fully-masked loop because the target"
1955 			 " doesn't have the appropriate masked load or"
1956 			 " store.\n");
1957       LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1958       return;
1959     }
1960   /* We might load more scalars than we need for permuting SLP loads.
1961      We checked in get_group_load_store_type that the extra elements
1962      don't leak into a new vector.  */
1963   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1964   poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1965   unsigned int nvectors;
1966   if (can_div_away_from_zero_p (group_size * vf, nunits, &nvectors))
1967     vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
1968   else
1969     gcc_unreachable ();
1970 }
1971 
1972 /* Return the mask input to a masked load or store.  VEC_MASK is the vectorized
1973    form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1974    that needs to be applied to all loads and stores in a vectorized loop.
1975    Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1976 
1977    MASK_TYPE is the type of both masks.  If new statements are needed,
1978    insert them before GSI.  */
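
/* When LOOP_MASK is non-null this simply emits the equivalent of

	vec_mask_and_1 = vec_mask_2 & loop_mask_3;

   before GSI and returns the new SSA name (the SSA version numbers above
   are made up for illustration).  */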
1979 
1980 static tree
1981 prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
1982 			 gimple_stmt_iterator *gsi)
1983 {
1984   gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1985   if (!loop_mask)
1986     return vec_mask;
1987 
1988   gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1989   tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1990   gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1991 					  vec_mask, loop_mask);
1992   gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1993   return and_res;
1994 }
1995 
1996 /* Determine whether we can use a gather load or scatter store to vectorize
1997    strided load or store STMT_INFO by truncating the current offset to a
1998    smaller width.  We need to be able to construct an offset vector:
1999 
2000      { 0, X, X*2, X*3, ... }
2001 
2002    without loss of precision, where X is STMT_INFO's DR_STEP.
2003 
2004    Return true if this is possible, describing the gather load or scatter
2005    store in GS_INFO.  MASKED_P is true if the load or store is conditional.  */
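
/* Worked example (illustrative numbers only): with DR_STEP = 4, a scale equal
   to the 4-byte element size and at most 255 relevant scalar iterations, the
   largest offset needed is 255 * 4 / 4 = 255, which fits in 8 bits, so an
   8-bit unsigned offset type is the narrowest candidate tried below.  */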
2006 
2007 static bool
2008 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
2009 				     loop_vec_info loop_vinfo, bool masked_p,
2010 				     gather_scatter_info *gs_info)
2011 {
2012   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2013   data_reference *dr = dr_info->dr;
2014   tree step = DR_STEP (dr);
2015   if (TREE_CODE (step) != INTEGER_CST)
2016     {
2017       /* ??? Perhaps we could use range information here?  */
2018       if (dump_enabled_p ())
2019 	dump_printf_loc (MSG_NOTE, vect_location,
2020 			 "cannot truncate variable step.\n");
2021       return false;
2022     }
2023 
2024   /* Get the number of bits in an element.  */
2025   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2026   scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
2027   unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
2028 
2029   /* Set COUNT to the upper limit on the number of elements - 1.
2030      Start with the maximum vectorization factor.  */
2031   unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
2032 
2033   /* Try lowering COUNT to the number of scalar latch iterations.  */
2034   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2035   widest_int max_iters;
2036   if (max_loop_iterations (loop, &max_iters)
2037       && max_iters < count)
2038     count = max_iters.to_shwi ();
2039 
2040   /* Try scales of 1 and the element size.  */
2041   int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
2042   wi::overflow_type overflow = wi::OVF_NONE;
2043   for (int i = 0; i < 2; ++i)
2044     {
2045       int scale = scales[i];
2046       widest_int factor;
2047       if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
2048 	continue;
2049 
2050       /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE.  */
2051       widest_int range = wi::mul (count, factor, SIGNED, &overflow);
2052       if (overflow)
2053 	continue;
2054       signop sign = range >= 0 ? UNSIGNED : SIGNED;
2055       unsigned int min_offset_bits = wi::min_precision (range, sign);
2056 
2057       /* Find the narrowest viable offset type.  */
2058       unsigned int offset_bits = 1U << ceil_log2 (min_offset_bits);
2059       tree offset_type = build_nonstandard_integer_type (offset_bits,
2060 							 sign == UNSIGNED);
2061 
2062       /* See whether the target supports the operation with an offset
2063 	 no narrower than OFFSET_TYPE.  */
2064       tree memory_type = TREE_TYPE (DR_REF (dr));
2065       if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
2066 				     vectype, memory_type, offset_type, scale,
2067 				     &gs_info->ifn, &gs_info->offset_vectype))
2068 	continue;
2069 
2070       gs_info->decl = NULL_TREE;
2071       /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
2072 	 but we don't need to store that here.  */
2073       gs_info->base = NULL_TREE;
2074       gs_info->element_type = TREE_TYPE (vectype);
2075       gs_info->offset = fold_convert (offset_type, step);
2076       gs_info->offset_dt = vect_constant_def;
2077       gs_info->scale = scale;
2078       gs_info->memory_type = memory_type;
2079       return true;
2080     }
2081 
2082   if (overflow && dump_enabled_p ())
2083     dump_printf_loc (MSG_NOTE, vect_location,
2084 		     "truncating gather/scatter offset to %d bits"
2085 		     " might change its value.\n", element_bits);
2086 
2087   return false;
2088 }
2089 
2090 /* Return true if we can use gather/scatter internal functions to
2091    vectorize STMT_INFO, which is a grouped or strided load or store.
2092    MASKED_P is true if load or store is conditional.  When returning
2093    true, fill in GS_INFO with the information required to perform the
2094    operation.  */
2095 
2096 static bool
2097 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
2098 				    loop_vec_info loop_vinfo, bool masked_p,
2099 				    gather_scatter_info *gs_info)
2100 {
2101   if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
2102       || gs_info->decl)
2103     return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
2104 						masked_p, gs_info);
2105 
2106   tree old_offset_type = TREE_TYPE (gs_info->offset);
2107   tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
2108 
2109   gcc_assert (TYPE_PRECISION (new_offset_type)
2110 	      >= TYPE_PRECISION (old_offset_type));
2111   gs_info->offset = fold_convert (new_offset_type, gs_info->offset);
2112 
2113   if (dump_enabled_p ())
2114     dump_printf_loc (MSG_NOTE, vect_location,
2115 		     "using gather/scatter for strided/grouped access,"
2116 		     " scale = %d\n", gs_info->scale);
2117 
2118   return true;
2119 }
2120 
2121 /* STMT_INFO is a non-strided load or store, meaning that it accesses
2122    elements with a known constant step.  Return -1 if that step
2123    is negative, 0 if it is zero, and 1 if it is greater than zero.  */
2124 
2125 static int
2126 compare_step_with_zero (stmt_vec_info stmt_info)
2127 {
2128   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2129   return tree_int_cst_compare (vect_dr_behavior (dr_info)->step,
2130 			       size_zero_node);
2131 }
2132 
2133 /* If the target supports a permute mask that reverses the elements in
2134    a vector of type VECTYPE, return that mask, otherwise return null.  */
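
/* E.g. for a four-element vector the selector built below is { 3, 2, 1, 0 }
   (encoded as the single stepped pattern 3, 2, 1), so element I of the result
   is taken from element NUNITS - 1 - I of the input.  */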
2135 
2136 static tree
2137 perm_mask_for_reverse (tree vectype)
2138 {
2139   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2140 
2141   /* The encoding has a single stepped pattern.  */
2142   vec_perm_builder sel (nunits, 1, 3);
2143   for (int i = 0; i < 3; ++i)
2144     sel.quick_push (nunits - 1 - i);
2145 
2146   vec_perm_indices indices (sel, 1, nunits);
2147   if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
2148     return NULL_TREE;
2149   return vect_gen_perm_mask_checked (vectype, indices);
2150 }
2151 
2152 /* A subroutine of get_load_store_type, with a subset of the same
2153    arguments.  Handle the case where STMT_INFO is a load or store that
2154    accesses consecutive elements with a negative step.  */
2155 
2156 static vect_memory_access_type
2157 get_negative_load_store_type (stmt_vec_info stmt_info, tree vectype,
2158 			      vec_load_store_type vls_type,
2159 			      unsigned int ncopies)
2160 {
2161   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2162   dr_alignment_support alignment_support_scheme;
2163 
2164   if (ncopies > 1)
2165     {
2166       if (dump_enabled_p ())
2167 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2168 			 "multiple types with negative step.\n");
2169       return VMAT_ELEMENTWISE;
2170     }
2171 
2172   alignment_support_scheme = vect_supportable_dr_alignment (dr_info, false);
2173   if (alignment_support_scheme != dr_aligned
2174       && alignment_support_scheme != dr_unaligned_supported)
2175     {
2176       if (dump_enabled_p ())
2177 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2178 			 "negative step but alignment required.\n");
2179       return VMAT_ELEMENTWISE;
2180     }
2181 
2182   if (vls_type == VLS_STORE_INVARIANT)
2183     {
2184       if (dump_enabled_p ())
2185 	dump_printf_loc (MSG_NOTE, vect_location,
2186 			 "negative step with invariant source;"
2187 			 " no permute needed.\n");
2188       return VMAT_CONTIGUOUS_DOWN;
2189     }
2190 
2191   if (!perm_mask_for_reverse (vectype))
2192     {
2193       if (dump_enabled_p ())
2194 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2195 			 "negative step and reversing not supported.\n");
2196       return VMAT_ELEMENTWISE;
2197     }
2198 
2199   return VMAT_CONTIGUOUS_REVERSE;
2200 }
2201 
2202 /* STMT_INFO is either a masked or unconditional store.  Return the value
2203    being stored.  */
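
/* For a plain assignment such as MEM[p] = x_1 this is simply the RHS x_1;
   for a masked store call such as .MASK_STORE (p, align, mask_2, x_1) it is
   the argument that internal_fn_stored_value_index points at (the last one
   in this illustrative example).  */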
2204 
2205 tree
2206 vect_get_store_rhs (stmt_vec_info stmt_info)
2207 {
2208   if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
2209     {
2210       gcc_assert (gimple_assign_single_p (assign));
2211       return gimple_assign_rhs1 (assign);
2212     }
2213   if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2214     {
2215       internal_fn ifn = gimple_call_internal_fn (call);
2216       int index = internal_fn_stored_value_index (ifn);
2217       gcc_assert (index >= 0);
2218       return gimple_call_arg (call, index);
2219     }
2220   gcc_unreachable ();
2221 }
2222 
2223 /* Function VECTOR_VECTOR_COMPOSITION_TYPE
2224 
2225    This function returns a vector type which can be composed from NELTS pieces,
2226    whose type is recorded in PTYPE.  VTYPE should be a vector type, and the
2227    returned vector has the same vector size as VTYPE.  It first checks whether
2228    the target supports a vector mode of the piece size for the construction;
2229    if not, it then checks whether a scalar mode of the piece size can be used.
2230    It returns NULL_TREE if no suitable composition can be found.
2231 
2232    For example, for (vtype=V16QI, nelts=4), we can probably get:
2233      - V16QI with PTYPE V4QI.
2234      - V4SI with PTYPE SI.
2235      - NULL_TREE.  */
2236 
2237 static tree
2238 vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
2239 {
2240   gcc_assert (VECTOR_TYPE_P (vtype));
2241   gcc_assert (known_gt (nelts, 0U));
2242 
2243   machine_mode vmode = TYPE_MODE (vtype);
2244   if (!VECTOR_MODE_P (vmode))
2245     return NULL_TREE;
2246 
2247   poly_uint64 vbsize = GET_MODE_BITSIZE (vmode);
2248   unsigned int pbsize;
2249   if (constant_multiple_p (vbsize, nelts, &pbsize))
2250     {
2251       /* First check if vec_init optab supports construction from
2252 	 vector pieces directly.  */
2253       scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype));
2254       poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode);
2255       machine_mode rmode;
2256       if (related_vector_mode (vmode, elmode, inelts).exists (&rmode)
2257 	  && (convert_optab_handler (vec_init_optab, vmode, rmode)
2258 	      != CODE_FOR_nothing))
2259 	{
2260 	  *ptype = build_vector_type (TREE_TYPE (vtype), inelts);
2261 	  return vtype;
2262 	}
2263 
2264       /* Otherwise check if exists an integer type of the same piece size and
2265 	 if vec_init optab supports construction from it directly.  */
2266       if (int_mode_for_size (pbsize, 0).exists (&elmode)
2267 	  && related_vector_mode (vmode, elmode, nelts).exists (&rmode)
2268 	  && (convert_optab_handler (vec_init_optab, rmode, elmode)
2269 	      != CODE_FOR_nothing))
2270 	{
2271 	  *ptype = build_nonstandard_integer_type (pbsize, 1);
2272 	  return build_vector_type (*ptype, nelts);
2273 	}
2274     }
2275 
2276   return NULL_TREE;
2277 }
2278 
2279 /* A subroutine of get_load_store_type, with a subset of the same
2280    arguments.  Handle the case where STMT_INFO is part of a grouped load
2281    or store.
2282 
2283    For stores, the statements in the group are all consecutive
2284    and there is no gap at the end.  For loads, the statements in the
2285    group might not be consecutive; there can be gaps between statements
2286    as well as at the end.  */
2287 
2288 static bool
2289 get_group_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
2290 			   bool masked_p, vec_load_store_type vls_type,
2291 			   vect_memory_access_type *memory_access_type,
2292 			   gather_scatter_info *gs_info)
2293 {
2294   vec_info *vinfo = stmt_info->vinfo;
2295   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2296   class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2297   stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2298   dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
2299   unsigned int group_size = DR_GROUP_SIZE (first_stmt_info);
2300   bool single_element_p = (stmt_info == first_stmt_info
2301 			   && !DR_GROUP_NEXT_ELEMENT (stmt_info));
2302   unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
2303   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2304 
2305   /* True if the vectorized statements would access beyond the last
2306      statement in the group.  */
2307   bool overrun_p = false;
2308 
2309   /* True if we can cope with such overrun by peeling for gaps, so that
2310      there is at least one final scalar iteration after the vector loop.  */
2311   bool can_overrun_p = (!masked_p
2312 			&& vls_type == VLS_LOAD
2313 			&& loop_vinfo
2314 			&& !loop->inner);
2315 
2316   /* There can only be a gap at the end of the group if the stride is
2317      known at compile time.  */
2318   gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
2319 
2320   /* Stores can't yet have gaps.  */
2321   gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
2322 
2323   if (slp)
2324     {
2325       if (STMT_VINFO_STRIDED_P (first_stmt_info))
2326 	{
2327 	  /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2328 	     separated by the stride, until we have a complete vector.
2329 	     Fall back to scalar accesses if that isn't possible.  */
2330 	  if (multiple_p (nunits, group_size))
2331 	    *memory_access_type = VMAT_STRIDED_SLP;
2332 	  else
2333 	    *memory_access_type = VMAT_ELEMENTWISE;
2334 	}
2335       else
2336 	{
2337 	  overrun_p = loop_vinfo && gap != 0;
2338 	  if (overrun_p && vls_type != VLS_LOAD)
2339 	    {
2340 	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2341 			       "Grouped store with gaps requires"
2342 			       " non-consecutive accesses\n");
2343 	      return false;
2344 	    }
2345 	  /* An overrun is fine if the trailing elements are smaller
2346 	     than the alignment boundary B.  Every vector access will
2347 	     be a multiple of B and so we are guaranteed to access a
2348 	     non-gap element in the same B-sized block.  */
2349 	  if (overrun_p
2350 	      && gap < (vect_known_alignment_in_bytes (first_dr_info)
2351 			/ vect_get_scalar_dr_size (first_dr_info)))
2352 	    overrun_p = false;
2353 
2354 	  /* If the gap splits the vector in half and the target
2355 	     can do half-vector operations avoid the epilogue peeling
2356 	     by simply loading half of the vector only.  Usually
2357 	     the construction with an upper zero half will be elided.  */
2358 	  dr_alignment_support alignment_support_scheme;
2359 	  tree half_vtype;
2360 	  if (overrun_p
2361 	      && !masked_p
2362 	      && (((alignment_support_scheme
2363 		      = vect_supportable_dr_alignment (first_dr_info, false)))
2364 		   == dr_aligned
2365 		  || alignment_support_scheme == dr_unaligned_supported)
2366 	      && known_eq (nunits, (group_size - gap) * 2)
2367 	      && known_eq (nunits, group_size)
2368 	      && (vector_vector_composition_type (vectype, 2, &half_vtype)
2369 		  != NULL_TREE))
2370 	    overrun_p = false;
2371 
2372 	  if (overrun_p && !can_overrun_p)
2373 	    {
2374 	      if (dump_enabled_p ())
2375 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2376 				 "Peeling for outer loop is not supported\n");
2377 	      return false;
2378 	    }
2379 	  int cmp = compare_step_with_zero (stmt_info);
2380 	  if (cmp < 0)
2381 	    *memory_access_type = get_negative_load_store_type
2382 	      (stmt_info, vectype, vls_type, 1);
2383 	  else
2384 	    {
2385 	      gcc_assert (!loop_vinfo || cmp > 0);
2386 	      *memory_access_type = VMAT_CONTIGUOUS;
2387 	    }
2388 	}
2389     }
2390   else
2391     {
2392       /* We can always handle this case using elementwise accesses,
2393 	 but see if something more efficient is available.  */
2394       *memory_access_type = VMAT_ELEMENTWISE;
2395 
2396       /* If there is a gap at the end of the group then these optimizations
2397 	 would access excess elements in the last iteration.  */
2398       bool would_overrun_p = (gap != 0);
2399       /* An overrun is fine if the trailing elements are smaller than the
2400 	 alignment boundary B.  Every vector access will be a multiple of B
2401 	 and so we are guaranteed to access a non-gap element in the
2402 	 same B-sized block.  */
2403       if (would_overrun_p
2404 	  && !masked_p
2405 	  && gap < (vect_known_alignment_in_bytes (first_dr_info)
2406 		    / vect_get_scalar_dr_size (first_dr_info)))
2407 	would_overrun_p = false;
2408 
2409       if (!STMT_VINFO_STRIDED_P (first_stmt_info)
2410 	  && (can_overrun_p || !would_overrun_p)
2411 	  && compare_step_with_zero (stmt_info) > 0)
2412 	{
2413 	  /* First cope with the degenerate case of a single-element
2414 	     vector.  */
2415 	  if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2416 	    ;
2417 
2418 	  /* Otherwise try using LOAD/STORE_LANES.  */
2419 	  else if (vls_type == VLS_LOAD
2420 		   ? vect_load_lanes_supported (vectype, group_size, masked_p)
2421 		   : vect_store_lanes_supported (vectype, group_size,
2422 						 masked_p))
2423 	    {
2424 	      *memory_access_type = VMAT_LOAD_STORE_LANES;
2425 	      overrun_p = would_overrun_p;
2426 	    }
2427 
2428 	  /* If that fails, try using permuting loads.  */
2429 	  else if (vls_type == VLS_LOAD
2430 		   ? vect_grouped_load_supported (vectype, single_element_p,
2431 						  group_size)
2432 		   : vect_grouped_store_supported (vectype, group_size))
2433 	    {
2434 	      *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2435 	      overrun_p = would_overrun_p;
2436 	    }
2437 	}
2438 
2439       /* As a last resort, try using a gather load or scatter store.
2440 
2441 	 ??? Although the code can handle all group sizes correctly,
2442 	 it probably isn't a win to use separate strided accesses based
2443 	 on nearby locations.  Or, even if it's a win over scalar code,
2444 	 it might not be a win over vectorizing at a lower VF, if that
2445 	 allows us to use contiguous accesses.  */
2446       if (*memory_access_type == VMAT_ELEMENTWISE
2447 	  && single_element_p
2448 	  && loop_vinfo
2449 	  && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2450 						 masked_p, gs_info))
2451 	*memory_access_type = VMAT_GATHER_SCATTER;
2452     }
2453 
2454   if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2455     {
2456       /* STMT is the leader of the group. Check the operands of all the
2457 	 stmts of the group.  */
2458       stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2459       while (next_stmt_info)
2460 	{
2461 	  tree op = vect_get_store_rhs (next_stmt_info);
2462 	  enum vect_def_type dt;
2463 	  if (!vect_is_simple_use (op, vinfo, &dt))
2464 	    {
2465 	      if (dump_enabled_p ())
2466 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2467 				 "use not simple.\n");
2468 	      return false;
2469 	    }
2470 	  next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2471 	}
2472     }
2473 
2474   if (overrun_p)
2475     {
2476       gcc_assert (can_overrun_p);
2477       if (dump_enabled_p ())
2478 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2479 			 "Data access with gaps requires scalar "
2480 			 "epilogue loop\n");
2481       LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2482     }
2483 
2484   return true;
2485 }
2486 
2487 /* Analyze load or store statement STMT_INFO of type VLS_TYPE.  Return true
2488    if there is a memory access type that the vectorized form can use,
2489    storing it in *MEMORY_ACCESS_TYPE if so.  If we decide to use gathers
2490    or scatters, fill in GS_INFO accordingly.
2491 
2492    SLP says whether we're performing SLP rather than loop vectorization.
2493    MASKED_P is true if the statement is conditional on a vectorized mask.
2494    VECTYPE is the vector type that the vectorized statements will use.
2495    NCOPIES is the number of vector statements that will be needed.  */
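
/* Roughly speaking (a simplified summary of the logic below): a compile-time
   step of zero gives VMAT_INVARIANT (loads only), a positive step gives
   VMAT_CONTIGUOUS, a negative step gives VMAT_CONTIGUOUS_REVERSE or falls
   back to VMAT_ELEMENTWISE, and explicit gather/scatter accesses give
   VMAT_GATHER_SCATTER.  */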
2496 
2497 static bool
2498 get_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
2499 		     bool masked_p, vec_load_store_type vls_type,
2500 		     unsigned int ncopies,
2501 		     vect_memory_access_type *memory_access_type,
2502 		     gather_scatter_info *gs_info)
2503 {
2504   vec_info *vinfo = stmt_info->vinfo;
2505   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2506   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2507   if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2508     {
2509       *memory_access_type = VMAT_GATHER_SCATTER;
2510       if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
2511 	gcc_unreachable ();
2512       else if (!vect_is_simple_use (gs_info->offset, vinfo,
2513 				    &gs_info->offset_dt,
2514 				    &gs_info->offset_vectype))
2515 	{
2516 	  if (dump_enabled_p ())
2517 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2518 			     "%s index use not simple.\n",
2519 			     vls_type == VLS_LOAD ? "gather" : "scatter");
2520 	  return false;
2521 	}
2522     }
2523   else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2524     {
2525       if (!get_group_load_store_type (stmt_info, vectype, slp, masked_p,
2526 				      vls_type, memory_access_type, gs_info))
2527 	return false;
2528     }
2529   else if (STMT_VINFO_STRIDED_P (stmt_info))
2530     {
2531       gcc_assert (!slp);
2532       if (loop_vinfo
2533 	  && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2534 						 masked_p, gs_info))
2535 	*memory_access_type = VMAT_GATHER_SCATTER;
2536       else
2537 	*memory_access_type = VMAT_ELEMENTWISE;
2538     }
2539   else
2540     {
2541       int cmp = compare_step_with_zero (stmt_info);
2542       if (cmp < 0)
2543 	*memory_access_type = get_negative_load_store_type
2544 	  (stmt_info, vectype, vls_type, ncopies);
2545       else if (cmp == 0)
2546 	{
2547 	  gcc_assert (vls_type == VLS_LOAD);
2548 	  *memory_access_type = VMAT_INVARIANT;
2549 	}
2550       else
2551 	*memory_access_type = VMAT_CONTIGUOUS;
2552     }
2553 
2554   if ((*memory_access_type == VMAT_ELEMENTWISE
2555        || *memory_access_type == VMAT_STRIDED_SLP)
2556       && !nunits.is_constant ())
2557     {
2558       if (dump_enabled_p ())
2559 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2560 			 "Not using elementwise accesses due to variable "
2561 			 "vectorization factor.\n");
2562       return false;
2563     }
2564 
2565   /* FIXME: At the moment the cost model seems to underestimate the
2566      cost of using elementwise accesses.  This check preserves the
2567      traditional behavior until that can be fixed.  */
2568   stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2569   if (!first_stmt_info)
2570     first_stmt_info = stmt_info;
2571   if (*memory_access_type == VMAT_ELEMENTWISE
2572       && !STMT_VINFO_STRIDED_P (first_stmt_info)
2573       && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
2574 	   && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2575 	   && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2576     {
2577       if (dump_enabled_p ())
2578 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2579 			 "not falling back to elementwise accesses\n");
2580       return false;
2581     }
2582   return true;
2583 }
2584 
2585 /* Return true if boolean argument MASK is suitable for vectorizing
2586    conditional operation STMT_INFO.  When returning true, store the type
2587    of the definition in *MASK_DT_OUT and the type of the vectorized mask
2588    in *MASK_VECTYPE_OUT.  */
2589 
2590 static bool
2591 vect_check_scalar_mask (stmt_vec_info stmt_info, tree mask,
2592 			vect_def_type *mask_dt_out,
2593 			tree *mask_vectype_out)
2594 {
2595   vec_info *vinfo = stmt_info->vinfo;
2596   if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2597     {
2598       if (dump_enabled_p ())
2599 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2600 			 "mask argument is not a boolean.\n");
2601       return false;
2602     }
2603 
2604   if (TREE_CODE (mask) != SSA_NAME)
2605     {
2606       if (dump_enabled_p ())
2607 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2608 			 "mask argument is not an SSA name.\n");
2609       return false;
2610     }
2611 
2612   enum vect_def_type mask_dt;
2613   tree mask_vectype;
2614   if (!vect_is_simple_use (mask, stmt_info->vinfo, &mask_dt, &mask_vectype))
2615     {
2616       if (dump_enabled_p ())
2617 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2618 			 "mask use not simple.\n");
2619       return false;
2620     }
2621 
2622   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2623   if (!mask_vectype)
2624     mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype));
2625 
2626   if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2627     {
2628       if (dump_enabled_p ())
2629 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2630 			 "could not find an appropriate vector mask type.\n");
2631       return false;
2632     }
2633 
2634   if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2635 		TYPE_VECTOR_SUBPARTS (vectype)))
2636     {
2637       if (dump_enabled_p ())
2638 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2639 			 "vector mask type %T"
2640 			 " does not match vector data type %T.\n",
2641 			 mask_vectype, vectype);
2642 
2643       return false;
2644     }
2645 
2646   *mask_dt_out = mask_dt;
2647   *mask_vectype_out = mask_vectype;
2648   return true;
2649 }
2650 
2651 /* Return true if stored value RHS is suitable for vectorizing store
2652    statement STMT_INFO.  When returning true, store the type of the
2653    definition in *RHS_DT_OUT, the type of the vectorized store value in
2654    *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT.  */
2655 
2656 static bool
2657 vect_check_store_rhs (stmt_vec_info stmt_info, tree rhs,
2658 		      vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
2659 		      vec_load_store_type *vls_type_out)
2660 {
2661   /* In the case this is a store from a constant, make sure
2662      native_encode_expr can handle it.  */
2663   if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2664     {
2665       if (dump_enabled_p ())
2666 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2667 			 "cannot encode constant as a byte sequence.\n");
2668       return false;
2669     }
2670 
2671   enum vect_def_type rhs_dt;
2672   tree rhs_vectype;
2673   if (!vect_is_simple_use (rhs, stmt_info->vinfo, &rhs_dt, &rhs_vectype))
2674     {
2675       if (dump_enabled_p ())
2676 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2677 			 "use not simple.\n");
2678       return false;
2679     }
2680 
2681   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2682   if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2683     {
2684       if (dump_enabled_p ())
2685 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2686 			 "incompatible vector types.\n");
2687       return false;
2688     }
2689 
2690   *rhs_dt_out = rhs_dt;
2691   *rhs_vectype_out = rhs_vectype;
2692   if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2693     *vls_type_out = VLS_STORE_INVARIANT;
2694   else
2695     *vls_type_out = VLS_STORE;
2696   return true;
2697 }
2698 
2699 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2700    Note that we support masks with floating-point type, in which case the
2701    floats are interpreted as a bitmask.  */
2702 
2703 static tree
2704 vect_build_all_ones_mask (stmt_vec_info stmt_info, tree masktype)
2705 {
2706   if (TREE_CODE (masktype) == INTEGER_TYPE)
2707     return build_int_cst (masktype, -1);
2708   else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2709     {
2710       tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2711       mask = build_vector_from_val (masktype, mask);
2712       return vect_init_vector (stmt_info, mask, masktype, NULL);
2713     }
2714   else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2715     {
2716       REAL_VALUE_TYPE r;
2717       long tmp[6];
2718       for (int j = 0; j < 6; ++j)
2719 	tmp[j] = -1;
2720       real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2721       tree mask = build_real (TREE_TYPE (masktype), r);
2722       mask = build_vector_from_val (masktype, mask);
2723       return vect_init_vector (stmt_info, mask, masktype, NULL);
2724     }
2725   gcc_unreachable ();
2726 }
2727 
2728 /* Build an all-zero merge value of type VECTYPE while vectorizing
2729    STMT_INFO as a gather load.  */
2730 
2731 static tree
2732 vect_build_zero_merge_argument (stmt_vec_info stmt_info, tree vectype)
2733 {
2734   tree merge;
2735   if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2736     merge = build_int_cst (TREE_TYPE (vectype), 0);
2737   else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2738     {
2739       REAL_VALUE_TYPE r;
2740       long tmp[6];
2741       for (int j = 0; j < 6; ++j)
2742 	tmp[j] = 0;
2743       real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2744       merge = build_real (TREE_TYPE (vectype), r);
2745     }
2746   else
2747     gcc_unreachable ();
2748   merge = build_vector_from_val (vectype, merge);
2749   return vect_init_vector (stmt_info, merge, vectype, NULL);
2750 }
2751 
2752 /* Build a gather load call while vectorizing STMT_INFO.  Insert new
2753    instructions before GSI and add them to VEC_STMT.  GS_INFO describes
2754    the gather load operation.  If the load is conditional, MASK is the
2755    unvectorized condition and MASK_DT is its definition type, otherwise
2756    MASK is null.  */
2757 
2758 static void
2759 vect_build_gather_load_calls (stmt_vec_info stmt_info,
2760 			      gimple_stmt_iterator *gsi,
2761 			      stmt_vec_info *vec_stmt,
2762 			      gather_scatter_info *gs_info,
2763 			      tree mask)
2764 {
2765   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2766   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2767   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2768   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2769   int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2770   edge pe = loop_preheader_edge (loop);
2771   enum { NARROW, NONE, WIDEN } modifier;
2772   poly_uint64 gather_off_nunits
2773     = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2774 
2775   tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2776   tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2777   tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2778   tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2779   tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2780   tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2781   tree scaletype = TREE_VALUE (arglist);
2782   tree real_masktype = masktype;
2783   gcc_checking_assert (types_compatible_p (srctype, rettype)
2784 		       && (!mask
2785 			   || TREE_CODE (masktype) == INTEGER_TYPE
2786 			   || types_compatible_p (srctype, masktype)));
2787   if (mask && TREE_CODE (masktype) == INTEGER_TYPE)
2788     masktype = truth_type_for (srctype);
2789 
2790   tree mask_halftype = masktype;
2791   tree perm_mask = NULL_TREE;
2792   tree mask_perm_mask = NULL_TREE;
2793   if (known_eq (nunits, gather_off_nunits))
2794     modifier = NONE;
2795   else if (known_eq (nunits * 2, gather_off_nunits))
2796     {
2797       modifier = WIDEN;
2798 
2799       /* Currently widening gathers and scatters are only supported for
2800 	 fixed-length vectors.  */
2801       int count = gather_off_nunits.to_constant ();
2802       vec_perm_builder sel (count, count, 1);
2803       for (int i = 0; i < count; ++i)
2804 	sel.quick_push (i | (count / 2));
2805 
2806       vec_perm_indices indices (sel, 1, count);
2807       perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2808 					      indices);
2809     }
2810   else if (known_eq (nunits, gather_off_nunits * 2))
2811     {
2812       modifier = NARROW;
2813 
2814       /* Currently narrowing gathers and scatters are only supported for
2815 	 fixed-length vectors.  */
2816       int count = nunits.to_constant ();
2817       vec_perm_builder sel (count, count, 1);
2818       sel.quick_grow (count);
2819       for (int i = 0; i < count; ++i)
2820 	sel[i] = i < count / 2 ? i : i + count / 2;
2821       vec_perm_indices indices (sel, 2, count);
2822       perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2823 
2824       ncopies *= 2;
2825 
2826       if (mask && masktype == real_masktype)
2827 	{
2828 	  for (int i = 0; i < count; ++i)
2829 	    sel[i] = i | (count / 2);
2830 	  indices.new_vector (sel, 2, count);
2831 	  mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2832 	}
2833       else if (mask)
2834 	mask_halftype = truth_type_for (gs_info->offset_vectype);
2835     }
2836   else
2837     gcc_unreachable ();
2838 
2839   tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
2840   tree vec_dest = vect_create_destination_var (scalar_dest, vectype);
2841 
2842   tree ptr = fold_convert (ptrtype, gs_info->base);
2843   if (!is_gimple_min_invariant (ptr))
2844     {
2845       gimple_seq seq;
2846       ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2847       basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2848       gcc_assert (!new_bb);
2849     }
2850 
2851   tree scale = build_int_cst (scaletype, gs_info->scale);
2852 
2853   tree vec_oprnd0 = NULL_TREE;
2854   tree vec_mask = NULL_TREE;
2855   tree src_op = NULL_TREE;
2856   tree mask_op = NULL_TREE;
2857   tree prev_res = NULL_TREE;
2858   stmt_vec_info prev_stmt_info = NULL;
2859 
2860   if (!mask)
2861     {
2862       src_op = vect_build_zero_merge_argument (stmt_info, rettype);
2863       mask_op = vect_build_all_ones_mask (stmt_info, masktype);
2864     }
2865 
2866   for (int j = 0; j < ncopies; ++j)
2867     {
2868       tree op, var;
2869       if (modifier == WIDEN && (j & 1))
2870 	op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
2871 				   perm_mask, stmt_info, gsi);
2872       else if (j == 0)
2873 	op = vec_oprnd0
2874 	  = vect_get_vec_def_for_operand (gs_info->offset, stmt_info);
2875       else
2876 	op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (loop_vinfo,
2877 							  vec_oprnd0);
2878 
2879       if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2880 	{
2881 	  gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2882 				TYPE_VECTOR_SUBPARTS (idxtype)));
2883 	  var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2884 	  op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2885 	  gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2886 	  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2887 	  op = var;
2888 	}
2889 
2890       if (mask)
2891 	{
2892 	  if (mask_perm_mask && (j & 1))
2893 	    mask_op = permute_vec_elements (mask_op, mask_op,
2894 					    mask_perm_mask, stmt_info, gsi);
2895 	  else
2896 	    {
2897 	      if (j == 0)
2898 		vec_mask = vect_get_vec_def_for_operand (mask, stmt_info);
2899 	      else if (modifier != NARROW || (j & 1) == 0)
2900 		vec_mask = vect_get_vec_def_for_stmt_copy (loop_vinfo,
2901 							   vec_mask);
2902 
2903 	      mask_op = vec_mask;
2904 	      if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2905 		{
2906 		  poly_uint64 sub1 = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op));
2907 		  poly_uint64 sub2 = TYPE_VECTOR_SUBPARTS (masktype);
2908 		  gcc_assert (known_eq (sub1, sub2));
2909 		  var = vect_get_new_ssa_name (masktype, vect_simple_var);
2910 		  mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2911 		  gassign *new_stmt
2912 		    = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2913 		  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2914 		  mask_op = var;
2915 		}
2916 	    }
2917 	  if (modifier == NARROW && masktype != real_masktype)
2918 	    {
2919 	      var = vect_get_new_ssa_name (mask_halftype, vect_simple_var);
2920 	      gassign *new_stmt
2921 		= gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
2922 						    : VEC_UNPACK_LO_EXPR,
2923 				       mask_op);
2924 	      vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2925 	      mask_op = var;
2926 	    }
2927 	  src_op = mask_op;
2928 	}
2929 
2930       tree mask_arg = mask_op;
2931       if (masktype != real_masktype)
2932 	{
2933 	  tree utype, optype = TREE_TYPE (mask_op);
2934 	  if (TYPE_MODE (real_masktype) == TYPE_MODE (optype))
2935 	    utype = real_masktype;
2936 	  else
2937 	    utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
2938 	  var = vect_get_new_ssa_name (utype, vect_scalar_var);
2939 	  mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_op);
2940 	  gassign *new_stmt
2941 	    = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
2942 	  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2943 	  mask_arg = var;
2944 	  if (!useless_type_conversion_p (real_masktype, utype))
2945 	    {
2946 	      gcc_assert (TYPE_PRECISION (utype)
2947 			  <= TYPE_PRECISION (real_masktype));
2948 	      var = vect_get_new_ssa_name (real_masktype, vect_scalar_var);
2949 	      new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
2950 	      vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2951 	      mask_arg = var;
2952 	    }
2953 	  src_op = build_zero_cst (srctype);
2954 	}
2955       gcall *new_call = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2956 					   mask_arg, scale);
2957 
2958       stmt_vec_info new_stmt_info;
2959       if (!useless_type_conversion_p (vectype, rettype))
2960 	{
2961 	  gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2962 				TYPE_VECTOR_SUBPARTS (rettype)));
2963 	  op = vect_get_new_ssa_name (rettype, vect_simple_var);
2964 	  gimple_call_set_lhs (new_call, op);
2965 	  vect_finish_stmt_generation (stmt_info, new_call, gsi);
2966 	  var = make_ssa_name (vec_dest);
2967 	  op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2968 	  gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2969 	  new_stmt_info
2970 	    = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2971 	}
2972       else
2973 	{
2974 	  var = make_ssa_name (vec_dest, new_call);
2975 	  gimple_call_set_lhs (new_call, var);
2976 	  new_stmt_info
2977 	    = vect_finish_stmt_generation (stmt_info, new_call, gsi);
2978 	}
2979 
2980       if (modifier == NARROW)
2981 	{
2982 	  if ((j & 1) == 0)
2983 	    {
2984 	      prev_res = var;
2985 	      continue;
2986 	    }
2987 	  var = permute_vec_elements (prev_res, var, perm_mask,
2988 				      stmt_info, gsi);
2989 	  new_stmt_info = loop_vinfo->lookup_def (var);
2990 	}
2991 
2992       if (prev_stmt_info == NULL)
2993 	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
2994       else
2995 	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
2996       prev_stmt_info = new_stmt_info;
2997     }
2998 }
2999 
3000 /* Prepare the base and offset in GS_INFO for vectorization.
3001    Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
3002    to the vectorized offset argument for the first copy of STMT_INFO.
3003    STMT_INFO is the statement described by GS_INFO and LOOP is the
3004    containing loop.  */
3005 
3006 static void
3007 vect_get_gather_scatter_ops (class loop *loop, stmt_vec_info stmt_info,
3008 			     gather_scatter_info *gs_info,
3009 			     tree *dataref_ptr, tree *vec_offset)
3010 {
3011   gimple_seq stmts = NULL;
3012   *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
3013   if (stmts != NULL)
3014     {
3015       basic_block new_bb;
3016       edge pe = loop_preheader_edge (loop);
3017       new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3018       gcc_assert (!new_bb);
3019     }
3020   *vec_offset = vect_get_vec_def_for_operand (gs_info->offset, stmt_info,
3021 					      gs_info->offset_vectype);
3022 }
3023 
3024 /* Prepare to implement a grouped or strided load or store using
3025    the gather load or scatter store operation described by GS_INFO.
3026    STMT_INFO is the load or store statement.
3027 
3028    Set *DATAREF_BUMP to the amount that should be added to the base
3029    address after each copy of the vectorized statement.  Set *VEC_OFFSET
3030    to an invariant offset vector in which element I has the value
3031    I * DR_STEP / SCALE.  */
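/* As an illustration, for DR_STEP 16, SCALE 4 and 4-element vectors this
   yields DATAREF_BUMP = 64 and VEC_OFFSET = { 0, 4, 8, 12 }.  */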
3032 
3033 static void
3034 vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
3035 				 loop_vec_info loop_vinfo,
3036 				 gather_scatter_info *gs_info,
3037 				 tree *dataref_bump, tree *vec_offset)
3038 {
3039   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3040   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3041   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3042   gimple_seq stmts;
3043 
3044   tree bump = size_binop (MULT_EXPR,
3045 			  fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
3046 			  size_int (TYPE_VECTOR_SUBPARTS (vectype)));
3047   *dataref_bump = force_gimple_operand (bump, &stmts, true, NULL_TREE);
3048   if (stmts)
3049     gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
3050 
3051   /* The offset given in GS_INFO can have pointer type, so use the element
3052      type of the vector instead.  */
3053   tree offset_type = TREE_TYPE (gs_info->offset_vectype);
3055 
3056   /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type.  */
3057   tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)),
3058 			  ssize_int (gs_info->scale));
3059   step = fold_convert (offset_type, step);
3060   step = force_gimple_operand (step, &stmts, true, NULL_TREE);
3061 
3062   /* Create {0, X, X*2, X*3, ...}.  */
3063   *vec_offset = gimple_build (&stmts, VEC_SERIES_EXPR, gs_info->offset_vectype,
3064 			      build_zero_cst (offset_type), step);
3065   if (stmts)
3066     gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
3067 }
3068 
3069 /* Return the amount that should be added to a vector pointer to move
3070    to the next or previous copy of AGGR_TYPE.  DR_INFO is the data reference
3071    being vectorized and MEMORY_ACCESS_TYPE describes the type of
3072    vectorization.  */
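/* E.g. for a contiguous access with a 16-byte AGGR_TYPE the increment is
   16 for a forward (positive-step) access and -16 for a backward one,
   while invariant accesses get a zero increment.  */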
3073 
3074 static tree
3075 vect_get_data_ptr_increment (dr_vec_info *dr_info, tree aggr_type,
3076 			     vect_memory_access_type memory_access_type)
3077 {
3078   if (memory_access_type == VMAT_INVARIANT)
3079     return size_zero_node;
3080 
3081   tree iv_step = TYPE_SIZE_UNIT (aggr_type);
3082   tree step = vect_dr_behavior (dr_info)->step;
3083   if (tree_int_cst_sgn (step) == -1)
3084     iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
3085   return iv_step;
3086 }
3087 
3088 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}.  */
3089 
3090 static bool
3091 vectorizable_bswap (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3092 		    stmt_vec_info *vec_stmt, slp_tree slp_node,
3093 		    tree vectype_in, stmt_vector_for_cost *cost_vec)
3094 {
3095   tree op, vectype;
3096   gcall *stmt = as_a <gcall *> (stmt_info->stmt);
3097   vec_info *vinfo = stmt_info->vinfo;
3098   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3099   unsigned ncopies;
3100 
3101   op = gimple_call_arg (stmt, 0);
3102   vectype = STMT_VINFO_VECTYPE (stmt_info);
3103   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3104 
3105   /* Multiple types in SLP are handled by creating the appropriate number of
3106      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
3107      case of SLP.  */
3108   if (slp_node)
3109     ncopies = 1;
3110   else
3111     ncopies = vect_get_num_copies (loop_vinfo, vectype);
3112 
3113   gcc_assert (ncopies >= 1);
3114 
3115   tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
3116   if (! char_vectype)
3117     return false;
3118 
3119   poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
3120   unsigned word_bytes;
3121   if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
3122     return false;
3123 
3124   /* The encoding uses one stepped pattern for each byte in the word.  */
3125   vec_perm_builder elts (num_bytes, word_bytes, 3);
3126   for (unsigned i = 0; i < 3; ++i)
3127     for (unsigned j = 0; j < word_bytes; ++j)
3128       elts.quick_push ((i + 1) * word_bytes - j - 1);
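  /* For example, vectorizing __builtin_bswap32 with a 16-byte vector
     (NUNITS == 4, WORD_BYTES == 4) expands to the byte selector
     { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }.  */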
3129 
3130   vec_perm_indices indices (elts, 1, num_bytes);
3131   if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
3132     return false;
3133 
3134   if (! vec_stmt)
3135     {
3136       STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3137       DUMP_VECT_SCOPE ("vectorizable_bswap");
3138       if (! slp_node)
3139 	{
3140 	  record_stmt_cost (cost_vec,
3141 			    1, vector_stmt, stmt_info, 0, vect_prologue);
3142 	  record_stmt_cost (cost_vec,
3143 			    ncopies, vec_perm, stmt_info, 0, vect_body);
3144 	}
3145       return true;
3146     }
3147 
3148   tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
3149 
3150   /* Transform.  */
3151   vec<tree> vec_oprnds = vNULL;
3152   stmt_vec_info new_stmt_info = NULL;
3153   stmt_vec_info prev_stmt_info = NULL;
3154   for (unsigned j = 0; j < ncopies; j++)
3155     {
3156       /* Handle uses.  */
3157       if (j == 0)
3158 	vect_get_vec_defs (op, NULL, stmt_info, &vec_oprnds, NULL, slp_node);
3159       else
3160 	vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds, NULL);
3161 
3162       /* Arguments are ready.  Create the new vector stmt.  */
3163       unsigned i;
3164       tree vop;
3165       FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3166        {
3167 	 gimple *new_stmt;
3168 	 tree tem = make_ssa_name (char_vectype);
3169 	 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3170 						      char_vectype, vop));
3171 	 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3172 	 tree tem2 = make_ssa_name (char_vectype);
3173 	 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3174 					 tem, tem, bswap_vconst);
3175 	 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3176 	 tem = make_ssa_name (vectype);
3177 	 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3178 						      vectype, tem2));
3179 	 new_stmt_info
3180 	   = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3181          if (slp_node)
3182 	   SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3183        }
3184 
3185       if (slp_node)
3186         continue;
3187 
3188       if (j == 0)
3189 	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
3190       else
3191 	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3192 
3193       prev_stmt_info = new_stmt_info;
3194     }
3195 
3196   vec_oprnds.release ();
3197   return true;
3198 }
3199 
3200 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3201    integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3202    in a single step.  On success, store the binary pack code in
3203    *CONVERT_CODE.  */
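/* For example, narrowing V2DI inputs to a V4SI output is typically done
   with VEC_PACK_TRUNC_EXPR, which is then the code stored in
   *CONVERT_CODE.  */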
3204 
3205 static bool
3206 simple_integer_narrowing (tree vectype_out, tree vectype_in,
3207 			  tree_code *convert_code)
3208 {
3209   if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3210       || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3211     return false;
3212 
3213   tree_code code;
3214   int multi_step_cvt = 0;
3215   auto_vec <tree, 8> interm_types;
3216   if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3217 					&code, &multi_step_cvt, &interm_types)
3218       || multi_step_cvt)
3219     return false;
3220 
3221   *convert_code = code;
3222   return true;
3223 }
3224 
3225 /* Function vectorizable_call.
3226 
3227    Check if STMT_INFO performs a function call that can be vectorized.
3228    If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3229    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3230    Return true if STMT_INFO is vectorizable in this way.  */
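/* For example, a scalar call such as y[i] = sqrtf (x[i]) can be replaced
   by a single .SQRT internal-function call on a whole vector, or by a
   target-provided builtin when no internal function is available.  This
   is only a sketch; the actual replacement depends on the target and on
   the widening/narrowing modifier computed below.  */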
3231 
3232 static bool
3233 vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3234 		   stmt_vec_info *vec_stmt, slp_tree slp_node,
3235 		   stmt_vector_for_cost *cost_vec)
3236 {
3237   gcall *stmt;
3238   tree vec_dest;
3239   tree scalar_dest;
3240   tree op;
3241   tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3242   stmt_vec_info prev_stmt_info;
3243   tree vectype_out, vectype_in;
3244   poly_uint64 nunits_in;
3245   poly_uint64 nunits_out;
3246   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3247   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3248   vec_info *vinfo = stmt_info->vinfo;
3249   tree fndecl, new_temp, rhs_type;
3250   enum vect_def_type dt[4]
3251     = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3252 	vect_unknown_def_type };
3253   tree vectypes[ARRAY_SIZE (dt)] = {};
3254   int ndts = ARRAY_SIZE (dt);
3255   int ncopies, j;
3256   auto_vec<tree, 8> vargs;
3257   auto_vec<tree, 8> orig_vargs;
3258   enum { NARROW, NONE, WIDEN } modifier;
3259   size_t i, nargs;
3260   tree lhs;
3261 
3262   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3263     return false;
3264 
3265   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3266       && ! vec_stmt)
3267     return false;
3268 
3269   /* Is STMT_INFO a vectorizable call?   */
3270   stmt = dyn_cast <gcall *> (stmt_info->stmt);
3271   if (!stmt)
3272     return false;
3273 
3274   if (gimple_call_internal_p (stmt)
3275       && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3276 	  || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3277     /* Handled by vectorizable_load and vectorizable_store.  */
3278     return false;
3279 
3280   if (gimple_call_lhs (stmt) == NULL_TREE
3281       || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3282     return false;
3283 
3284   gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3285 
3286   vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3287 
3288   /* Process function arguments.  */
3289   rhs_type = NULL_TREE;
3290   vectype_in = NULL_TREE;
3291   nargs = gimple_call_num_args (stmt);
3292 
3293   /* Bail out if the function has no arguments or more than four arguments;
3294      apart from fma-like operations we do not have interesting builtin
3295      functions to vectorize with more than two arguments.  */
3296   if (nargs == 0 || nargs > 4)
3297     return false;
3298 
3299   /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic.  */
3300   combined_fn cfn = gimple_call_combined_fn (stmt);
3301   if (cfn == CFN_GOMP_SIMD_LANE)
3302     {
3303       nargs = 0;
3304       rhs_type = unsigned_type_node;
3305     }
3306 
3307   int mask_opno = -1;
3308   if (internal_fn_p (cfn))
3309     mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3310 
3311   for (i = 0; i < nargs; i++)
3312     {
3313       op = gimple_call_arg (stmt, i);
3314 
3315       if ((int) i == mask_opno)
3316 	{
3317 	  if (!vect_check_scalar_mask (stmt_info, op, &dt[i], &vectypes[i]))
3318 	    return false;
3319 	  continue;
3320 	}
3321 
3322       if (!vect_is_simple_use (op, vinfo, &dt[i], &vectypes[i]))
3323 	{
3324 	  if (dump_enabled_p ())
3325 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3326 			     "use not simple.\n");
3327 	  return false;
3328 	}
3329 
3330       /* We can only handle calls with arguments of the same type.  */
3331       if (rhs_type
3332 	  && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3333 	{
3334 	  if (dump_enabled_p ())
3335 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3336                              "argument types differ.\n");
3337 	  return false;
3338 	}
3339       if (!rhs_type)
3340 	rhs_type = TREE_TYPE (op);
3341 
3342       if (!vectype_in)
3343 	vectype_in = vectypes[i];
3344       else if (vectypes[i]
3345 	       && !types_compatible_p (vectypes[i], vectype_in))
3346 	{
3347 	  if (dump_enabled_p ())
3348 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3349                              "argument vector types differ.\n");
3350 	  return false;
3351 	}
3352     }
3353   /* If all arguments are external or constant defs, infer the vector type
3354      from the scalar type.  */
3355   if (!vectype_in)
3356     vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
3357   if (vec_stmt)
3358     gcc_assert (vectype_in);
3359   if (!vectype_in)
3360     {
3361       if (dump_enabled_p ())
3362 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3363 			 "no vectype for scalar type %T\n", rhs_type);
3364 
3365       return false;
3366     }
3367   /* FORNOW: we don't yet support mixtures of vector sizes for calls,
3368      just mixtures of nunits.  E.g. DI->SI versions of __builtin_ctz*
3369      are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed
3370      by a pack of the two vectors into an SI vector.  We would need
3371      separate code to handle direct VnDI->VnSI IFN_CTZs.  */
3372   if (TYPE_SIZE (vectype_in) != TYPE_SIZE (vectype_out))
3373     {
3374       if (dump_enabled_p ())
3375 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3376 			 "mismatched vector sizes %T and %T\n",
3377 			 vectype_in, vectype_out);
3378       return false;
3379     }
3380 
3381   if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
3382       != VECTOR_BOOLEAN_TYPE_P (vectype_in))
3383     {
3384       if (dump_enabled_p ())
3385 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3386 			 "mixed mask and nonmask vector types\n");
3387       return false;
3388     }
3389 
3390   /* FORNOW */
3391   nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3392   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3393   if (known_eq (nunits_in * 2, nunits_out))
3394     modifier = NARROW;
3395   else if (known_eq (nunits_out, nunits_in))
3396     modifier = NONE;
3397   else if (known_eq (nunits_out * 2, nunits_in))
3398     modifier = WIDEN;
3399   else
3400     return false;
3401 
3402   /* We only handle functions that do not read or clobber memory.  */
3403   if (gimple_vuse (stmt))
3404     {
3405       if (dump_enabled_p ())
3406 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3407 			 "function reads from or writes to memory.\n");
3408       return false;
3409     }
3410 
3411   /* For now, we only vectorize functions if a suitable internal function
3412      or target specific builtin is available.  TODO -- in some cases, it
3413      might be profitable to insert the calls for pieces of the vector, in
3414      order to be able to vectorize other operations in the loop.  */
3415   fndecl = NULL_TREE;
3416   internal_fn ifn = IFN_LAST;
3417   tree callee = gimple_call_fndecl (stmt);
3418 
3419   /* First try using an internal function.  */
3420   tree_code convert_code = ERROR_MARK;
3421   if (cfn != CFN_LAST
3422       && (modifier == NONE
3423 	  || (modifier == NARROW
3424 	      && simple_integer_narrowing (vectype_out, vectype_in,
3425 					   &convert_code))))
3426     ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3427 					  vectype_in);
3428 
3429   /* If that fails, try asking for a target-specific built-in function.  */
3430   if (ifn == IFN_LAST)
3431     {
3432       if (cfn != CFN_LAST)
3433 	fndecl = targetm.vectorize.builtin_vectorized_function
3434 	  (cfn, vectype_out, vectype_in);
3435       else if (callee && fndecl_built_in_p (callee, BUILT_IN_MD))
3436 	fndecl = targetm.vectorize.builtin_md_vectorized_function
3437 	  (callee, vectype_out, vectype_in);
3438     }
3439 
3440   if (ifn == IFN_LAST && !fndecl)
3441     {
3442       if (cfn == CFN_GOMP_SIMD_LANE
3443 	  && !slp_node
3444 	  && loop_vinfo
3445 	  && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3446 	  && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3447 	  && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3448 	     == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3449 	{
3450 	  /* We can handle IFN_GOMP_SIMD_LANE by returning a
3451 	     { 0, 1, 2, ... vf - 1 } vector.  */
3452 	  gcc_assert (nargs == 0);
3453 	}
3454       else if (modifier == NONE
3455 	       && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3456 		   || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3457 		   || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
3458 	return vectorizable_bswap (stmt_info, gsi, vec_stmt, slp_node,
3459 				   vectype_in, cost_vec);
3460       else
3461 	{
3462 	  if (dump_enabled_p ())
3463 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3464 			     "function is not vectorizable.\n");
3465 	  return false;
3466 	}
3467     }
3468 
3469   if (slp_node)
3470     ncopies = 1;
3471   else if (modifier == NARROW && ifn == IFN_LAST)
3472     ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3473   else
3474     ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3475 
3476   /* Sanity check: make sure that at least one copy of the vectorized stmt
3477      needs to be generated.  */
3478   gcc_assert (ncopies >= 1);
3479 
3480   vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
3481   if (!vec_stmt) /* transformation not required.  */
3482     {
3483       STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3484       DUMP_VECT_SCOPE ("vectorizable_call");
3485       vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
3486       if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3487 	record_stmt_cost (cost_vec, ncopies / 2,
3488 			  vec_promote_demote, stmt_info, 0, vect_body);
3489 
3490       if (loop_vinfo && mask_opno >= 0)
3491 	{
3492 	  unsigned int nvectors = (slp_node
3493 				   ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3494 				   : ncopies);
3495 	  tree scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
3496 	  vect_record_loop_mask (loop_vinfo, masks, nvectors,
3497 				 vectype_out, scalar_mask);
3498 	}
3499       return true;
3500     }
3501 
3502   /* Transform.  */
3503 
3504   if (dump_enabled_p ())
3505     dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3506 
3507   /* Handle def.  */
3508   scalar_dest = gimple_call_lhs (stmt);
3509   vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3510 
3511   bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3512 
3513   stmt_vec_info new_stmt_info = NULL;
3514   prev_stmt_info = NULL;
3515   if (modifier == NONE || ifn != IFN_LAST)
3516     {
3517       tree prev_res = NULL_TREE;
3518       vargs.safe_grow (nargs);
3519       orig_vargs.safe_grow (nargs);
3520       for (j = 0; j < ncopies; ++j)
3521 	{
3522 	  /* Build argument list for the vectorized call.  */
3523 	  if (slp_node)
3524 	    {
3525 	      auto_vec<vec<tree> > vec_defs (nargs);
3526 	      vec<tree> vec_oprnds0;
3527 
3528 	      vect_get_slp_defs (slp_node, &vec_defs);
3529 	      vec_oprnds0 = vec_defs[0];
3530 
3531 	      /* Arguments are ready.  Create the new vector stmt.  */
3532 	      FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3533 		{
3534 		  size_t k;
3535 		  for (k = 0; k < nargs; k++)
3536 		    {
3537 		      vec<tree> vec_oprndsk = vec_defs[k];
3538 		      vargs[k] = vec_oprndsk[i];
3539 		    }
3540 		  if (modifier == NARROW)
3541 		    {
3542 		      /* We don't define any narrowing conditional functions
3543 			 at present.  */
3544 		      gcc_assert (mask_opno < 0);
3545 		      tree half_res = make_ssa_name (vectype_in);
3546 		      gcall *call
3547 			= gimple_build_call_internal_vec (ifn, vargs);
3548 		      gimple_call_set_lhs (call, half_res);
3549 		      gimple_call_set_nothrow (call, true);
3550 		      vect_finish_stmt_generation (stmt_info, call, gsi);
3551 		      if ((i & 1) == 0)
3552 			{
3553 			  prev_res = half_res;
3554 			  continue;
3555 			}
3556 		      new_temp = make_ssa_name (vec_dest);
3557 		      gimple *new_stmt
3558 			= gimple_build_assign (new_temp, convert_code,
3559 					       prev_res, half_res);
3560 		      new_stmt_info
3561 			= vect_finish_stmt_generation (stmt_info, new_stmt,
3562 						       gsi);
3563 		    }
3564 		  else
3565 		    {
3566 		      if (mask_opno >= 0 && masked_loop_p)
3567 			{
3568 			  unsigned int vec_num = vec_oprnds0.length ();
3569 			  /* Always true for SLP.  */
3570 			  gcc_assert (ncopies == 1);
3571 			  tree mask = vect_get_loop_mask (gsi, masks, vec_num,
3572 							  vectype_out, i);
3573 			  vargs[mask_opno] = prepare_load_store_mask
3574 			    (TREE_TYPE (mask), mask, vargs[mask_opno], gsi);
3575 			}
3576 
3577 		      gcall *call;
3578 		      if (ifn != IFN_LAST)
3579 			call = gimple_build_call_internal_vec (ifn, vargs);
3580 		      else
3581 			call = gimple_build_call_vec (fndecl, vargs);
3582 		      new_temp = make_ssa_name (vec_dest, call);
3583 		      gimple_call_set_lhs (call, new_temp);
3584 		      gimple_call_set_nothrow (call, true);
3585 		      new_stmt_info
3586 			= vect_finish_stmt_generation (stmt_info, call, gsi);
3587 		    }
3588 		  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3589 		}
3590 
3591 	      for (i = 0; i < nargs; i++)
3592 		{
3593 		  vec<tree> vec_oprndsi = vec_defs[i];
3594 		  vec_oprndsi.release ();
3595 		}
3596 	      continue;
3597 	    }
3598 
3599 	  for (i = 0; i < nargs; i++)
3600 	    {
3601 	      op = gimple_call_arg (stmt, i);
3602 	      if (j == 0)
3603 		vec_oprnd0
3604 		  = vect_get_vec_def_for_operand (op, stmt_info, vectypes[i]);
3605 	      else
3606 		vec_oprnd0
3607 		  = vect_get_vec_def_for_stmt_copy (vinfo, orig_vargs[i]);
3608 
3609 	      orig_vargs[i] = vargs[i] = vec_oprnd0;
3610 	    }
3611 
3612 	  if (mask_opno >= 0 && masked_loop_p)
3613 	    {
3614 	      tree mask = vect_get_loop_mask (gsi, masks, ncopies,
3615 					      vectype_out, j);
3616 	      vargs[mask_opno]
3617 		= prepare_load_store_mask (TREE_TYPE (mask), mask,
3618 					   vargs[mask_opno], gsi);
3619 	    }
3620 
3621 	  if (cfn == CFN_GOMP_SIMD_LANE)
3622 	    {
3623 	      tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3624 	      tree new_var
3625 		= vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3626 	      gimple *init_stmt = gimple_build_assign (new_var, cst);
3627 	      vect_init_vector_1 (stmt_info, init_stmt, NULL);
3628 	      new_temp = make_ssa_name (vec_dest);
3629 	      gimple *new_stmt = gimple_build_assign (new_temp, new_var);
3630 	      new_stmt_info
3631 		= vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3632 	    }
3633 	  else if (modifier == NARROW)
3634 	    {
3635 	      /* We don't define any narrowing conditional functions at
3636 		 present.  */
3637 	      gcc_assert (mask_opno < 0);
3638 	      tree half_res = make_ssa_name (vectype_in);
3639 	      gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3640 	      gimple_call_set_lhs (call, half_res);
3641 	      gimple_call_set_nothrow (call, true);
3642 	      vect_finish_stmt_generation (stmt_info, call, gsi);
3643 	      if ((j & 1) == 0)
3644 		{
3645 		  prev_res = half_res;
3646 		  continue;
3647 		}
3648 	      new_temp = make_ssa_name (vec_dest);
3649 	      gassign *new_stmt = gimple_build_assign (new_temp, convert_code,
3650 						       prev_res, half_res);
3651 	      new_stmt_info
3652 		= vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3653 	    }
3654 	  else
3655 	    {
3656 	      gcall *call;
3657 	      if (ifn != IFN_LAST)
3658 		call = gimple_build_call_internal_vec (ifn, vargs);
3659 	      else
3660 		call = gimple_build_call_vec (fndecl, vargs);
3661 	      new_temp = make_ssa_name (vec_dest, call);
3662 	      gimple_call_set_lhs (call, new_temp);
3663 	      gimple_call_set_nothrow (call, true);
3664 	      new_stmt_info
3665 		= vect_finish_stmt_generation (stmt_info, call, gsi);
3666 	    }
3667 
3668 	  if (j == (modifier == NARROW ? 1 : 0))
3669 	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
3670 	  else
3671 	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3672 
3673 	  prev_stmt_info = new_stmt_info;
3674 	}
3675     }
3676   else if (modifier == NARROW)
3677     {
3678       /* We don't define any narrowing conditional functions at present.  */
3679       gcc_assert (mask_opno < 0);
3680       for (j = 0; j < ncopies; ++j)
3681 	{
3682 	  /* Build argument list for the vectorized call.  */
3683 	  if (j == 0)
3684 	    vargs.create (nargs * 2);
3685 	  else
3686 	    vargs.truncate (0);
3687 
3688 	  if (slp_node)
3689 	    {
3690 	      auto_vec<vec<tree> > vec_defs (nargs);
3691 	      vec<tree> vec_oprnds0;
3692 
3693 	      vect_get_slp_defs (slp_node, &vec_defs);
3694 	      vec_oprnds0 = vec_defs[0];
3695 
3696 	      /* Arguments are ready.  Create the new vector stmt.  */
3697 	      for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3698 		{
3699 		  size_t k;
3700 		  vargs.truncate (0);
3701 		  for (k = 0; k < nargs; k++)
3702 		    {
3703 		      vec<tree> vec_oprndsk = vec_defs[k];
3704 		      vargs.quick_push (vec_oprndsk[i]);
3705 		      vargs.quick_push (vec_oprndsk[i + 1]);
3706 		    }
3707 		  gcall *call;
3708 		  if (ifn != IFN_LAST)
3709 		    call = gimple_build_call_internal_vec (ifn, vargs);
3710 		  else
3711 		    call = gimple_build_call_vec (fndecl, vargs);
3712 		  new_temp = make_ssa_name (vec_dest, call);
3713 		  gimple_call_set_lhs (call, new_temp);
3714 		  gimple_call_set_nothrow (call, true);
3715 		  new_stmt_info
3716 		    = vect_finish_stmt_generation (stmt_info, call, gsi);
3717 		  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3718 		}
3719 
3720 	      for (i = 0; i < nargs; i++)
3721 		{
3722 		  vec<tree> vec_oprndsi = vec_defs[i];
3723 		  vec_oprndsi.release ();
3724 		}
3725 	      continue;
3726 	    }
3727 
3728 	  for (i = 0; i < nargs; i++)
3729 	    {
3730 	      op = gimple_call_arg (stmt, i);
3731 	      if (j == 0)
3732 		{
3733 		  vec_oprnd0
3734 		    = vect_get_vec_def_for_operand (op, stmt_info,
3735 						    vectypes[i]);
3736 		  vec_oprnd1
3737 		    = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
3738 		}
3739 	      else
3740 		{
3741 		  vec_oprnd1 = gimple_call_arg (new_stmt_info->stmt,
3742 						2 * i + 1);
3743 		  vec_oprnd0
3744 		    = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1);
3745 		  vec_oprnd1
3746 		    = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
3747 		}
3748 
3749 	      vargs.quick_push (vec_oprnd0);
3750 	      vargs.quick_push (vec_oprnd1);
3751 	    }
3752 
3753 	  gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
3754 	  new_temp = make_ssa_name (vec_dest, new_stmt);
3755 	  gimple_call_set_lhs (new_stmt, new_temp);
3756 	  new_stmt_info
3757 	    = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3758 
3759 	  if (j == 0)
3760 	    STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
3761 	  else
3762 	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3763 
3764 	  prev_stmt_info = new_stmt_info;
3765 	}
3766 
3767       *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3768     }
3769   else
3770     /* No current target implements this case.  */
3771     return false;
3772 
3773   vargs.release ();
3774 
3775   /* The call in STMT might prevent it from being removed in dce.
3776      We however cannot remove it here, due to the way the ssa name
3777      it defines is mapped to the new definition.  So just replace the
3778      rhs of the statement with something harmless.  */
3779 
3780   if (slp_node)
3781     return true;
3782 
3783   stmt_info = vect_orig_stmt (stmt_info);
3784   lhs = gimple_get_lhs (stmt_info->stmt);
3785 
3786   gassign *new_stmt
3787     = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3788   vinfo->replace_stmt (gsi, stmt_info, new_stmt);
3789 
3790   return true;
3791 }
3792 
3793 
3794 struct simd_call_arg_info
3795 {
3796   tree vectype;
3797   tree op;
3798   HOST_WIDE_INT linear_step;
3799   enum vect_def_type dt;
3800   unsigned int align;
3801   bool simd_lane_linear;
3802 };
3803 
3804 /* Helper function of vectorizable_simd_clone_call.  If OP, an SSA_NAME,
3805    is linear within simd lane (but not within whole loop), note it in
3806    *ARGINFO.  */
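/* A sketch of the kind of chain this recognizes (SSA names are
   illustrative):

     _1 = GOMP_SIMD_LANE (simduid.0);
     _2 = _1 * 4;
     op_3 = &base + _2;

   which records &base as the per-lane base and 4 as the linear step.  */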
3807 
3808 static void
3809 vect_simd_lane_linear (tree op, class loop *loop,
3810 		       struct simd_call_arg_info *arginfo)
3811 {
3812   gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3813 
3814   if (!is_gimple_assign (def_stmt)
3815       || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3816       || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3817     return;
3818 
3819   tree base = gimple_assign_rhs1 (def_stmt);
3820   HOST_WIDE_INT linear_step = 0;
3821   tree v = gimple_assign_rhs2 (def_stmt);
3822   while (TREE_CODE (v) == SSA_NAME)
3823     {
3824       tree t;
3825       def_stmt = SSA_NAME_DEF_STMT (v);
3826       if (is_gimple_assign (def_stmt))
3827 	switch (gimple_assign_rhs_code (def_stmt))
3828 	  {
3829 	  case PLUS_EXPR:
3830 	    t = gimple_assign_rhs2 (def_stmt);
3831 	    if (linear_step || TREE_CODE (t) != INTEGER_CST)
3832 	      return;
3833 	    base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3834 	    v = gimple_assign_rhs1 (def_stmt);
3835 	    continue;
3836 	  case MULT_EXPR:
3837 	    t = gimple_assign_rhs2 (def_stmt);
3838 	    if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3839 	      return;
3840 	    linear_step = tree_to_shwi (t);
3841 	    v = gimple_assign_rhs1 (def_stmt);
3842 	    continue;
3843 	  CASE_CONVERT:
3844 	    t = gimple_assign_rhs1 (def_stmt);
3845 	    if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3846 		|| (TYPE_PRECISION (TREE_TYPE (v))
3847 		    < TYPE_PRECISION (TREE_TYPE (t))))
3848 	      return;
3849 	    if (!linear_step)
3850 	      linear_step = 1;
3851 	    v = t;
3852 	    continue;
3853 	  default:
3854 	    return;
3855 	  }
3856       else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3857 	       && loop->simduid
3858 	       && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3859 	       && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3860 		   == loop->simduid))
3861 	{
3862 	  if (!linear_step)
3863 	    linear_step = 1;
3864 	  arginfo->linear_step = linear_step;
3865 	  arginfo->op = base;
3866 	  arginfo->simd_lane_linear = true;
3867 	  return;
3868 	}
3869     }
3870 }
3871 
3872 /* Return the number of elements in vector type VECTYPE, which is associated
3873    with a SIMD clone.  At present these vectors always have a constant
3874    length.  */
3875 
3876 static unsigned HOST_WIDE_INT
3877 simd_clone_subparts (tree vectype)
3878 {
3879   return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
3880 }
3881 
3882 /* Function vectorizable_simd_clone_call.
3883 
3884    Check if STMT_INFO performs a function call that can be vectorized
3885    by calling a simd clone of the function.
3886    If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3887    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3888    Return true if STMT_INFO is vectorizable in this way.  */
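/* For example, with "#pragma omp declare simd uniform (y)" on foo, the
   scalar call z[i] = foo (x[i], y) can be replaced by one call per copy
   to the selected clone (on x86 something along the lines of
   _ZGVbN4vu_foo), passing the first argument as a vector and keeping the
   second one scalar.  This is only a sketch; clone selection and argument
   marshalling happen below.  */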
3889 
3890 static bool
3891 vectorizable_simd_clone_call (stmt_vec_info stmt_info,
3892 			      gimple_stmt_iterator *gsi,
3893 			      stmt_vec_info *vec_stmt, slp_tree slp_node,
3894 			      stmt_vector_for_cost *)
3895 {
3896   tree vec_dest;
3897   tree scalar_dest;
3898   tree op, type;
3899   tree vec_oprnd0 = NULL_TREE;
3900   stmt_vec_info prev_stmt_info;
3901   tree vectype;
3902   unsigned int nunits;
3903   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3904   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3905   vec_info *vinfo = stmt_info->vinfo;
3906   class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3907   tree fndecl, new_temp;
3908   int ncopies, j;
3909   auto_vec<simd_call_arg_info> arginfo;
3910   vec<tree> vargs = vNULL;
3911   size_t i, nargs;
3912   tree lhs, rtype, ratype;
3913   vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3914 
3915   /* Is STMT a vectorizable call?   */
3916   gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
3917   if (!stmt)
3918     return false;
3919 
3920   fndecl = gimple_call_fndecl (stmt);
3921   if (fndecl == NULL_TREE)
3922     return false;
3923 
3924   struct cgraph_node *node = cgraph_node::get (fndecl);
3925   if (node == NULL || node->simd_clones == NULL)
3926     return false;
3927 
3928   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3929     return false;
3930 
3931   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3932       && ! vec_stmt)
3933     return false;
3934 
3935   if (gimple_call_lhs (stmt)
3936       && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3937     return false;
3938 
3939   gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3940 
3941   vectype = STMT_VINFO_VECTYPE (stmt_info);
3942 
3943   if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
3944     return false;
3945 
3946   /* FORNOW */
3947   if (slp_node)
3948     return false;
3949 
3950   /* Process function arguments.  */
3951   nargs = gimple_call_num_args (stmt);
3952 
3953   /* Bail out if the function has zero arguments.  */
3954   if (nargs == 0)
3955     return false;
3956 
3957   arginfo.reserve (nargs, true);
3958 
3959   for (i = 0; i < nargs; i++)
3960     {
3961       simd_call_arg_info thisarginfo;
3962       affine_iv iv;
3963 
3964       thisarginfo.linear_step = 0;
3965       thisarginfo.align = 0;
3966       thisarginfo.op = NULL_TREE;
3967       thisarginfo.simd_lane_linear = false;
3968 
3969       op = gimple_call_arg (stmt, i);
3970       if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
3971 			       &thisarginfo.vectype)
3972 	  || thisarginfo.dt == vect_uninitialized_def)
3973 	{
3974 	  if (dump_enabled_p ())
3975 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3976 			     "use not simple.\n");
3977 	  return false;
3978 	}
3979 
3980       if (thisarginfo.dt == vect_constant_def
3981 	  || thisarginfo.dt == vect_external_def)
3982 	gcc_assert (thisarginfo.vectype == NULL_TREE);
3983       else
3984 	{
3985 	  gcc_assert (thisarginfo.vectype != NULL_TREE);
3986 	  if (VECTOR_BOOLEAN_TYPE_P (thisarginfo.vectype))
3987 	    {
3988 	      if (dump_enabled_p ())
3989 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3990 				 "vector mask arguments are not supported\n");
3991 	      return false;
3992 	    }
3993 	}
3994 
3995       /* For linear arguments, the analyze phase should have saved
3996 	 the base and step in STMT_VINFO_SIMD_CLONE_INFO.  */
3997       if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3998 	  && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3999 	{
4000 	  gcc_assert (vec_stmt);
4001 	  thisarginfo.linear_step
4002 	    = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
4003 	  thisarginfo.op
4004 	    = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
4005 	  thisarginfo.simd_lane_linear
4006 	    = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
4007 	       == boolean_true_node);
4008 	  /* If the loop has been peeled for alignment, adjust the recorded base accordingly.  */
4009 	  tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
4010 	  tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
4011 	  if (n1 != n2 && !thisarginfo.simd_lane_linear)
4012 	    {
4013 	      tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
4014 	      tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
4015 	      tree opt = TREE_TYPE (thisarginfo.op);
4016 	      bias = fold_convert (TREE_TYPE (step), bias);
4017 	      bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
4018 	      thisarginfo.op
4019 		= fold_build2 (POINTER_TYPE_P (opt)
4020 			       ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
4021 			       thisarginfo.op, bias);
4022 	    }
4023 	}
4024       else if (!vec_stmt
4025 	       && thisarginfo.dt != vect_constant_def
4026 	       && thisarginfo.dt != vect_external_def
4027 	       && loop_vinfo
4028 	       && TREE_CODE (op) == SSA_NAME
4029 	       && simple_iv (loop, loop_containing_stmt (stmt), op,
4030 			     &iv, false)
4031 	       && tree_fits_shwi_p (iv.step))
4032 	{
4033 	  thisarginfo.linear_step = tree_to_shwi (iv.step);
4034 	  thisarginfo.op = iv.base;
4035 	}
4036       else if ((thisarginfo.dt == vect_constant_def
4037 		|| thisarginfo.dt == vect_external_def)
4038 	       && POINTER_TYPE_P (TREE_TYPE (op)))
4039 	thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
4040       /* Addresses of array elements indexed by GOMP_SIMD_LANE are
4041 	 linear too.  */
4042       if (POINTER_TYPE_P (TREE_TYPE (op))
4043 	  && !thisarginfo.linear_step
4044 	  && !vec_stmt
4045 	  && thisarginfo.dt != vect_constant_def
4046 	  && thisarginfo.dt != vect_external_def
4047 	  && loop_vinfo
4048 	  && !slp_node
4049 	  && TREE_CODE (op) == SSA_NAME)
4050 	vect_simd_lane_linear (op, loop, &thisarginfo);
4051 
4052       arginfo.quick_push (thisarginfo);
4053     }
4054 
4055   unsigned HOST_WIDE_INT vf;
4056   if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
4057     {
4058       if (dump_enabled_p ())
4059 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4060 			 "not considering SIMD clones; not yet supported"
4061 			 " for variable-width vectors.\n");
4062       return false;
4063     }
4064 
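  /* Pick the clone with the smallest "badness": e.g. a clone whose simdlen
     is half VF costs 1024, an inbranch clone costs an extra 2048 and any
     target-reported unsuitability adds 512 per unit, so an exact,
     not-inbranch match is preferred whenever one exists.  */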
4065   unsigned int badness = 0;
4066   struct cgraph_node *bestn = NULL;
4067   if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
4068     bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
4069   else
4070     for (struct cgraph_node *n = node->simd_clones; n != NULL;
4071 	 n = n->simdclone->next_clone)
4072       {
4073 	unsigned int this_badness = 0;
4074 	if (n->simdclone->simdlen > vf
4075 	    || n->simdclone->nargs != nargs)
4076 	  continue;
4077 	if (n->simdclone->simdlen < vf)
4078 	  this_badness += (exact_log2 (vf)
4079 			   - exact_log2 (n->simdclone->simdlen)) * 1024;
4080 	if (n->simdclone->inbranch)
4081 	  this_badness += 2048;
4082 	int target_badness = targetm.simd_clone.usable (n);
4083 	if (target_badness < 0)
4084 	  continue;
4085 	this_badness += target_badness * 512;
4086 	/* FORNOW: Have to add code to add the mask argument.  */
4087 	if (n->simdclone->inbranch)
4088 	  continue;
4089 	for (i = 0; i < nargs; i++)
4090 	  {
4091 	    switch (n->simdclone->args[i].arg_type)
4092 	      {
4093 	      case SIMD_CLONE_ARG_TYPE_VECTOR:
4094 		if (!useless_type_conversion_p
4095 			(n->simdclone->args[i].orig_type,
4096 			 TREE_TYPE (gimple_call_arg (stmt, i))))
4097 		  i = -1;
4098 		else if (arginfo[i].dt == vect_constant_def
4099 			 || arginfo[i].dt == vect_external_def
4100 			 || arginfo[i].linear_step)
4101 		  this_badness += 64;
4102 		break;
4103 	      case SIMD_CLONE_ARG_TYPE_UNIFORM:
4104 		if (arginfo[i].dt != vect_constant_def
4105 		    && arginfo[i].dt != vect_external_def)
4106 		  i = -1;
4107 		break;
4108 	      case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4109 	      case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4110 		if (arginfo[i].dt == vect_constant_def
4111 		    || arginfo[i].dt == vect_external_def
4112 		    || (arginfo[i].linear_step
4113 			!= n->simdclone->args[i].linear_step))
4114 		  i = -1;
4115 		break;
4116 	      case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4117 	      case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4118 	      case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4119 	      case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4120 	      case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4121 	      case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4122 		/* FORNOW */
4123 		i = -1;
4124 		break;
4125 	      case SIMD_CLONE_ARG_TYPE_MASK:
4126 		gcc_unreachable ();
4127 	      }
4128 	    if (i == (size_t) -1)
4129 	      break;
4130 	    if (n->simdclone->args[i].alignment > arginfo[i].align)
4131 	      {
4132 		i = -1;
4133 		break;
4134 	      }
4135 	    if (arginfo[i].align)
4136 	      this_badness += (exact_log2 (arginfo[i].align)
4137 			       - exact_log2 (n->simdclone->args[i].alignment));
4138 	  }
4139 	if (i == (size_t) -1)
4140 	  continue;
4141 	if (bestn == NULL || this_badness < badness)
4142 	  {
4143 	    bestn = n;
4144 	    badness = this_badness;
4145 	  }
4146       }
4147 
4148   if (bestn == NULL)
4149     return false;
4150 
4151   for (i = 0; i < nargs; i++)
4152     if ((arginfo[i].dt == vect_constant_def
4153 	 || arginfo[i].dt == vect_external_def)
4154 	&& bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
4155       {
4156 	tree arg_type = TREE_TYPE (gimple_call_arg (stmt, i));
4157 	arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type,
4158 							  slp_node);
4159 	if (arginfo[i].vectype == NULL
4160 	    || (simd_clone_subparts (arginfo[i].vectype)
4161 		> bestn->simdclone->simdlen))
4162 	  return false;
4163       }
4164 
4165   fndecl = bestn->decl;
4166   nunits = bestn->simdclone->simdlen;
4167   ncopies = vf / nunits;
4168 
4169   /* If the function isn't const, only allow it in simd loops where the
4170      user has asserted that at least nunits consecutive iterations can be
4171      performed using SIMD instructions.  */
4172   if ((loop == NULL || (unsigned) loop->safelen < nunits)
4173       && gimple_vuse (stmt))
4174     return false;
4175 
4176   /* Sanity check: make sure that at least one copy of the vectorized stmt
4177      needs to be generated.  */
4178   gcc_assert (ncopies >= 1);
4179 
4180   if (!vec_stmt) /* transformation not required.  */
4181     {
4182       STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
4183       for (i = 0; i < nargs; i++)
4184 	if ((bestn->simdclone->args[i].arg_type
4185 	     == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
4186 	    || (bestn->simdclone->args[i].arg_type
4187 		== SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
4188 	  {
4189 	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
4190 									+ 1);
4191 	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
4192 	    tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4193 		       ? size_type_node : TREE_TYPE (arginfo[i].op);
4194 	    tree ls = build_int_cst (lst, arginfo[i].linear_step);
4195 	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
4196 	    tree sll = arginfo[i].simd_lane_linear
4197 		       ? boolean_true_node : boolean_false_node;
4198 	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
4199 	  }
4200       STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
4201       DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4202 /*      vect_model_simple_cost (stmt_info, ncopies, dt, slp_node, cost_vec); */
4203       return true;
4204     }
4205 
4206   /* Transform.  */
4207 
4208   if (dump_enabled_p ())
4209     dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4210 
4211   /* Handle def.  */
4212   scalar_dest = gimple_call_lhs (stmt);
4213   vec_dest = NULL_TREE;
4214   rtype = NULL_TREE;
4215   ratype = NULL_TREE;
4216   if (scalar_dest)
4217     {
4218       vec_dest = vect_create_destination_var (scalar_dest, vectype);
4219       rtype = TREE_TYPE (TREE_TYPE (fndecl));
4220       if (TREE_CODE (rtype) == ARRAY_TYPE)
4221 	{
4222 	  ratype = rtype;
4223 	  rtype = TREE_TYPE (ratype);
4224 	}
4225     }
4226 
4227   prev_stmt_info = NULL;
4228   for (j = 0; j < ncopies; ++j)
4229     {
4230       /* Build argument list for the vectorized call.  */
4231       if (j == 0)
4232 	vargs.create (nargs);
4233       else
4234 	vargs.truncate (0);
4235 
4236       for (i = 0; i < nargs; i++)
4237 	{
4238 	  unsigned int k, l, m, o;
4239 	  tree atype;
4240 	  op = gimple_call_arg (stmt, i);
4241 	  switch (bestn->simdclone->args[i].arg_type)
4242 	    {
4243 	    case SIMD_CLONE_ARG_TYPE_VECTOR:
4244 	      atype = bestn->simdclone->args[i].vector_type;
4245 	      o = nunits / simd_clone_subparts (atype);
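	      /* E.g. if the clone takes V4SI arguments but the loop works on
		 V8SI vectors, each V8SI operand is split into two V4SI pieces
		 with BIT_FIELD_REFs below; in the opposite case the narrower
		 operands are glued together with a CONSTRUCTOR.  */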
4246 	      for (m = j * o; m < (j + 1) * o; m++)
4247 		{
4248 		  if (simd_clone_subparts (atype)
4249 		      < simd_clone_subparts (arginfo[i].vectype))
4250 		    {
4251 		      poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4252 		      k = (simd_clone_subparts (arginfo[i].vectype)
4253 			   / simd_clone_subparts (atype));
4254 		      gcc_assert ((k & (k - 1)) == 0);
4255 		      if (m == 0)
4256 			vec_oprnd0
4257 			  = vect_get_vec_def_for_operand (op, stmt_info);
4258 		      else
4259 			{
4260 			  vec_oprnd0 = arginfo[i].op;
4261 			  if ((m & (k - 1)) == 0)
4262 			    vec_oprnd0
4263 			      = vect_get_vec_def_for_stmt_copy (vinfo,
4264 								vec_oprnd0);
4265 			}
4266 		      arginfo[i].op = vec_oprnd0;
4267 		      vec_oprnd0
4268 			= build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4269 				  bitsize_int (prec),
4270 				  bitsize_int ((m & (k - 1)) * prec));
4271 		      gassign *new_stmt
4272 			= gimple_build_assign (make_ssa_name (atype),
4273 					       vec_oprnd0);
4274 		      vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4275 		      vargs.safe_push (gimple_assign_lhs (new_stmt));
4276 		    }
4277 		  else
4278 		    {
4279 		      k = (simd_clone_subparts (atype)
4280 			   / simd_clone_subparts (arginfo[i].vectype));
4281 		      gcc_assert ((k & (k - 1)) == 0);
4282 		      vec<constructor_elt, va_gc> *ctor_elts;
4283 		      if (k != 1)
4284 			vec_alloc (ctor_elts, k);
4285 		      else
4286 			ctor_elts = NULL;
4287 		      for (l = 0; l < k; l++)
4288 			{
4289 			  if (m == 0 && l == 0)
4290 			    vec_oprnd0
4291 			      = vect_get_vec_def_for_operand (op, stmt_info);
4292 			  else
4293 			    vec_oprnd0
4294 			      = vect_get_vec_def_for_stmt_copy (vinfo,
4295 								arginfo[i].op);
4296 			  arginfo[i].op = vec_oprnd0;
4297 			  if (k == 1)
4298 			    break;
4299 			  CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4300 						  vec_oprnd0);
4301 			}
4302 		      if (k == 1)
4303 			vargs.safe_push (vec_oprnd0);
4304 		      else
4305 			{
4306 			  vec_oprnd0 = build_constructor (atype, ctor_elts);
4307 			  gassign *new_stmt
4308 			    = gimple_build_assign (make_ssa_name (atype),
4309 						   vec_oprnd0);
4310 			  vect_finish_stmt_generation (stmt_info, new_stmt,
4311 						       gsi);
4312 			  vargs.safe_push (gimple_assign_lhs (new_stmt));
4313 			}
4314 		    }
4315 		}
4316 	      break;
4317 	    case SIMD_CLONE_ARG_TYPE_UNIFORM:
4318 	      vargs.safe_push (op);
4319 	      break;
4320 	    case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4321 	    case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4322 	      if (j == 0)
4323 		{
4324 		  gimple_seq stmts;
4325 		  arginfo[i].op
4326 		    = force_gimple_operand (unshare_expr (arginfo[i].op),
4327 					    &stmts, true, NULL_TREE);
4328 		  if (stmts != NULL)
4329 		    {
4330 		      basic_block new_bb;
4331 		      edge pe = loop_preheader_edge (loop);
4332 		      new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4333 		      gcc_assert (!new_bb);
4334 		    }
4335 		  if (arginfo[i].simd_lane_linear)
4336 		    {
4337 		      vargs.safe_push (arginfo[i].op);
4338 		      break;
4339 		    }
4340 		  tree phi_res = copy_ssa_name (op);
4341 		  gphi *new_phi = create_phi_node (phi_res, loop->header);
4342 		  loop_vinfo->add_stmt (new_phi);
4343 		  add_phi_arg (new_phi, arginfo[i].op,
4344 			       loop_preheader_edge (loop), UNKNOWN_LOCATION);
4345 		  enum tree_code code
4346 		    = POINTER_TYPE_P (TREE_TYPE (op))
4347 		      ? POINTER_PLUS_EXPR : PLUS_EXPR;
4348 		  tree type = POINTER_TYPE_P (TREE_TYPE (op))
4349 			      ? sizetype : TREE_TYPE (op);
4350 		  widest_int cst
4351 		    = wi::mul (bestn->simdclone->args[i].linear_step,
4352 			       ncopies * nunits);
4353 		  tree tcst = wide_int_to_tree (type, cst);
4354 		  tree phi_arg = copy_ssa_name (op);
4355 		  gassign *new_stmt
4356 		    = gimple_build_assign (phi_arg, code, phi_res, tcst);
4357 		  gimple_stmt_iterator si = gsi_after_labels (loop->header);
4358 		  gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4359 		  loop_vinfo->add_stmt (new_stmt);
4360 		  add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4361 			       UNKNOWN_LOCATION);
4362 		  arginfo[i].op = phi_res;
4363 		  vargs.safe_push (phi_res);
4364 		}
4365 	      else
4366 		{
4367 		  enum tree_code code
4368 		    = POINTER_TYPE_P (TREE_TYPE (op))
4369 		      ? POINTER_PLUS_EXPR : PLUS_EXPR;
4370 		  tree type = POINTER_TYPE_P (TREE_TYPE (op))
4371 			      ? sizetype : TREE_TYPE (op);
4372 		  widest_int cst
4373 		    = wi::mul (bestn->simdclone->args[i].linear_step,
4374 			       j * nunits);
4375 		  tree tcst = wide_int_to_tree (type, cst);
4376 		  new_temp = make_ssa_name (TREE_TYPE (op));
4377 		  gassign *new_stmt
4378 		    = gimple_build_assign (new_temp, code,
4379 					   arginfo[i].op, tcst);
4380 		  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4381 		  vargs.safe_push (new_temp);
4382 		}
4383 	      break;
4384 	    case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4385 	    case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4386 	    case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4387 	    case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4388 	    case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4389 	    case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4390 	    default:
4391 	      gcc_unreachable ();
4392 	    }
4393 	}
4394 
4395       gcall *new_call = gimple_build_call_vec (fndecl, vargs);
4396       if (vec_dest)
4397 	{
4398 	  gcc_assert (ratype || simd_clone_subparts (rtype) == nunits);
4399 	  if (ratype)
4400 	    new_temp = create_tmp_var (ratype);
4401 	  else if (simd_clone_subparts (vectype)
4402 		   == simd_clone_subparts (rtype))
4403 	    new_temp = make_ssa_name (vec_dest, new_call);
4404 	  else
4405 	    new_temp = make_ssa_name (rtype, new_call);
4406 	  gimple_call_set_lhs (new_call, new_temp);
4407 	}
4408       stmt_vec_info new_stmt_info
4409 	= vect_finish_stmt_generation (stmt_info, new_call, gsi);
4410 
4411       if (vec_dest)
4412 	{
4413 	  if (simd_clone_subparts (vectype) < nunits)
4414 	    {
4415 	      unsigned int k, l;
4416 	      poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4417 	      poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4418 	      k = nunits / simd_clone_subparts (vectype);
4419 	      gcc_assert ((k & (k - 1)) == 0);
4420 	      for (l = 0; l < k; l++)
4421 		{
4422 		  tree t;
4423 		  if (ratype)
4424 		    {
4425 		      t = build_fold_addr_expr (new_temp);
4426 		      t = build2 (MEM_REF, vectype, t,
4427 				  build_int_cst (TREE_TYPE (t), l * bytes));
4428 		    }
4429 		  else
4430 		    t = build3 (BIT_FIELD_REF, vectype, new_temp,
4431 				bitsize_int (prec), bitsize_int (l * prec));
4432 		  gimple *new_stmt
4433 		    = gimple_build_assign (make_ssa_name (vectype), t);
4434 		  new_stmt_info
4435 		    = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4436 
4437 		  if (j == 0 && l == 0)
4438 		    STMT_VINFO_VEC_STMT (stmt_info)
4439 		      = *vec_stmt = new_stmt_info;
4440 		  else
4441 		    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4442 
4443 		  prev_stmt_info = new_stmt_info;
4444 		}
4445 
4446 	      if (ratype)
4447 		vect_clobber_variable (stmt_info, gsi, new_temp);
4448 	      continue;
4449 	    }
4450 	  else if (simd_clone_subparts (vectype) > nunits)
4451 	    {
4452 	      unsigned int k = (simd_clone_subparts (vectype)
4453 				/ simd_clone_subparts (rtype));
4454 	      gcc_assert ((k & (k - 1)) == 0);
4455 	      if ((j & (k - 1)) == 0)
4456 		vec_alloc (ret_ctor_elts, k);
4457 	      if (ratype)
4458 		{
4459 		  unsigned int m, o = nunits / simd_clone_subparts (rtype);
4460 		  for (m = 0; m < o; m++)
4461 		    {
4462 		      tree tem = build4 (ARRAY_REF, rtype, new_temp,
4463 					 size_int (m), NULL_TREE, NULL_TREE);
4464 		      gimple *new_stmt
4465 			= gimple_build_assign (make_ssa_name (rtype), tem);
4466 		      new_stmt_info
4467 			= vect_finish_stmt_generation (stmt_info, new_stmt,
4468 						       gsi);
4469 		      CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4470 					      gimple_assign_lhs (new_stmt));
4471 		    }
4472 		  vect_clobber_variable (stmt_info, gsi, new_temp);
4473 		}
4474 	      else
4475 		CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4476 	      if ((j & (k - 1)) != k - 1)
4477 		continue;
4478 	      vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4479 	      gimple *new_stmt
4480 		= gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4481 	      new_stmt_info
4482 		= vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4483 
4484 	      if ((unsigned) j == k - 1)
4485 		STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
4486 	      else
4487 		STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4488 
4489 	      prev_stmt_info = new_stmt_info;
4490 	      continue;
4491 	    }
4492 	  else if (ratype)
4493 	    {
4494 	      tree t = build_fold_addr_expr (new_temp);
4495 	      t = build2 (MEM_REF, vectype, t,
4496 			  build_int_cst (TREE_TYPE (t), 0));
4497 	      gimple *new_stmt
4498 		= gimple_build_assign (make_ssa_name (vec_dest), t);
4499 	      new_stmt_info
4500 		= vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4501 	      vect_clobber_variable (stmt_info, gsi, new_temp);
4502 	    }
4503 	}
4504 
4505       if (j == 0)
4506 	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
4507       else
4508 	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4509 
4510       prev_stmt_info = new_stmt_info;
4511     }
4512 
4513   vargs.release ();
4514 
4515   /* The call in STMT might prevent it from being removed in dce.
4516      We cannot, however, remove it here, due to the way the ssa name
4517      it defines is mapped to the new definition.  So just replace the
4518      rhs of the statement with something harmless.  */
4519 
4520   if (slp_node)
4521     return true;
4522 
4523   gimple *new_stmt;
4524   if (scalar_dest)
4525     {
4526       type = TREE_TYPE (scalar_dest);
4527       lhs = gimple_call_lhs (vect_orig_stmt (stmt_info)->stmt);
4528       new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4529     }
4530   else
4531     new_stmt = gimple_build_nop ();
4532   vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt);
4533   unlink_stmt_vdef (stmt);
4534 
4535   return true;
4536 }
4537 
4538 
4539 /* Function vect_gen_widened_results_half
4540 
4541    Create a vector stmt whose code, number of operands, and result
4542    variable are CODE, OP_TYPE, and VEC_DEST, and whose operands are
4543    VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at GSI.
4544    For a unary CODE only VEC_OPRND0 is used; VEC_OPRND1 is ignored
4545    (and reset to NULL below).
4546    STMT_INFO is the original scalar stmt that we are vectorizing.  */
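
/* Illustrative sketch (types chosen for illustration only, not taken from a
   particular target): for a widening multiply of two V8HI operands into V4SI
   results, the caller invokes this helper twice, once per half, producing
   GIMPLE along the lines of

     vect_lo = VEC_WIDEN_MULT_LO_EXPR <vec_oprnd0, vec_oprnd1>;
     vect_hi = VEC_WIDEN_MULT_HI_EXPR <vec_oprnd0, vec_oprnd1>;

   This function builds and inserts just one such half.  */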
4547 
4548 static gimple *
4549 vect_gen_widened_results_half (enum tree_code code,
4550                                tree vec_oprnd0, tree vec_oprnd1, int op_type,
4551 			       tree vec_dest, gimple_stmt_iterator *gsi,
4552 			       stmt_vec_info stmt_info)
4553 {
4554   gimple *new_stmt;
4555   tree new_temp;
4556 
4557   /* Generate half of the widened result:  */
4558   gcc_assert (op_type == TREE_CODE_LENGTH (code));
4559   if (op_type != binary_op)
4560     vec_oprnd1 = NULL;
4561   new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
4562   new_temp = make_ssa_name (vec_dest, new_stmt);
4563   gimple_assign_set_lhs (new_stmt, new_temp);
4564   vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4565 
4566   return new_stmt;
4567 }
4568 
4569 
4570 /* Get vectorized definitions for loop-based vectorization of STMT_INFO.
4571    For the first operand we call vect_get_vec_def_for_operand (with OPRND
4572    containing the scalar operand), and for the rest we get a copy with
4573    vect_get_vec_def_for_stmt_copy() using the previous vector definition
4574    (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
4575    The vectors are collected into VEC_OPRNDS.  */
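
/* Rough sketch of the result: each invocation pushes two vector defs and
   then recurses MULTI_STEP_CVT more times, so on return VEC_OPRNDS holds
   2 * (MULTI_STEP_CVT + 1) defs (e.g. MULTI_STEP_CVT == 1 yields four),
   each def obtained as the "copy" of the previous one via
   vect_get_vec_def_for_stmt_copy.  */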
4576 
4577 static void
4578 vect_get_loop_based_defs (tree *oprnd, stmt_vec_info stmt_info,
4579 			  vec<tree> *vec_oprnds, int multi_step_cvt)
4580 {
4581   vec_info *vinfo = stmt_info->vinfo;
4582   tree vec_oprnd;
4583 
4584   /* Get first vector operand.  */
4585   /* All the vector operands except the very first one (that is the scalar
4586      oprnd) are stmt copies.  */
4587   if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
4588     vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt_info);
4589   else
4590     vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, *oprnd);
4591 
4592   vec_oprnds->quick_push (vec_oprnd);
4593 
4594   /* Get second vector operand.  */
4595   vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
4596   vec_oprnds->quick_push (vec_oprnd);
4597 
4598   *oprnd = vec_oprnd;
4599 
4600   /* For conversion in multiple steps, continue to get operands
4601      recursively.  */
4602   if (multi_step_cvt)
4603     vect_get_loop_based_defs (oprnd, stmt_info, vec_oprnds,
4604 			      multi_step_cvt - 1);
4605 }
4606 
4607 
4608 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4609    For multi-step conversions store the resulting vectors and call the function
4610    recursively.  */
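
/* As a rough example (types purely illustrative): demoting four V4SI vectors
   all the way to V16QI goes through an intermediate V8HI type.  The first
   level packs adjacent pairs,

     v8hi_0 = VEC_PACK_TRUNC_EXPR <v4si_0, v4si_1>;
     v8hi_1 = VEC_PACK_TRUNC_EXPR <v4si_2, v4si_3>;

   and the recursive call packs those two results into the final V16QI
   vector with another VEC_PACK_TRUNC_EXPR.  */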
4611 
4612 static void
4613 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
4614 				       int multi_step_cvt,
4615 				       stmt_vec_info stmt_info,
4616 				       vec<tree> vec_dsts,
4617 				       gimple_stmt_iterator *gsi,
4618 				       slp_tree slp_node, enum tree_code code,
4619 				       stmt_vec_info *prev_stmt_info)
4620 {
4621   unsigned int i;
4622   tree vop0, vop1, new_tmp, vec_dest;
4623 
4624   vec_dest = vec_dsts.pop ();
4625 
4626   for (i = 0; i < vec_oprnds->length (); i += 2)
4627     {
4628       /* Create demotion operation.  */
4629       vop0 = (*vec_oprnds)[i];
4630       vop1 = (*vec_oprnds)[i + 1];
4631       gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4632       new_tmp = make_ssa_name (vec_dest, new_stmt);
4633       gimple_assign_set_lhs (new_stmt, new_tmp);
4634       stmt_vec_info new_stmt_info
4635 	= vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4636 
4637       if (multi_step_cvt)
4638 	/* Store the resulting vector for the next recursive call.  */
4639 	(*vec_oprnds)[i/2] = new_tmp;
4640       else
4641 	{
4642 	  /* This is the last step of the conversion sequence.  Store the
4643 	     vectors in SLP_NODE or in the vector info of the scalar statement
4644 	     (or in the STMT_VINFO_RELATED_STMT chain).  */
4645 	  if (slp_node)
4646 	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
4647 	  else
4648 	    {
4649 	      if (!*prev_stmt_info)
4650 		STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
4651 	      else
4652 		STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt_info;
4653 
4654 	      *prev_stmt_info = new_stmt_info;
4655 	    }
4656 	}
4657     }
4658 
4659   /* For multi-step demotion operations we first generate demotion operations
4660      from the source type to the intermediate types, and then combine the
4661      results (stored in VEC_OPRNDS) with a demotion operation to the
4662      destination type.  */
4663   if (multi_step_cvt)
4664     {
4665       /* At each level of recursion we have half of the operands we had at the
4666 	 previous level.  */
4667       vec_oprnds->truncate ((i+1)/2);
4668       vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
4669 					     stmt_info, vec_dsts, gsi,
4670 					     slp_node, VEC_PACK_TRUNC_EXPR,
4671 					     prev_stmt_info);
4672     }
4673 
4674   vec_dsts.quick_push (vec_dest);
4675 }
4676 
4677 
4678 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4679    and VEC_OPRNDS1, for a binary operation associated with scalar statement
4680    STMT_INFO.  For multi-step conversions store the resulting vectors and
4681    call the function recursively.  */
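
/* Rough sketch (illustrative types): promoting one V8HI operand to two V4SI
   results goes through vect_gen_widened_results_half once per half,

     v4si_lo = VEC_UNPACK_LO_EXPR <v8hi_0>;
     v4si_hi = VEC_UNPACK_HI_EXPR <v8hi_0>;

   and both results are pushed back into VEC_OPRNDS0 for the next conversion
   step, if any.  */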
4682 
4683 static void
4684 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
4685 					vec<tree> *vec_oprnds1,
4686 					stmt_vec_info stmt_info, tree vec_dest,
4687 					gimple_stmt_iterator *gsi,
4688 					enum tree_code code1,
4689 					enum tree_code code2, int op_type)
4690 {
4691   int i;
4692   tree vop0, vop1, new_tmp1, new_tmp2;
4693   gimple *new_stmt1, *new_stmt2;
4694   vec<tree> vec_tmp = vNULL;
4695 
4696   vec_tmp.create (vec_oprnds0->length () * 2);
4697   FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4698     {
4699       if (op_type == binary_op)
4700 	vop1 = (*vec_oprnds1)[i];
4701       else
4702 	vop1 = NULL_TREE;
4703 
4704       /* Generate the two halves of promotion operation.  */
4705       new_stmt1 = vect_gen_widened_results_half (code1, vop0, vop1,
4706 						 op_type, vec_dest, gsi,
4707 						 stmt_info);
4708       new_stmt2 = vect_gen_widened_results_half (code2, vop0, vop1,
4709 						 op_type, vec_dest, gsi,
4710 						 stmt_info);
4711       if (is_gimple_call (new_stmt1))
4712 	{
4713 	  new_tmp1 = gimple_call_lhs (new_stmt1);
4714 	  new_tmp2 = gimple_call_lhs (new_stmt2);
4715 	}
4716       else
4717 	{
4718 	  new_tmp1 = gimple_assign_lhs (new_stmt1);
4719 	  new_tmp2 = gimple_assign_lhs (new_stmt2);
4720 	}
4721 
4722       /* Store the results for the next step.  */
4723       vec_tmp.quick_push (new_tmp1);
4724       vec_tmp.quick_push (new_tmp2);
4725     }
4726 
4727   vec_oprnds0->release ();
4728   *vec_oprnds0 = vec_tmp;
4729 }
4730 
4731 
4732 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
4733    If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
4734    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4735    Return true if STMT_INFO is vectorizable in this way.  */
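
/* For example (a sketch only, assuming 128-bit vectors): a scalar widening
   conversion such as

     int_x = (int) short_y;

   is handled with the WIDEN modifier below, one V8HI input producing two
   V4SI outputs per copy, while the opposite direction uses the NARROW
   modifier and packs two inputs into one output.  */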
4736 
4737 static bool
4738 vectorizable_conversion (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
4739 			 stmt_vec_info *vec_stmt, slp_tree slp_node,
4740 			 stmt_vector_for_cost *cost_vec)
4741 {
4742   tree vec_dest;
4743   tree scalar_dest;
4744   tree op0, op1 = NULL_TREE;
4745   tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
4746   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4747   enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4748   enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4749   tree new_temp;
4750   enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4751   int ndts = 2;
4752   stmt_vec_info prev_stmt_info;
4753   poly_uint64 nunits_in;
4754   poly_uint64 nunits_out;
4755   tree vectype_out, vectype_in;
4756   int ncopies, i, j;
4757   tree lhs_type, rhs_type;
4758   enum { NARROW, NONE, WIDEN } modifier;
4759   vec<tree> vec_oprnds0 = vNULL;
4760   vec<tree> vec_oprnds1 = vNULL;
4761   tree vop0;
4762   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4763   vec_info *vinfo = stmt_info->vinfo;
4764   int multi_step_cvt = 0;
4765   vec<tree> interm_types = vNULL;
4766   tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
4767   int op_type;
4768   unsigned short fltsz;
4769 
4770   /* Is STMT a vectorizable conversion?   */
4771 
4772   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4773     return false;
4774 
4775   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4776       && ! vec_stmt)
4777     return false;
4778 
4779   gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
4780   if (!stmt)
4781     return false;
4782 
4783   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4784     return false;
4785 
4786   code = gimple_assign_rhs_code (stmt);
4787   if (!CONVERT_EXPR_CODE_P (code)
4788       && code != FIX_TRUNC_EXPR
4789       && code != FLOAT_EXPR
4790       && code != WIDEN_MULT_EXPR
4791       && code != WIDEN_LSHIFT_EXPR)
4792     return false;
4793 
4794   op_type = TREE_CODE_LENGTH (code);
4795 
4796   /* Check types of lhs and rhs.  */
4797   scalar_dest = gimple_assign_lhs (stmt);
4798   lhs_type = TREE_TYPE (scalar_dest);
4799   vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4800 
4801   op0 = gimple_assign_rhs1 (stmt);
4802   rhs_type = TREE_TYPE (op0);
4803 
4804   if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4805       && !((INTEGRAL_TYPE_P (lhs_type)
4806 	    && INTEGRAL_TYPE_P (rhs_type))
4807 	   || (SCALAR_FLOAT_TYPE_P (lhs_type)
4808 	       && SCALAR_FLOAT_TYPE_P (rhs_type))))
4809     return false;
4810 
4811   if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4812       && ((INTEGRAL_TYPE_P (lhs_type)
4813 	   && !type_has_mode_precision_p (lhs_type))
4814 	  || (INTEGRAL_TYPE_P (rhs_type)
4815 	      && !type_has_mode_precision_p (rhs_type))))
4816     {
4817       if (dump_enabled_p ())
4818 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4819                          "type conversion to/from bit-precision unsupported."
4820                          "\n");
4821       return false;
4822     }
4823 
4824   /* Check the operands of the operation.  */
4825   if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype_in))
4826     {
4827       if (dump_enabled_p ())
4828 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4829                          "use not simple.\n");
4830       return false;
4831     }
4832   if (op_type == binary_op)
4833     {
4834       bool ok;
4835 
4836       op1 = gimple_assign_rhs2 (stmt);
4837       gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4838       /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4839 	 OP1.  */
4840       if (CONSTANT_CLASS_P (op0))
4841 	ok = vect_is_simple_use (op1, vinfo, &dt[1], &vectype_in);
4842       else
4843 	ok = vect_is_simple_use (op1, vinfo, &dt[1]);
4844 
4845       if (!ok)
4846 	{
4847           if (dump_enabled_p ())
4848             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4849                              "use not simple.\n");
4850 	  return false;
4851 	}
4852     }
4853 
4854   /* If op0 is an external or constant def, infer the vector type
4855      from the scalar type.  */
4856   if (!vectype_in)
4857     vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
4858   if (vec_stmt)
4859     gcc_assert (vectype_in);
4860   if (!vectype_in)
4861     {
4862       if (dump_enabled_p ())
4863 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4864 			 "no vectype for scalar type %T\n", rhs_type);
4865 
4866       return false;
4867     }
4868 
4869   if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4870       && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4871     {
4872       if (dump_enabled_p ())
4873 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4874 			 "can't convert between boolean and non "
4875 			 "boolean vectors %T\n", rhs_type);
4876 
4877       return false;
4878     }
4879 
4880   nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4881   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4882   if (known_eq (nunits_out, nunits_in))
4883     modifier = NONE;
4884   else if (multiple_p (nunits_out, nunits_in))
4885     modifier = NARROW;
4886   else
4887     {
4888       gcc_checking_assert (multiple_p (nunits_in, nunits_out));
4889       modifier = WIDEN;
4890     }
4891 
4892   /* Multiple types in SLP are handled by creating the appropriate number of
4893      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
4894      case of SLP.  */
4895   if (slp_node)
4896     ncopies = 1;
4897   else if (modifier == NARROW)
4898     ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4899   else
4900     ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
4901 
4902   /* Sanity check: make sure that at least one copy of the vectorized stmt
4903      needs to be generated.  */
4904   gcc_assert (ncopies >= 1);
4905 
4906   bool found_mode = false;
4907   scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4908   scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4909   opt_scalar_mode rhs_mode_iter;
4910 
4911   /* Supportable by target?  */
4912   switch (modifier)
4913     {
4914     case NONE:
4915       if (code != FIX_TRUNC_EXPR
4916 	  && code != FLOAT_EXPR
4917 	  && !CONVERT_EXPR_CODE_P (code))
4918 	return false;
4919       if (supportable_convert_operation (code, vectype_out, vectype_in, &code1))
4920 	break;
4921       /* FALLTHRU */
4922     unsupported:
4923       if (dump_enabled_p ())
4924 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4925                          "conversion not supported by target.\n");
4926       return false;
4927 
4928     case WIDEN:
4929       if (supportable_widening_operation (code, stmt_info, vectype_out,
4930 					  vectype_in, &code1, &code2,
4931 					  &multi_step_cvt, &interm_types))
4932 	{
4933 	  /* Binary widening operation can only be supported directly by the
4934 	     architecture.  */
4935 	  gcc_assert (!(multi_step_cvt && op_type == binary_op));
4936 	  break;
4937 	}
4938 
4939       if (code != FLOAT_EXPR
4940 	  || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4941 	goto unsupported;
4942 
4943       fltsz = GET_MODE_SIZE (lhs_mode);
4944       FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4945 	{
4946 	  rhs_mode = rhs_mode_iter.require ();
4947 	  if (GET_MODE_SIZE (rhs_mode) > fltsz)
4948 	    break;
4949 
4950 	  cvt_type
4951 	    = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4952 	  cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4953 	  if (cvt_type == NULL_TREE)
4954 	    goto unsupported;
4955 
4956 	  if (GET_MODE_SIZE (rhs_mode) == fltsz)
4957 	    {
4958 	      if (!supportable_convert_operation (code, vectype_out,
4959 						  cvt_type, &codecvt1))
4960 		goto unsupported;
4961 	    }
4962 	  else if (!supportable_widening_operation (code, stmt_info,
4963 						    vectype_out, cvt_type,
4964 						    &codecvt1, &codecvt2,
4965 						    &multi_step_cvt,
4966 						    &interm_types))
4967 	    continue;
4968 	  else
4969 	    gcc_assert (multi_step_cvt == 0);
4970 
4971 	  if (supportable_widening_operation (NOP_EXPR, stmt_info, cvt_type,
4972 					      vectype_in, &code1, &code2,
4973 					      &multi_step_cvt, &interm_types))
4974 	    {
4975 	      found_mode = true;
4976 	      break;
4977 	    }
4978 	}
4979 
4980       if (!found_mode)
4981 	goto unsupported;
4982 
4983       if (GET_MODE_SIZE (rhs_mode) == fltsz)
4984 	codecvt2 = ERROR_MARK;
4985       else
4986 	{
4987 	  multi_step_cvt++;
4988 	  interm_types.safe_push (cvt_type);
4989 	  cvt_type = NULL_TREE;
4990 	}
4991       break;
4992 
4993     case NARROW:
4994       gcc_assert (op_type == unary_op);
4995       if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4996 					   &code1, &multi_step_cvt,
4997 					   &interm_types))
4998 	break;
4999 
5000       if (code != FIX_TRUNC_EXPR
5001 	  || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
5002 	goto unsupported;
5003 
5004       cvt_type
5005 	= build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
5006       cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
5007       if (cvt_type == NULL_TREE)
5008 	goto unsupported;
5009       if (!supportable_convert_operation (code, cvt_type, vectype_in,
5010 					  &codecvt1))
5011 	goto unsupported;
5012       if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
5013 					   &code1, &multi_step_cvt,
5014 					   &interm_types))
5015 	break;
5016       goto unsupported;
5017 
5018     default:
5019       gcc_unreachable ();
5020     }
5021 
5022   if (!vec_stmt)		/* transformation not required.  */
5023     {
5024       DUMP_VECT_SCOPE ("vectorizable_conversion");
5025       if (modifier == NONE)
5026         {
5027 	  STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
5028 	  vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node,
5029 				  cost_vec);
5030 	}
5031       else if (modifier == NARROW)
5032 	{
5033 	  STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
5034 	  /* The final packing step produces one vector result per copy.  */
5035 	  unsigned int nvectors
5036 	    = (slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies);
5037 	  vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5038 					      multi_step_cvt, cost_vec);
5039 	}
5040       else
5041 	{
5042 	  STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
5043 	  /* The initial unpacking step produces two vector results
5044 	     per copy.  MULTI_STEP_CVT is 0 for a single conversion,
5045 	     so >> MULTI_STEP_CVT divides by 2^(number of steps - 1).  */
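	  /* Worked example (numbers purely illustrative): with an SLP node
	     of 8 vector stmts and one intermediate type (MULTI_STEP_CVT == 1)
	     the initial unpack operates on 8 >> 1 == 4 vectors; without SLP
	     and NCOPIES == 2 it operates on 2 * 2 == 4 vectors.  */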
5046 	  unsigned int nvectors
5047 	    = (slp_node
5048 	       ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt
5049 	       : ncopies * 2);
5050 	  vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5051 					      multi_step_cvt, cost_vec);
5052 	}
5053       interm_types.release ();
5054       return true;
5055     }
5056 
5057   /* Transform.  */
5058   if (dump_enabled_p ())
5059     dump_printf_loc (MSG_NOTE, vect_location,
5060                      "transform conversion. ncopies = %d.\n", ncopies);
5061 
5062   if (op_type == binary_op)
5063     {
5064       if (CONSTANT_CLASS_P (op0))
5065 	op0 = fold_convert (TREE_TYPE (op1), op0);
5066       else if (CONSTANT_CLASS_P (op1))
5067 	op1 = fold_convert (TREE_TYPE (op0), op1);
5068     }
5069 
5070   /* In case of multi-step conversion, we first generate conversion operations
5071      to the intermediate types, and then from those types to the final one.
5072      We create vector destinations for the intermediate type (TYPES) received
5073      from supportable_*_operation, and store them in the correct order
5074      for future use in vect_create_vectorized_*_stmts ().  */
5075   auto_vec<tree> vec_dsts (multi_step_cvt + 1);
5076   vec_dest = vect_create_destination_var (scalar_dest,
5077 					  (cvt_type && modifier == WIDEN)
5078 					  ? cvt_type : vectype_out);
5079   vec_dsts.quick_push (vec_dest);
5080 
5081   if (multi_step_cvt)
5082     {
5083       for (i = interm_types.length () - 1;
5084 	   interm_types.iterate (i, &intermediate_type); i--)
5085 	{
5086 	  vec_dest = vect_create_destination_var (scalar_dest,
5087 						  intermediate_type);
5088 	  vec_dsts.quick_push (vec_dest);
5089 	}
5090     }
5091 
5092   if (cvt_type)
5093     vec_dest = vect_create_destination_var (scalar_dest,
5094 					    modifier == WIDEN
5095 					    ? vectype_out : cvt_type);
5096 
5097   if (!slp_node)
5098     {
5099       if (modifier == WIDEN)
5100 	{
5101 	  vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
5102 	  if (op_type == binary_op)
5103 	    vec_oprnds1.create (1);
5104 	}
5105       else if (modifier == NARROW)
5106 	vec_oprnds0.create (
5107 		   2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
5108     }
5109   else if (code == WIDEN_LSHIFT_EXPR)
5110     vec_oprnds1.create (slp_node->vec_stmts_size);
5111 
5112   last_oprnd = op0;
5113   prev_stmt_info = NULL;
5114   switch (modifier)
5115     {
5116     case NONE:
5117       for (j = 0; j < ncopies; j++)
5118 	{
5119 	  if (j == 0)
5120 	    vect_get_vec_defs (op0, NULL, stmt_info, &vec_oprnds0,
5121 			       NULL, slp_node);
5122 	  else
5123 	    vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, NULL);
5124 
5125 	  FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5126 	    {
5127 	      stmt_vec_info new_stmt_info;
5128 	      /* Arguments are ready, create the new vector stmt.  */
5129 	      gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
5130 	      gassign *new_stmt = gimple_build_assign (vec_dest, code1, vop0);
5131 	      new_temp = make_ssa_name (vec_dest, new_stmt);
5132 	      gimple_assign_set_lhs (new_stmt, new_temp);
5133 	      new_stmt_info
5134 		= vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5135 
5136 	      if (slp_node)
5137 		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5138 	      else
5139 		{
5140 		  if (!prev_stmt_info)
5141 		    STMT_VINFO_VEC_STMT (stmt_info)
5142 		      = *vec_stmt = new_stmt_info;
5143 		  else
5144 		    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5145 		  prev_stmt_info = new_stmt_info;
5146 		}
5147 	    }
5148 	}
5149       break;
5150 
5151     case WIDEN:
5152       /* In case the vectorization factor (VF) is bigger than the number
5153 	 of elements that we can fit in a vectype (nunits), we have to
5154 	 generate more than one vector stmt, i.e., we need to "unroll"
5155 	 the vector stmt by a factor VF/nunits.  */
5156       for (j = 0; j < ncopies; j++)
5157 	{
5158 	  /* Handle uses.  */
5159 	  if (j == 0)
5160 	    {
5161 	      if (slp_node)
5162 		{
5163 		  if (code == WIDEN_LSHIFT_EXPR)
5164 		    {
5165 		      unsigned int k;
5166 
5167 		      vec_oprnd1 = op1;
5168 		      /* Store vec_oprnd1 for every vector stmt to be created
5169 			 for SLP_NODE.  We check during the analysis that all
5170 			 the shift arguments are the same.  */
5171 		      for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5172 			vec_oprnds1.quick_push (vec_oprnd1);
5173 
5174 		      vect_get_vec_defs (op0, NULL_TREE, stmt_info,
5175 					 &vec_oprnds0, NULL, slp_node);
5176 		    }
5177 		  else
5178 		    vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0,
5179 				       &vec_oprnds1, slp_node);
5180 		}
5181 	      else
5182 		{
5183 		  vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt_info);
5184 		  vec_oprnds0.quick_push (vec_oprnd0);
5185 		  if (op_type == binary_op)
5186 		    {
5187 		      if (code == WIDEN_LSHIFT_EXPR)
5188 			vec_oprnd1 = op1;
5189 		      else
5190 			vec_oprnd1
5191 			  = vect_get_vec_def_for_operand (op1, stmt_info);
5192 		      vec_oprnds1.quick_push (vec_oprnd1);
5193 		    }
5194 		}
5195 	    }
5196 	  else
5197 	    {
5198 	      vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
5199 	      vec_oprnds0.truncate (0);
5200 	      vec_oprnds0.quick_push (vec_oprnd0);
5201 	      if (op_type == binary_op)
5202 		{
5203 		  if (code == WIDEN_LSHIFT_EXPR)
5204 		    vec_oprnd1 = op1;
5205 		  else
5206 		    vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo,
5207 								 vec_oprnd1);
5208 		  vec_oprnds1.truncate (0);
5209 		  vec_oprnds1.quick_push (vec_oprnd1);
5210 		}
5211 	    }
5212 
5213 	  /* Arguments are ready.  Create the new vector stmts.  */
5214 	  for (i = multi_step_cvt; i >= 0; i--)
5215 	    {
5216 	      tree this_dest = vec_dsts[i];
5217 	      enum tree_code c1 = code1, c2 = code2;
5218 	      if (i == 0 && codecvt2 != ERROR_MARK)
5219 		{
5220 		  c1 = codecvt1;
5221 		  c2 = codecvt2;
5222 		}
5223 	      vect_create_vectorized_promotion_stmts (&vec_oprnds0,
5224 						      &vec_oprnds1, stmt_info,
5225 						      this_dest, gsi,
5226 						      c1, c2, op_type);
5227 	    }
5228 
5229 	  FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5230 	    {
5231 	      stmt_vec_info new_stmt_info;
5232 	      if (cvt_type)
5233 		{
5234 		  gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5235 		  new_temp = make_ssa_name (vec_dest);
5236 		  gassign *new_stmt
5237 		    = gimple_build_assign (new_temp, codecvt1, vop0);
5238 		  new_stmt_info
5239 		    = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5240 		}
5241 	      else
5242 		new_stmt_info = vinfo->lookup_def (vop0);
5243 
5244 	      if (slp_node)
5245 		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5246 	      else
5247 		{
5248 		  if (!prev_stmt_info)
5249 		    STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
5250 		  else
5251 		    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5252 		  prev_stmt_info = new_stmt_info;
5253 		}
5254 	    }
5255 	}
5256 
5257       *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5258       break;
5259 
5260     case NARROW:
5261       /* In case the vectorization factor (VF) is bigger than the number
5262 	 of elements that we can fit in a vectype (nunits), we have to
5263 	 generate more than one vector stmt, i.e., we need to "unroll"
5264 	 the vector stmt by a factor VF/nunits.  */
5265       for (j = 0; j < ncopies; j++)
5266 	{
5267 	  /* Handle uses.  */
5268 	  if (slp_node)
5269 	    vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
5270 			       slp_node);
5271 	  else
5272 	    {
5273 	      vec_oprnds0.truncate (0);
5274 	      vect_get_loop_based_defs (&last_oprnd, stmt_info, &vec_oprnds0,
5275 					vect_pow2 (multi_step_cvt) - 1);
5276 	    }
5277 
5278 	  /* Arguments are ready.  Create the new vector stmts.  */
5279 	  if (cvt_type)
5280 	    FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5281 	      {
5282 		gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5283 		new_temp = make_ssa_name (vec_dest);
5284 		gassign *new_stmt
5285 		    = gimple_build_assign (new_temp, codecvt1, vop0);
5286 		vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5287 		vec_oprnds0[i] = new_temp;
5288 	      }
5289 
5290 	  vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
5291 						 stmt_info, vec_dsts, gsi,
5292 						 slp_node, code1,
5293 						 &prev_stmt_info);
5294 	}
5295 
5296       *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5297       break;
5298     }
5299 
5300   vec_oprnds0.release ();
5301   vec_oprnds1.release ();
5302   interm_types.release ();
5303 
5304   return true;
5305 }
5306 
5307 /* Return true if we can assume from the scalar form of STMT_INFO that
5308    neither the scalar nor the vector forms will generate code.  STMT_INFO
5309    is known not to involve a data reference.  */
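
/* For instance (illustrative only), a sign-changing conversion of equal
   precision such as

     u_1 = (unsigned int) i_2;

   is a nop both in scalar and in vector form, so no code needs to be
   generated for it; the precise test is tree_nop_conversion_p below.  */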
5310 
5311 bool
5312 vect_nop_conversion_p (stmt_vec_info stmt_info)
5313 {
5314   gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5315   if (!stmt)
5316     return false;
5317 
5318   tree lhs = gimple_assign_lhs (stmt);
5319   tree_code code = gimple_assign_rhs_code (stmt);
5320   tree rhs = gimple_assign_rhs1 (stmt);
5321 
5322   if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
5323     return true;
5324 
5325   if (CONVERT_EXPR_CODE_P (code))
5326     return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));
5327 
5328   return false;
5329 }
5330 
5331 /* Function vectorizable_assignment.
5332 
5333    Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5334    If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5335    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5336    Return true if STMT_INFO is vectorizable in this way.  */
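
/* Sketch of the transformation (illustrative only): a scalar copy or
   same-size conversion such as

     a_1 = b_2;        or        a_1 = (unsigned int) b_2;

   becomes one vector assignment per copy, with the source wrapped in a
   VIEW_CONVERT_EXPR to the destination vectype in the conversion case:

     vect_a = VIEW_CONVERT_EXPR<vectype>(vect_b);  */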
5337 
5338 static bool
5339 vectorizable_assignment (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5340 			 stmt_vec_info *vec_stmt, slp_tree slp_node,
5341 			 stmt_vector_for_cost *cost_vec)
5342 {
5343   tree vec_dest;
5344   tree scalar_dest;
5345   tree op;
5346   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5347   tree new_temp;
5348   enum vect_def_type dt[1] = {vect_unknown_def_type};
5349   int ndts = 1;
5350   int ncopies;
5351   int i, j;
5352   vec<tree> vec_oprnds = vNULL;
5353   tree vop;
5354   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5355   vec_info *vinfo = stmt_info->vinfo;
5356   stmt_vec_info prev_stmt_info = NULL;
5357   enum tree_code code;
5358   tree vectype_in;
5359 
5360   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5361     return false;
5362 
5363   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5364       && ! vec_stmt)
5365     return false;
5366 
5367   /* Is vectorizable assignment?  */
5368   gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5369   if (!stmt)
5370     return false;
5371 
5372   scalar_dest = gimple_assign_lhs (stmt);
5373   if (TREE_CODE (scalar_dest) != SSA_NAME)
5374     return false;
5375 
5376   code = gimple_assign_rhs_code (stmt);
5377   if (gimple_assign_single_p (stmt)
5378       || code == PAREN_EXPR
5379       || CONVERT_EXPR_CODE_P (code))
5380     op = gimple_assign_rhs1 (stmt);
5381   else
5382     return false;
5383 
5384   if (code == VIEW_CONVERT_EXPR)
5385     op = TREE_OPERAND (op, 0);
5386 
5387   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5388   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5389 
5390   /* Multiple types in SLP are handled by creating the appropriate number of
5391      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
5392      case of SLP.  */
5393   if (slp_node)
5394     ncopies = 1;
5395   else
5396     ncopies = vect_get_num_copies (loop_vinfo, vectype);
5397 
5398   gcc_assert (ncopies >= 1);
5399 
5400   if (!vect_is_simple_use (op, vinfo, &dt[0], &vectype_in))
5401     {
5402       if (dump_enabled_p ())
5403         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5404                          "use not simple.\n");
5405       return false;
5406     }
5407 
5408   /* We can handle NOP_EXPR conversions that do not change the number
5409      of elements or the vector size.  */
5410   if ((CONVERT_EXPR_CODE_P (code)
5411        || code == VIEW_CONVERT_EXPR)
5412       && (!vectype_in
5413 	  || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
5414 	  || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5415 		       GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
5416     return false;
5417 
5418   /* We do not handle bit-precision changes.  */
5419   if ((CONVERT_EXPR_CODE_P (code)
5420        || code == VIEW_CONVERT_EXPR)
5421       && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5422       && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5423 	  || !type_has_mode_precision_p (TREE_TYPE (op)))
5424       /* But a conversion that does not change the bit-pattern is ok.  */
5425       && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5426 	    > TYPE_PRECISION (TREE_TYPE (op)))
5427 	   && TYPE_UNSIGNED (TREE_TYPE (op)))
5428       /* Conversion between boolean types of different sizes is
5429 	 a simple assignment in case their vectypes are the same
5430 	 boolean vectors.  */
5431       && (!VECTOR_BOOLEAN_TYPE_P (vectype)
5432 	  || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
5433     {
5434       if (dump_enabled_p ())
5435         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5436                          "type conversion to/from bit-precision "
5437                          "unsupported.\n");
5438       return false;
5439     }
5440 
5441   if (!vec_stmt) /* transformation not required.  */
5442     {
5443       STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
5444       DUMP_VECT_SCOPE ("vectorizable_assignment");
5445       if (!vect_nop_conversion_p (stmt_info))
5446 	vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node,
5447 				cost_vec);
5448       return true;
5449     }
5450 
5451   /* Transform.  */
5452   if (dump_enabled_p ())
5453     dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
5454 
5455   /* Handle def.  */
5456   vec_dest = vect_create_destination_var (scalar_dest, vectype);
5457 
5458   /* Handle use.  */
5459   for (j = 0; j < ncopies; j++)
5460     {
5461       /* Handle uses.  */
5462       if (j == 0)
5463 	vect_get_vec_defs (op, NULL, stmt_info, &vec_oprnds, NULL, slp_node);
5464       else
5465 	vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds, NULL);
5466 
5467       /* Arguments are ready.  Create the new vector stmt.  */
5468       stmt_vec_info new_stmt_info = NULL;
5469       FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
5470        {
5471 	 if (CONVERT_EXPR_CODE_P (code)
5472 	     || code == VIEW_CONVERT_EXPR)
5473 	   vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
5474 	 gassign *new_stmt = gimple_build_assign (vec_dest, vop);
5475          new_temp = make_ssa_name (vec_dest, new_stmt);
5476          gimple_assign_set_lhs (new_stmt, new_temp);
5477 	 new_stmt_info
5478 	   = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5479          if (slp_node)
5480 	   SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5481        }
5482 
5483       if (slp_node)
5484         continue;
5485 
5486       if (j == 0)
5487 	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
5488       else
5489 	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5490 
5491       prev_stmt_info = new_stmt_info;
5492     }
5493 
5494   vec_oprnds.release ();
5495   return true;
5496 }
5497 
5498 
5499 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5500    either as shift by a scalar or by a vector.  */
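
/* A typical (hypothetical) use is a quick feasibility check before
   committing to a strategy that needs the shift, e.g.

     if (!vect_supportable_shift (vinfo, LSHIFT_EXPR, TREE_TYPE (oprnd0)))
       return NULL;

   where OPRND0 stands for whatever operand the caller is testing.  */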
5501 
5502 bool
5503 vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
5504 {
5505 
5506   machine_mode vec_mode;
5507   optab optab;
5508   int icode;
5509   tree vectype;
5510 
5511   vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
5512   if (!vectype)
5513     return false;
5514 
5515   optab = optab_for_tree_code (code, vectype, optab_scalar);
5516   if (!optab
5517       || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5518     {
5519       optab = optab_for_tree_code (code, vectype, optab_vector);
5520       if (!optab
5521           || (optab_handler (optab, TYPE_MODE (vectype))
5522                       == CODE_FOR_nothing))
5523         return false;
5524     }
5525 
5526   vec_mode = TYPE_MODE (vectype);
5527   icode = (int) optab_handler (optab, vec_mode);
5528   if (icode == CODE_FOR_nothing)
5529     return false;
5530 
5531   return true;
5532 }
5533 
5534 
5535 /* Function vectorizable_shift.
5536 
5537    Check if STMT_INFO performs a shift operation that can be vectorized.
5538    If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5539    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5540    Return true if STMT_INFO is vectorizable in this way.  */
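
/* Rough picture of the cases handled below (illustrative only): for a
   scalar statement

     x_1 = y_2 << n_3;

   the shift amount N_3 either stays a scalar operand (vector/scalar shift,
   when the target's optab accepts a scalar operand 2) or is broadcast into
   a vector (vector/vector shift); the analysis picks whichever form the
   target supports.  */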
5541 
5542 static bool
5543 vectorizable_shift (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5544 		    stmt_vec_info *vec_stmt, slp_tree slp_node,
5545 		    stmt_vector_for_cost *cost_vec)
5546 {
5547   tree vec_dest;
5548   tree scalar_dest;
5549   tree op0, op1 = NULL;
5550   tree vec_oprnd1 = NULL_TREE;
5551   tree vectype;
5552   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5553   enum tree_code code;
5554   machine_mode vec_mode;
5555   tree new_temp;
5556   optab optab;
5557   int icode;
5558   machine_mode optab_op2_mode;
5559   enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5560   int ndts = 2;
5561   stmt_vec_info prev_stmt_info;
5562   poly_uint64 nunits_in;
5563   poly_uint64 nunits_out;
5564   tree vectype_out;
5565   tree op1_vectype;
5566   int ncopies;
5567   int j, i;
5568   vec<tree> vec_oprnds0 = vNULL;
5569   vec<tree> vec_oprnds1 = vNULL;
5570   tree vop0, vop1;
5571   unsigned int k;
5572   bool scalar_shift_arg = true;
5573   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5574   vec_info *vinfo = stmt_info->vinfo;
5575   bool incompatible_op1_vectype_p = false;
5576 
5577   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5578     return false;
5579 
5580   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5581       && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
5582       && ! vec_stmt)
5583     return false;
5584 
5585   /* Is STMT a vectorizable binary/unary operation?   */
5586   gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5587   if (!stmt)
5588     return false;
5589 
5590   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5591     return false;
5592 
5593   code = gimple_assign_rhs_code (stmt);
5594 
5595   if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5596       || code == RROTATE_EXPR))
5597     return false;
5598 
5599   scalar_dest = gimple_assign_lhs (stmt);
5600   vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5601   if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5602     {
5603       if (dump_enabled_p ())
5604         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5605                          "bit-precision shifts not supported.\n");
5606       return false;
5607     }
5608 
5609   op0 = gimple_assign_rhs1 (stmt);
5610   if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype))
5611     {
5612       if (dump_enabled_p ())
5613         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5614                          "use not simple.\n");
5615       return false;
5616     }
5617   /* If op0 is an external or constant def, infer the vector type
5618      from the scalar type.  */
5619   if (!vectype)
5620     vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), slp_node);
5621   if (vec_stmt)
5622     gcc_assert (vectype);
5623   if (!vectype)
5624     {
5625       if (dump_enabled_p ())
5626         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5627                          "no vectype for scalar type\n");
5628       return false;
5629     }
5630 
5631   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5632   nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5633   if (maybe_ne (nunits_out, nunits_in))
5634     return false;
5635 
5636   op1 = gimple_assign_rhs2 (stmt);
5637   stmt_vec_info op1_def_stmt_info;
5638   if (!vect_is_simple_use (op1, vinfo, &dt[1], &op1_vectype,
5639 			   &op1_def_stmt_info))
5640     {
5641       if (dump_enabled_p ())
5642         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5643                          "use not simple.\n");
5644       return false;
5645     }
5646 
5647   /* Multiple types in SLP are handled by creating the appropriate number of
5648      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
5649      case of SLP.  */
5650   if (slp_node)
5651     ncopies = 1;
5652   else
5653     ncopies = vect_get_num_copies (loop_vinfo, vectype);
5654 
5655   gcc_assert (ncopies >= 1);
5656 
5657   /* Determine whether the shift amount is a vector or a scalar.  If the
5658      shift/rotate amount is a vector, use the vector/vector shift optabs.  */
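  /* E.g. "a[i] << 3", or "a[i] << n" with N loop-invariant, can use the
     vector/scalar form, whereas "a[i] << b[i]" needs the vector/vector
     form (an illustrative example).  */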
5659 
5660   if ((dt[1] == vect_internal_def
5661        || dt[1] == vect_induction_def
5662        || dt[1] == vect_nested_cycle)
5663       && !slp_node)
5664     scalar_shift_arg = false;
5665   else if (dt[1] == vect_constant_def
5666 	   || dt[1] == vect_external_def
5667 	   || dt[1] == vect_internal_def)
5668     {
5669       /* In SLP, we need to check whether the shift count is the same
5670 	 in all the scalar stmts; in loops, if it is a constant or
5671 	 invariant, it is always a scalar shift.  */
5672       if (slp_node)
5673 	{
5674 	  vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5675 	  stmt_vec_info slpstmt_info;
5676 
5677 	  FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
5678 	    {
5679 	      gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
5680 	      if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5681 		scalar_shift_arg = false;
5682 	    }
5683 
5684 	  /* For internal SLP defs we have to make sure we see scalar stmts
5685 	     for all vector elements.
5686 	     ???  For different vectors we could resort to a different
5687 	     scalar shift operand but code-generation below simply always
5688 	     takes the first.  */
5689 	  if (dt[1] == vect_internal_def
5690 	      && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
5691 			   stmts.length ()))
5692 	    scalar_shift_arg = false;
5693 	}
5694 
5695       /* If the shift amount is computed by a pattern stmt we cannot
5696          use the scalar amount directly, so give up and use a vector
5697 	 shift.  */
5698       if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
5699 	scalar_shift_arg = false;
5700     }
5701   else
5702     {
5703       if (dump_enabled_p ())
5704         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5705                          "operand mode requires invariant argument.\n");
5706       return false;
5707     }
5708 
5709   /* Vector shifted by vector.  */
5710   bool was_scalar_shift_arg = scalar_shift_arg;
5711   if (!scalar_shift_arg)
5712     {
5713       optab = optab_for_tree_code (code, vectype, optab_vector);
5714       if (dump_enabled_p ())
5715         dump_printf_loc (MSG_NOTE, vect_location,
5716                          "vector/vector shift/rotate found.\n");
5717 
5718       if (!op1_vectype)
5719 	op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1),
5720 						   slp_node);
5721       incompatible_op1_vectype_p
5722 	= (op1_vectype == NULL_TREE
5723 	   || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype),
5724 			TYPE_VECTOR_SUBPARTS (vectype))
5725 	   || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype));
5726       if (incompatible_op1_vectype_p
5727 	  && (!slp_node
5728 	      || SLP_TREE_DEF_TYPE
5729 		   (SLP_TREE_CHILDREN (slp_node)[1]) != vect_constant_def))
5730 	{
5731 	  if (dump_enabled_p ())
5732 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5733                              "unusable type for last operand in"
5734                              " vector/vector shift/rotate.\n");
5735 	  return false;
5736 	}
5737     }
5738   /* See if the machine has a vector shifted by scalar insn and if not
5739      then see if it has a vector shifted by vector insn.  */
5740   else
5741     {
5742       optab = optab_for_tree_code (code, vectype, optab_scalar);
5743       if (optab
5744           && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5745         {
5746           if (dump_enabled_p ())
5747             dump_printf_loc (MSG_NOTE, vect_location,
5748                              "vector/scalar shift/rotate found.\n");
5749         }
5750       else
5751         {
5752           optab = optab_for_tree_code (code, vectype, optab_vector);
5753           if (optab
5754                && (optab_handler (optab, TYPE_MODE (vectype))
5755                       != CODE_FOR_nothing))
5756             {
5757 	      scalar_shift_arg = false;
5758 
5759               if (dump_enabled_p ())
5760                 dump_printf_loc (MSG_NOTE, vect_location,
5761                                  "vector/vector shift/rotate found.\n");
5762 
5763               /* Unlike the other binary operators, shifts/rotates have
5764                  the rhs being int, instead of the same type as the lhs,
5765                  so make sure the scalar is the right type if we are
5766 		 dealing with vectors of long long/long/short/char.  */
5767 	      incompatible_op1_vectype_p
5768 		= !tree_nop_conversion_p (TREE_TYPE (vectype),
5769 					  TREE_TYPE (op1));
5770             }
5771         }
5772     }
5773 
5774   /* Supportable by target?  */
5775   if (!optab)
5776     {
5777       if (dump_enabled_p ())
5778         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5779                          "no optab.\n");
5780       return false;
5781     }
5782   vec_mode = TYPE_MODE (vectype);
5783   icode = (int) optab_handler (optab, vec_mode);
5784   if (icode == CODE_FOR_nothing)
5785     {
5786       if (dump_enabled_p ())
5787         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5788                          "op not supported by target.\n");
5789       /* Check only during analysis.  */
5790       if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5791 	  || (!vec_stmt
5792 	      && !vect_worthwhile_without_simd_p (vinfo, code)))
5793         return false;
5794       if (dump_enabled_p ())
5795         dump_printf_loc (MSG_NOTE, vect_location,
5796                          "proceeding using word mode.\n");
5797     }
5798 
5799   /* Worthwhile without SIMD support?  Check only during analysis.  */
5800   if (!vec_stmt
5801       && !VECTOR_MODE_P (TYPE_MODE (vectype))
5802       && !vect_worthwhile_without_simd_p (vinfo, code))
5803     {
5804       if (dump_enabled_p ())
5805         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5806                          "not worthwhile without SIMD support.\n");
5807       return false;
5808     }
5809 
5810   if (!vec_stmt) /* transformation not required.  */
5811     {
5812       STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5813       DUMP_VECT_SCOPE ("vectorizable_shift");
5814       vect_model_simple_cost (stmt_info, ncopies, dt,
5815 			      scalar_shift_arg ? 1 : ndts, slp_node, cost_vec);
5816       return true;
5817     }
5818 
5819   /* Transform.  */
5820 
5821   if (dump_enabled_p ())
5822     dump_printf_loc (MSG_NOTE, vect_location,
5823                      "transform binary/unary operation.\n");
5824 
5825   if (incompatible_op1_vectype_p && !slp_node)
5826     {
5827       op1 = fold_convert (TREE_TYPE (vectype), op1);
5828       if (dt[1] != vect_constant_def)
5829 	op1 = vect_init_vector (stmt_info, op1,
5830 				TREE_TYPE (vectype), NULL);
5831     }
5832 
5833   /* Handle def.  */
5834   vec_dest = vect_create_destination_var (scalar_dest, vectype);
5835 
5836   prev_stmt_info = NULL;
5837   for (j = 0; j < ncopies; j++)
5838     {
5839       /* Handle uses.  */
5840       if (j == 0)
5841         {
5842           if (scalar_shift_arg)
5843             {
5844               /* Vector shl and shr insn patterns can be defined with scalar
5845                  operand 2 (shift operand).  In this case, use constant or loop
5846                  invariant op1 directly, without extending it to vector mode
5847                  first.  */
5848               optab_op2_mode = insn_data[icode].operand[2].mode;
5849               if (!VECTOR_MODE_P (optab_op2_mode))
5850                 {
5851                   if (dump_enabled_p ())
5852                     dump_printf_loc (MSG_NOTE, vect_location,
5853                                      "operand 1 using scalar mode.\n");
5854                   vec_oprnd1 = op1;
5855                   vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
5856                   vec_oprnds1.quick_push (vec_oprnd1);
5857                   if (slp_node)
5858                     {
5859                       /* Store vec_oprnd1 for every vector stmt to be created
5860                          for SLP_NODE.  We check during the analysis that all
5861                          the shift arguments are the same.
5862                          TODO: Allow different constants for different vector
5863                          stmts generated for an SLP instance.  */
5864                       for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5865                         vec_oprnds1.quick_push (vec_oprnd1);
5866                     }
5867                 }
5868             }
5869 	  else if (slp_node && incompatible_op1_vectype_p)
5870 	    {
5871 	      if (was_scalar_shift_arg)
5872 		{
5873 		  /* If the argument was the same in all lanes create
5874 		     the correctly typed vector shift amount directly.  */
5875 		  op1 = fold_convert (TREE_TYPE (vectype), op1);
5876 		  op1 = vect_init_vector (stmt_info, op1, TREE_TYPE (vectype),
5877 					  !loop_vinfo ? gsi : NULL);
5878 		  vec_oprnd1 = vect_init_vector (stmt_info, op1, vectype,
5879 						 !loop_vinfo ? gsi : NULL);
5880                   vec_oprnds1.create (slp_node->vec_stmts_size);
5881 		  for (k = 0; k < slp_node->vec_stmts_size; k++)
5882 		    vec_oprnds1.quick_push (vec_oprnd1);
5883 		}
5884 	      else if (dt[1] == vect_constant_def)
5885 		{
5886 		  /* Convert the scalar constant shift amounts in-place.  */
5887 		  slp_tree shift = SLP_TREE_CHILDREN (slp_node)[1];
5888 		  gcc_assert (SLP_TREE_DEF_TYPE (shift) == vect_constant_def);
5889 		  for (unsigned i = 0;
5890 		       i < SLP_TREE_SCALAR_OPS (shift).length (); ++i)
5891 		    {
5892 		      SLP_TREE_SCALAR_OPS (shift)[i]
5893 			  = fold_convert (TREE_TYPE (vectype),
5894 					  SLP_TREE_SCALAR_OPS (shift)[i]);
5895 		      gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (shift)[i])
5896 				   == INTEGER_CST));
5897 		    }
5898 		}
5899 	      else
5900 		gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype));
5901 	    }
5902 
5903           /* vec_oprnd1 is available if operand 1 should be of a scalar type
5904              (a special case for certain kinds of vector shifts); otherwise,
5905              operand 1 should be of a vector type (the usual case).  */
5906           if (vec_oprnd1)
5907 	    vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
5908 			       slp_node);
5909           else
5910 	    vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0, &vec_oprnds1,
5911 			       slp_node);
5912         }
5913       else
5914 	vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, &vec_oprnds1);
5915 
5916       /* Arguments are ready.  Create the new vector stmt.  */
5917       stmt_vec_info new_stmt_info = NULL;
5918       FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5919         {
5920           vop1 = vec_oprnds1[i];
5921 	  gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5922           new_temp = make_ssa_name (vec_dest, new_stmt);
5923           gimple_assign_set_lhs (new_stmt, new_temp);
5924 	  new_stmt_info
5925 	    = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5926           if (slp_node)
5927 	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5928         }
5929 
5930       if (slp_node)
5931         continue;
5932 
5933       if (j == 0)
5934 	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
5935       else
5936 	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5937       prev_stmt_info = new_stmt_info;
5938     }
5939 
5940   vec_oprnds0.release ();
5941   vec_oprnds1.release ();
5942 
5943   return true;
5944 }
5945 
5946 
5947 /* Function vectorizable_operation.
5948 
5949    Check if STMT_INFO performs a binary, unary or ternary operation that can
5950    be vectorized.
5951    If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5952    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5953    Return true if STMT_INFO is vectorizable in this way.  */
5954 
5955 static bool
5956 vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5957 			stmt_vec_info *vec_stmt, slp_tree slp_node,
5958 			stmt_vector_for_cost *cost_vec)
5959 {
5960   tree vec_dest;
5961   tree scalar_dest;
5962   tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5963   tree vectype;
5964   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5965   enum tree_code code, orig_code;
5966   machine_mode vec_mode;
5967   tree new_temp;
5968   int op_type;
5969   optab optab;
5970   bool target_support_p;
5971   enum vect_def_type dt[3]
5972     = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5973   int ndts = 3;
5974   stmt_vec_info prev_stmt_info;
5975   poly_uint64 nunits_in;
5976   poly_uint64 nunits_out;
5977   tree vectype_out;
5978   int ncopies, vec_num;
5979   int j, i;
5980   vec<tree> vec_oprnds0 = vNULL;
5981   vec<tree> vec_oprnds1 = vNULL;
5982   vec<tree> vec_oprnds2 = vNULL;
5983   tree vop0, vop1, vop2;
5984   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5985   vec_info *vinfo = stmt_info->vinfo;
5986 
5987   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5988     return false;
5989 
5990   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5991       && ! vec_stmt)
5992     return false;
5993 
5994   /* Is STMT a vectorizable binary/unary operation?   */
5995   gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5996   if (!stmt)
5997     return false;
5998 
5999   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6000     return false;
6001 
6002   orig_code = code = gimple_assign_rhs_code (stmt);
6003 
6004   /* Shifts are handled in vectorizable_shift.  */
6005   if (code == LSHIFT_EXPR
6006       || code == RSHIFT_EXPR
6007       || code == LROTATE_EXPR
6008       || code == RROTATE_EXPR)
6009    return false;
6010 
6011   /* Comparisons are handled in vectorizable_comparison.  */
6012   if (TREE_CODE_CLASS (code) == tcc_comparison)
6013     return false;
6014 
6015   /* Conditions are handled in vectorizable_condition.  */
6016   if (code == COND_EXPR)
6017     return false;
6018 
6019   /* For pointer addition and subtraction, we should use the normal
6020      plus and minus for the vector operation.  */
6021   if (code == POINTER_PLUS_EXPR)
6022     code = PLUS_EXPR;
6023   if (code == POINTER_DIFF_EXPR)
6024     code = MINUS_EXPR;
6025 
6026   /* Support only unary, binary and ternary operations.  */
6027   op_type = TREE_CODE_LENGTH (code);
6028   if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
6029     {
6030       if (dump_enabled_p ())
6031         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6032                          "num. args = %d (not unary/binary/ternary op).\n",
6033                          op_type);
6034       return false;
6035     }
6036 
6037   scalar_dest = gimple_assign_lhs (stmt);
6038   vectype_out = STMT_VINFO_VECTYPE (stmt_info);
6039 
6040   /* Most operations cannot handle bit-precision types without extra
6041      truncations.  */
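  /* Illustrative example (hypothetical user code): for
       struct S { int x : 3; } *p;
       ...
       p[i].x = p[i].x + 1;
     the 3-bit addition would need an extra truncation after every vector
     operation to stay within the declared precision, so it is rejected
     here; the bitwise exceptions below are exact in the low bits.  */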
6042   bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out);
6043   if (!mask_op_p
6044       && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
6045      /* Exceptions are bitwise binary operations.  */
6046       && code != BIT_IOR_EXPR
6047       && code != BIT_XOR_EXPR
6048       && code != BIT_AND_EXPR)
6049     {
6050       if (dump_enabled_p ())
6051         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6052                          "bit-precision arithmetic not supported.\n");
6053       return false;
6054     }
6055 
6056   op0 = gimple_assign_rhs1 (stmt);
6057   if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype))
6058     {
6059       if (dump_enabled_p ())
6060         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6061                          "use not simple.\n");
6062       return false;
6063     }
6064   /* If op0 is an external or constant def, infer the vector type
6065      from the scalar type.  */
6066   if (!vectype)
6067     {
6068       /* For a boolean type we cannot determine the vectype from an
6069 	 invariant value (we don't know whether it is a vector of
6070 	 booleans or a vector of integers).  Use the output vectype
6071 	 instead, because operations on booleans don't change the
6072 	 type.  */
6073       if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
6074 	{
6075 	  if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
6076 	    {
6077 	      if (dump_enabled_p ())
6078 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6079 				 "not supported operation on bool value.\n");
6080 	      return false;
6081 	    }
6082 	  vectype = vectype_out;
6083 	}
6084       else
6085 	vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0),
6086 					       slp_node);
6087     }
6088   if (vec_stmt)
6089     gcc_assert (vectype);
6090   if (!vectype)
6091     {
6092       if (dump_enabled_p ())
6093 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6094 			 "no vectype for scalar type %T\n",
6095 			 TREE_TYPE (op0));
6096 
6097       return false;
6098     }
6099 
6100   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
6101   nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
6102   if (maybe_ne (nunits_out, nunits_in))
6103     return false;
6104 
6105   tree vectype2 = NULL_TREE, vectype3 = NULL_TREE;
6106   if (op_type == binary_op || op_type == ternary_op)
6107     {
6108       op1 = gimple_assign_rhs2 (stmt);
6109       if (!vect_is_simple_use (op1, vinfo, &dt[1], &vectype2))
6110 	{
6111 	  if (dump_enabled_p ())
6112 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6113                              "use not simple.\n");
6114 	  return false;
6115 	}
6116     }
6117   if (op_type == ternary_op)
6118     {
6119       op2 = gimple_assign_rhs3 (stmt);
6120       if (!vect_is_simple_use (op2, vinfo, &dt[2], &vectype3))
6121 	{
6122 	  if (dump_enabled_p ())
6123 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6124                              "use not simple.\n");
6125 	  return false;
6126 	}
6127     }
6128 
6129   /* Multiple types in SLP are handled by creating the appropriate number of
6130      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
6131      case of SLP.  */
6132   if (slp_node)
6133     {
6134       ncopies = 1;
6135       vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6136     }
6137   else
6138     {
6139       ncopies = vect_get_num_copies (loop_vinfo, vectype);
6140       vec_num = 1;
6141     }
6142 
6143   gcc_assert (ncopies >= 1);
6144 
6145   /* Reject attempts to combine mask types with nonmask types, e.g. if
6146      we have an AND between a (nonmask) boolean loaded from memory and
6147      a (mask) boolean result of a comparison.
6148 
6149      TODO: We could easily fix these cases up using pattern statements.  */
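  /* Illustrative example (hypothetical user code) of such a mix:
       _Bool *p;
       ...
       r[i] = p[i] & (a[i] < b[i]);
     the load of p[i] yields a nonmask vector of booleans while the
     comparison yields a mask, so the AND is rejected here.  */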
6150   if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p
6151       || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p)
6152       || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p))
6153     {
6154       if (dump_enabled_p ())
6155 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6156 			 "mixed mask and nonmask vector types\n");
6157       return false;
6158     }
6159 
6160   /* Supportable by target?  */
6161 
6162   vec_mode = TYPE_MODE (vectype);
6163   if (code == MULT_HIGHPART_EXPR)
6164     target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
6165   else
6166     {
6167       optab = optab_for_tree_code (code, vectype, optab_default);
6168       if (!optab)
6169 	{
6170           if (dump_enabled_p ())
6171             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6172                              "no optab.\n");
6173 	  return false;
6174 	}
6175       target_support_p = (optab_handler (optab, vec_mode)
6176 			  != CODE_FOR_nothing);
6177     }
6178 
6179   if (!target_support_p)
6180     {
6181       if (dump_enabled_p ())
6182 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6183                          "op not supported by target.\n");
6184       /* Check only during analysis.  */
6185       if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
6186 	  || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
6187         return false;
6188       if (dump_enabled_p ())
6189 	dump_printf_loc (MSG_NOTE, vect_location,
6190                          "proceeding using word mode.\n");
6191     }
6192 
6193   /* Worthwhile without SIMD support?  Check only during analysis.  */
6194   if (!VECTOR_MODE_P (vec_mode)
6195       && !vec_stmt
6196       && !vect_worthwhile_without_simd_p (vinfo, code))
6197     {
6198       if (dump_enabled_p ())
6199         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6200                          "not worthwhile without SIMD support.\n");
6201       return false;
6202     }
6203 
6204   int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
6205   vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
6206   internal_fn cond_fn = get_conditional_internal_fn (code);
6207 
6208   if (!vec_stmt) /* transformation not required.  */
6209     {
6210       /* If this operation is part of a reduction, a fully-masked loop
6211 	 should only change the active lanes of the reduction chain,
6212 	 keeping the inactive lanes as-is.  */
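      /* Illustrative sketch (assuming the target provides IFN_COND_ADD):
	 for a PLUS_EXPR reduction step the transform phase below emits
	   vect_sum_new = .COND_ADD (loop_mask, vect_sum_old, vect_x, vect_sum_old);
	 so lanes disabled by loop_mask keep the previous accumulator value.  */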
6213       if (loop_vinfo
6214 	  && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)
6215 	  && reduc_idx >= 0)
6216 	{
6217 	  if (cond_fn == IFN_LAST
6218 	      || !direct_internal_fn_supported_p (cond_fn, vectype,
6219 						  OPTIMIZE_FOR_SPEED))
6220 	    {
6221 	      if (dump_enabled_p ())
6222 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6223 				 "can't use a fully-masked loop because no"
6224 				 " conditional operation is available.\n");
6225 	      LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
6226 	    }
6227 	  else
6228 	    vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
6229 				   vectype, NULL);
6230 	}
6231 
6232       STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
6233       DUMP_VECT_SCOPE ("vectorizable_operation");
6234       vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
6235       return true;
6236     }
6237 
6238   /* Transform.  */
6239 
6240   if (dump_enabled_p ())
6241     dump_printf_loc (MSG_NOTE, vect_location,
6242                      "transform binary/unary operation.\n");
6243 
6244   bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
6245 
6246   /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6247      vectors with unsigned elements, but the result is signed.  So, we
6248      need to compute the MINUS_EXPR into vectype temporary and
6249      VIEW_CONVERT_EXPR it into the final vectype_out result.  */
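  /* Illustrative example (hypothetical user code):
       int *pa[N], *pb[N]; ptrdiff_t d[N];
       ...
       d[i] = pa[i] - pb[i];
     the pointer operands are vectorized as unsigned element vectors, the
     subtraction is computed in VECTYPE and the signed result is produced
     by the VIEW_CONVERT_EXPR into VECTYPE_OUT.  */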
6250   tree vec_cvt_dest = NULL_TREE;
6251   if (orig_code == POINTER_DIFF_EXPR)
6252     {
6253       vec_dest = vect_create_destination_var (scalar_dest, vectype);
6254       vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
6255     }
6256   /* Handle def.  */
6257   else
6258     vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
6259 
6260   /* In case the vectorization factor (VF) is bigger than the number
6261      of elements that we can fit in a vectype (nunits), we have to generate
6262      more than one vector stmt - i.e., we need to "unroll" the
6263      vector stmt by a factor VF/nunits.  In doing so, we record a pointer
6264      from one copy of the vector stmt to the next, in the field
6265      STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
6266      stages to find the correct vector defs to be used when vectorizing
6267      stmts that use the defs of the current stmt.  The example below
6268      illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6269      we need to create 4 vectorized stmts):
6270 
6271      before vectorization:
6272                                 RELATED_STMT    VEC_STMT
6273         S1:     x = memref      -               -
6274         S2:     z = x + 1       -               -
6275 
6276      step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6277              there):
6278                                 RELATED_STMT    VEC_STMT
6279         VS1_0:  vx0 = memref0   VS1_1           -
6280         VS1_1:  vx1 = memref1   VS1_2           -
6281         VS1_2:  vx2 = memref2   VS1_3           -
6282         VS1_3:  vx3 = memref3   -               -
6283         S1:     x = load        -               VS1_0
6284         S2:     z = x + 1       -               -
6285 
6286      step2: vectorize stmt S2 (done here):
6287         To vectorize stmt S2 we first need to find the relevant vector
6288         def for the first operand 'x'.  This is, as usual, obtained from
6289         the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6290         that defines 'x' (S1).  This way we find the stmt VS1_0, and the
6291         relevant vector def 'vx0'.  Having found 'vx0' we can generate
6292         the vector stmt VS2_0, and as usual, record it in the
6293         STMT_VINFO_VEC_STMT of stmt S2.
6294         When creating the second copy (VS2_1), we obtain the relevant vector
6295         def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6296         stmt VS1_0.  This way we find the stmt VS1_1 and the relevant
6297         vector def 'vx1'.  Using 'vx1' we create stmt VS2_1 and record a
6298         pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6299         Similarly when creating stmts VS2_2 and VS2_3.  This is the resulting
6300         chain of stmts and pointers:
6301                                 RELATED_STMT    VEC_STMT
6302         VS1_0:  vx0 = memref0   VS1_1           -
6303         VS1_1:  vx1 = memref1   VS1_2           -
6304         VS1_2:  vx2 = memref2   VS1_3           -
6305         VS1_3:  vx3 = memref3   -               -
6306         S1:     x = load        -               VS1_0
6307         VS2_0:  vz0 = vx0 + v1  VS2_1           -
6308         VS2_1:  vz1 = vx1 + v1  VS2_2           -
6309         VS2_2:  vz2 = vx2 + v1  VS2_3           -
6310         VS2_3:  vz3 = vx3 + v1  -               -
6311         S2:     z = x + 1       -               VS2_0  */
6312 
6313   prev_stmt_info = NULL;
6314   for (j = 0; j < ncopies; j++)
6315     {
6316       /* Handle uses.  */
6317       if (j == 0)
6318 	{
6319 	  if (op_type == binary_op)
6320 	    vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0, &vec_oprnds1,
6321 			       slp_node);
6322 	  else if (op_type == ternary_op)
6323 	    {
6324 	      if (slp_node)
6325 		{
6326 		  auto_vec<vec<tree> > vec_defs(3);
6327 		  vect_get_slp_defs (slp_node, &vec_defs);
6328 		  vec_oprnds0 = vec_defs[0];
6329 		  vec_oprnds1 = vec_defs[1];
6330 		  vec_oprnds2 = vec_defs[2];
6331 		}
6332 	      else
6333 		{
6334 		  vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0,
6335 				     &vec_oprnds1, NULL);
6336 		  vect_get_vec_defs (op2, NULL_TREE, stmt_info, &vec_oprnds2,
6337 				     NULL, NULL);
6338 		}
6339 	    }
6340 	  else
6341 	    vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
6342 			       slp_node);
6343 	}
6344       else
6345 	{
6346 	  vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, &vec_oprnds1);
6347 	  if (op_type == ternary_op)
6348 	    {
6349 	      tree vec_oprnd = vec_oprnds2.pop ();
6350 	      vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (vinfo,
6351 							           vec_oprnd));
6352 	    }
6353 	}
6354 
6355       /* Arguments are ready.  Create the new vector stmt.  */
6356       stmt_vec_info new_stmt_info = NULL;
6357       FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6358         {
6359 	  vop1 = ((op_type == binary_op || op_type == ternary_op)
6360 		  ? vec_oprnds1[i] : NULL_TREE);
6361 	  vop2 = ((op_type == ternary_op)
6362 		  ? vec_oprnds2[i] : NULL_TREE);
6363 	  if (masked_loop_p && reduc_idx >= 0)
6364 	    {
6365 	      /* Perform the operation on active elements only and take
6366 		 inactive elements from the reduction chain input.  */
6367 	      gcc_assert (!vop2);
6368 	      vop2 = reduc_idx == 1 ? vop1 : vop0;
6369 	      tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6370 					      vectype, i * ncopies + j);
6371 	      gcall *call = gimple_build_call_internal (cond_fn, 4, mask,
6372 							vop0, vop1, vop2);
6373 	      new_temp = make_ssa_name (vec_dest, call);
6374 	      gimple_call_set_lhs (call, new_temp);
6375 	      gimple_call_set_nothrow (call, true);
6376 	      new_stmt_info
6377 		= vect_finish_stmt_generation (stmt_info, call, gsi);
6378 	    }
6379 	  else
6380 	    {
6381 	      gassign *new_stmt = gimple_build_assign (vec_dest, code,
6382 						       vop0, vop1, vop2);
6383 	      new_temp = make_ssa_name (vec_dest, new_stmt);
6384 	      gimple_assign_set_lhs (new_stmt, new_temp);
6385 	      new_stmt_info
6386 		= vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6387 	      if (vec_cvt_dest)
6388 		{
6389 		  new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
6390 		  gassign *new_stmt
6391 		    = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
6392 					   new_temp);
6393 		  new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
6394 		  gimple_assign_set_lhs (new_stmt, new_temp);
6395 		  new_stmt_info
6396 		    = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6397 		}
6398 	    }
6399           if (slp_node)
6400 	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
6401         }
6402 
6403       if (slp_node)
6404         continue;
6405 
6406       if (j == 0)
6407 	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
6408       else
6409 	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
6410       prev_stmt_info = new_stmt_info;
6411     }
6412 
6413   vec_oprnds0.release ();
6414   vec_oprnds1.release ();
6415   vec_oprnds2.release ();
6416 
6417   return true;
6418 }
6419 
6420 /* A helper function to ensure data reference DR_INFO's base alignment.  */
6421 
6422 static void
6423 ensure_base_align (dr_vec_info *dr_info)
6424 {
6425   if (dr_info->misalignment == DR_MISALIGNMENT_UNINITIALIZED)
6426     return;
6427 
6428   if (dr_info->base_misaligned)
6429     {
6430       tree base_decl = dr_info->base_decl;
6431 
6432       // We should only be able to increase the alignment of a base object if
6433       // we know what its new alignment should be at compile time.
6434       unsigned HOST_WIDE_INT align_base_to =
6435 	DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;
6436 
6437       if (decl_in_symtab_p (base_decl))
6438 	symtab_node::get (base_decl)->increase_alignment (align_base_to);
6439       else if (DECL_ALIGN (base_decl) < align_base_to)
6440 	{
6441 	  SET_DECL_ALIGN (base_decl, align_base_to);
6442           DECL_USER_ALIGN (base_decl) = 1;
6443 	}
6444       dr_info->base_misaligned = false;
6445     }
6446 }
6447 
6448 
6449 /* Function get_group_alias_ptr_type.
6450 
6451    Return the alias type for the group starting at FIRST_STMT_INFO.  */
6452 
6453 static tree
6454 get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
6455 {
6456   struct data_reference *first_dr, *next_dr;
6457 
6458   first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
6459   stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
6460   while (next_stmt_info)
6461     {
6462       next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
6463       if (get_alias_set (DR_REF (first_dr))
6464 	  != get_alias_set (DR_REF (next_dr)))
6465 	{
6466 	  if (dump_enabled_p ())
6467 	    dump_printf_loc (MSG_NOTE, vect_location,
6468 			     "conflicting alias set types.\n");
6469 	  return ptr_type_node;
6470 	}
6471       next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
6472     }
6473   return reference_alias_ptr_type (DR_REF (first_dr));
6474 }
6475 
6476 
6477 /* Function scan_operand_equal_p.
6478 
6479    Helper function for check_scan_store.  Compare two references
6480    with .GOMP_SIMD_LANE bases.  */
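/* Illustrative note (following the GIMPLE shown in check_scan_store below):
   the references compared here are accesses such as D.2042[_25] and
   D.2043[_25], where _25 = .GOMP_SIMD_LANE (simduid, 2); two references
   are treated as equal only when their base, bit size and (possibly
   scaled) offset all match.  */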
6481 
6482 static bool
6483 scan_operand_equal_p (tree ref1, tree ref2)
6484 {
6485   tree ref[2] = { ref1, ref2 };
6486   poly_int64 bitsize[2], bitpos[2];
6487   tree offset[2], base[2];
6488   for (int i = 0; i < 2; ++i)
6489     {
6490       machine_mode mode;
6491       int unsignedp, reversep, volatilep = 0;
6492       base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i],
6493       				     &offset[i], &mode, &unsignedp,
6494       				     &reversep, &volatilep);
6495       if (reversep || volatilep || maybe_ne (bitpos[i], 0))
6496 	return false;
6497       if (TREE_CODE (base[i]) == MEM_REF
6498 	  && offset[i] == NULL_TREE
6499 	  && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME)
6500 	{
6501 	  gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0));
6502 	  if (is_gimple_assign (def_stmt)
6503 	      && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR
6504 	      && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR
6505 	      && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME)
6506 	    {
6507 	      if (maybe_ne (mem_ref_offset (base[i]), 0))
6508 		return false;
6509 	      base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
6510 	      offset[i] = gimple_assign_rhs2 (def_stmt);
6511 	    }
6512 	}
6513     }
6514 
6515   if (!operand_equal_p (base[0], base[1], 0))
6516     return false;
6517   if (maybe_ne (bitsize[0], bitsize[1]))
6518     return false;
6519   if (offset[0] != offset[1])
6520     {
6521       if (!offset[0] || !offset[1])
6522 	return false;
6523       if (!operand_equal_p (offset[0], offset[1], 0))
6524 	{
6525 	  tree step[2];
6526 	  for (int i = 0; i < 2; ++i)
6527 	    {
6528 	      step[i] = integer_one_node;
6529 	      if (TREE_CODE (offset[i]) == SSA_NAME)
6530 		{
6531 		  gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6532 		  if (is_gimple_assign (def_stmt)
6533 		      && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
6534 		      && (TREE_CODE (gimple_assign_rhs2 (def_stmt))
6535 			  == INTEGER_CST))
6536 		    {
6537 		      step[i] = gimple_assign_rhs2 (def_stmt);
6538 		      offset[i] = gimple_assign_rhs1 (def_stmt);
6539 		    }
6540 		}
6541 	      else if (TREE_CODE (offset[i]) == MULT_EXPR)
6542 		{
6543 		  step[i] = TREE_OPERAND (offset[i], 1);
6544 		  offset[i] = TREE_OPERAND (offset[i], 0);
6545 		}
6546 	      tree rhs1 = NULL_TREE;
6547 	      if (TREE_CODE (offset[i]) == SSA_NAME)
6548 		{
6549 		  gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6550 		  if (gimple_assign_cast_p (def_stmt))
6551 		    rhs1 = gimple_assign_rhs1 (def_stmt);
6552 		}
6553 	      else if (CONVERT_EXPR_P (offset[i]))
6554 		rhs1 = TREE_OPERAND (offset[i], 0);
6555 	      if (rhs1
6556 		  && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
6557 		  && INTEGRAL_TYPE_P (TREE_TYPE (offset[i]))
6558 		  && (TYPE_PRECISION (TREE_TYPE (offset[i]))
6559 		      >= TYPE_PRECISION (TREE_TYPE (rhs1))))
6560 		offset[i] = rhs1;
6561 	    }
6562 	  if (!operand_equal_p (offset[0], offset[1], 0)
6563 	      || !operand_equal_p (step[0], step[1], 0))
6564 	    return false;
6565 	}
6566     }
6567   return true;
6568 }
6569 
6570 
6571 enum scan_store_kind {
6572   /* Normal permutation.  */
6573   scan_store_kind_perm,
6574 
6575   /* Whole vector left shift permutation with zero init.  */
6576   scan_store_kind_lshift_zero,
6577 
6578   /* Whole vector left shift permutation and VEC_COND_EXPR.  */
6579   scan_store_kind_lshift_cond
6580 };
6581 
6582 /* Function scan_store_can_perm_p.
6583 
6584    Verify if we can perform the needed permutations or whole vector shifts.
6585    Return -1 on failure, otherwise exact log2 of vectype's nunits.
6586    If nonnull, USE_WHOLE_VECTOR records which scan_store_kind operation
6587    to perform at each step.  */
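/* Worked example (illustrative) for nunits == 8, i.e. units_log2 == 3:
     step i == 0 keeps 1 element:    { 0, 8, 9, 10, 11, 12, 13, 14 }
     step i == 1 keeps 2 elements:   { 0, 1, 8, 9, 10, 11, 12, 13 }
     step i == 2 keeps 4 elements:   { 0, 1, 2, 3, 8, 9, 10, 11 }
     step i == 3 broadcasts lane 7:  { 7, 7, 7, 7, 7, 7, 7, 7 }
   these are the VEC_PERM_EXPR masks used in the expansion shown in
   check_scan_store below; when a permutation is not supported, a
   whole-vector shift (scan_store_kind_lshift_*) is tried instead.  */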
6588 
6589 static int
6590 scan_store_can_perm_p (tree vectype, tree init,
6591 		       vec<enum scan_store_kind> *use_whole_vector = NULL)
6592 {
6593   enum machine_mode vec_mode = TYPE_MODE (vectype);
6594   unsigned HOST_WIDE_INT nunits;
6595   if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
6596     return -1;
6597   int units_log2 = exact_log2 (nunits);
6598   if (units_log2 <= 0)
6599     return -1;
6600 
6601   int i;
6602   enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
6603   for (i = 0; i <= units_log2; ++i)
6604     {
6605       unsigned HOST_WIDE_INT j, k;
6606       enum scan_store_kind kind = scan_store_kind_perm;
6607       vec_perm_builder sel (nunits, nunits, 1);
6608       sel.quick_grow (nunits);
6609       if (i == units_log2)
6610 	{
6611 	  for (j = 0; j < nunits; ++j)
6612 	    sel[j] = nunits - 1;
6613 	}
6614       else
6615 	{
6616 	  for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
6617 	    sel[j] = j;
6618 	  for (k = 0; j < nunits; ++j, ++k)
6619 	    sel[j] = nunits + k;
6620 	}
6621       vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
6622       if (!can_vec_perm_const_p (vec_mode, indices))
6623 	{
6624 	  if (i == units_log2)
6625 	    return -1;
6626 
6627 	  if (whole_vector_shift_kind == scan_store_kind_perm)
6628 	    {
6629 	      if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
6630 		return -1;
6631 	      whole_vector_shift_kind = scan_store_kind_lshift_zero;
6632 	      /* Whole vector shifts shift in zeros, so if init is an all-zeros
6633 		 constant, there is no need to do anything further.  */
6634 	      if ((TREE_CODE (init) != INTEGER_CST
6635 		   && TREE_CODE (init) != REAL_CST)
6636 		  || !initializer_zerop (init))
6637 		{
6638 		  tree masktype = truth_type_for (vectype);
6639 		  if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
6640 		    return -1;
6641 		  whole_vector_shift_kind = scan_store_kind_lshift_cond;
6642 		}
6643 	    }
6644 	  kind = whole_vector_shift_kind;
6645 	}
6646       if (use_whole_vector)
6647 	{
6648 	  if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
6649 	    use_whole_vector->safe_grow_cleared (i);
6650 	  if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
6651 	    use_whole_vector->safe_push (kind);
6652 	}
6653     }
6654 
6655   return units_log2;
6656 }
6657 
6658 
6659 /* Function check_scan_store.
6660 
6661    Check magic stores for #pragma omp scan {in,ex}clusive reductions.  */
6662 
6663 static bool
6664 check_scan_store (stmt_vec_info stmt_info, tree vectype,
6665 		  enum vect_def_type rhs_dt, bool slp, tree mask,
6666 		  vect_memory_access_type memory_access_type)
6667 {
6668   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6669   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
6670   tree ref_type;
6671 
6672   gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
6673   if (slp
6674       || mask
6675       || memory_access_type != VMAT_CONTIGUOUS
6676       || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
6677       || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
6678       || loop_vinfo == NULL
6679       || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
6680       || STMT_VINFO_GROUPED_ACCESS (stmt_info)
6681       || !integer_zerop (get_dr_vinfo_offset (dr_info))
6682       || !integer_zerop (DR_INIT (dr_info->dr))
6683       || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
6684       || !alias_sets_conflict_p (get_alias_set (vectype),
6685 				 get_alias_set (TREE_TYPE (ref_type))))
6686     {
6687       if (dump_enabled_p ())
6688 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6689 			 "unsupported OpenMP scan store.\n");
6690       return false;
6691     }
6692 
6693   /* We need to pattern match code built by OpenMP lowering and simplified
6694      by subsequent optimizations into something we can handle.
6695      #pragma omp simd reduction(inscan,+:r)
6696      for (...)
6697        {
6698 	 r += something ();
6699 	 #pragma omp scan inclusive (r)
6700 	 use (r);
6701        }
6702      shall have body with:
6703        // Initialization for input phase, store the reduction initializer:
6704        _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6705        _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6706        D.2042[_21] = 0;
6707        // Actual input phase:
6708        ...
6709        r.0_5 = D.2042[_20];
6710        _6 = _4 + r.0_5;
6711        D.2042[_20] = _6;
6712        // Initialization for scan phase:
6713        _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
6714        _26 = D.2043[_25];
6715        _27 = D.2042[_25];
6716        _28 = _26 + _27;
6717        D.2043[_25] = _28;
6718        D.2042[_25] = _28;
6719        // Actual scan phase:
6720        ...
6721        r.1_8 = D.2042[_20];
6722        ...
6723      The "omp simd array" variable D.2042 holds the privatized copy used
6724      inside of the loop and D.2043 is another one that holds copies of
6725      the current original list item.  The separate GOMP_SIMD_LANE ifn
6726      kinds are there in order to allow optimizing the initializer store
6727      and combiner sequence, e.g. if it is originally some C++-ish
6728      user-defined reduction, but allow the vectorizer to pattern recognize
6729      it and turn it into the appropriate vectorized scan.
6730 
6731      For exclusive scan, this is slightly different:
6732      #pragma omp simd reduction(inscan,+:r)
6733      for (...)
6734        {
6735 	 use (r);
6736 	 #pragma omp scan exclusive (r)
6737 	 r += something ();
6738        }
6739      shall have body with:
6740        // Initialization for input phase, store the reduction initializer:
6741        _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6742        _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6743        D.2042[_21] = 0;
6744        // Actual input phase:
6745        ...
6746        r.0_5 = D.2042[_20];
6747        _6 = _4 + r.0_5;
6748        D.2042[_20] = _6;
6749        // Initialization for scan phase:
6750        _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
6751        _26 = D.2043[_25];
6752        D.2044[_25] = _26;
6753        _27 = D.2042[_25];
6754        _28 = _26 + _27;
6755        D.2043[_25] = _28;
6756        // Actual scan phase:
6757        ...
6758        r.1_8 = D.2044[_20];
6759        ...  */
6760 
6761   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
6762     {
6763       /* Match the D.2042[_21] = 0; store above.  Just require that
6764 	 it is a constant or external definition store.  */
6765       if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def)
6766 	{
6767 	 fail_init:
6768 	  if (dump_enabled_p ())
6769 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6770 			     "unsupported OpenMP scan initializer store.\n");
6771 	  return false;
6772 	}
6773 
6774       if (! loop_vinfo->scan_map)
6775 	loop_vinfo->scan_map = new hash_map<tree, tree>;
6776       tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6777       tree &cached = loop_vinfo->scan_map->get_or_insert (var);
6778       if (cached)
6779 	goto fail_init;
6780       cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info));
6781 
6782       /* These stores can be vectorized normally.  */
6783       return true;
6784     }
6785 
6786   if (rhs_dt != vect_internal_def)
6787     {
6788      fail:
6789       if (dump_enabled_p ())
6790 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6791 			 "unsupported OpenMP scan combiner pattern.\n");
6792       return false;
6793     }
6794 
6795   gimple *stmt = STMT_VINFO_STMT (stmt_info);
6796   tree rhs = gimple_assign_rhs1 (stmt);
6797   if (TREE_CODE (rhs) != SSA_NAME)
6798     goto fail;
6799 
6800   gimple *other_store_stmt = NULL;
6801   tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6802   bool inscan_var_store
6803     = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
6804 
6805   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6806     {
6807       if (!inscan_var_store)
6808 	{
6809 	  use_operand_p use_p;
6810 	  imm_use_iterator iter;
6811 	  FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6812 	    {
6813 	      gimple *use_stmt = USE_STMT (use_p);
6814 	      if (use_stmt == stmt || is_gimple_debug (use_stmt))
6815 		continue;
6816 	      if (gimple_bb (use_stmt) != gimple_bb (stmt)
6817 		  || !is_gimple_assign (use_stmt)
6818 		  || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS
6819 		  || other_store_stmt
6820 		  || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME)
6821 		goto fail;
6822 	      other_store_stmt = use_stmt;
6823 	    }
6824 	  if (other_store_stmt == NULL)
6825 	    goto fail;
6826 	  rhs = gimple_assign_lhs (other_store_stmt);
6827 	  if (!single_imm_use (rhs, &use_p, &other_store_stmt))
6828 	    goto fail;
6829 	}
6830     }
6831   else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
6832     {
6833       use_operand_p use_p;
6834       imm_use_iterator iter;
6835       FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6836 	{
6837 	  gimple *use_stmt = USE_STMT (use_p);
6838 	  if (use_stmt == stmt || is_gimple_debug (use_stmt))
6839 	    continue;
6840 	  if (other_store_stmt)
6841 	    goto fail;
6842 	  other_store_stmt = use_stmt;
6843 	}
6844     }
6845   else
6846     goto fail;
6847 
6848   gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
6849   if (gimple_bb (def_stmt) != gimple_bb (stmt)
6850       || !is_gimple_assign (def_stmt)
6851       || gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS)
6852     goto fail;
6853 
6854   enum tree_code code = gimple_assign_rhs_code (def_stmt);
6855   /* For pointer addition, we should use the normal plus for the vector
6856      operation.  */
6857   switch (code)
6858     {
6859     case POINTER_PLUS_EXPR:
6860       code = PLUS_EXPR;
6861       break;
6862     case MULT_HIGHPART_EXPR:
6863       goto fail;
6864     default:
6865       break;
6866     }
6867   if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code))
6868     goto fail;
6869 
6870   tree rhs1 = gimple_assign_rhs1 (def_stmt);
6871   tree rhs2 = gimple_assign_rhs2 (def_stmt);
6872   if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
6873     goto fail;
6874 
6875   gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
6876   gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
6877   if (gimple_bb (load1_stmt) != gimple_bb (stmt)
6878       || !gimple_assign_load_p (load1_stmt)
6879       || gimple_bb (load2_stmt) != gimple_bb (stmt)
6880       || !gimple_assign_load_p (load2_stmt))
6881     goto fail;
6882 
6883   stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
6884   stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
6885   if (load1_stmt_info == NULL
6886       || load2_stmt_info == NULL
6887       || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info)
6888 	  != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))
6889       || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info)
6890 	  != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
6891     goto fail;
6892 
6893   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store)
6894     {
6895       dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
6896       if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR
6897 	  || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0)))
6898 	goto fail;
6899       tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
6900       tree lrhs;
6901       if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6902 	lrhs = rhs1;
6903       else
6904 	lrhs = rhs2;
6905       use_operand_p use_p;
6906       imm_use_iterator iter;
6907       FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs)
6908 	{
6909 	  gimple *use_stmt = USE_STMT (use_p);
6910 	  if (use_stmt == def_stmt || is_gimple_debug (use_stmt))
6911 	    continue;
6912 	  if (other_store_stmt)
6913 	    goto fail;
6914 	  other_store_stmt = use_stmt;
6915 	}
6916     }
6917 
6918   if (other_store_stmt == NULL)
6919     goto fail;
6920   if (gimple_bb (other_store_stmt) != gimple_bb (stmt)
6921       || !gimple_store_p (other_store_stmt))
6922     goto fail;
6923 
6924   stmt_vec_info other_store_stmt_info
6925     = loop_vinfo->lookup_stmt (other_store_stmt);
6926   if (other_store_stmt_info == NULL
6927       || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info)
6928 	  != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
6929     goto fail;
6930 
6931   gimple *stmt1 = stmt;
6932   gimple *stmt2 = other_store_stmt;
6933   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
6934     std::swap (stmt1, stmt2);
6935   if (scan_operand_equal_p (gimple_assign_lhs (stmt1),
6936 			    gimple_assign_rhs1 (load2_stmt)))
6937     {
6938       std::swap (rhs1, rhs2);
6939       std::swap (load1_stmt, load2_stmt);
6940       std::swap (load1_stmt_info, load2_stmt_info);
6941     }
6942   if (!scan_operand_equal_p (gimple_assign_lhs (stmt1),
6943 			     gimple_assign_rhs1 (load1_stmt)))
6944     goto fail;
6945 
6946   tree var3 = NULL_TREE;
6947   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3
6948       && !scan_operand_equal_p (gimple_assign_lhs (stmt2),
6949 				gimple_assign_rhs1 (load2_stmt)))
6950     goto fail;
6951   else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6952     {
6953       dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
6954       if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR
6955 	  || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0)))
6956 	goto fail;
6957       var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
6958       if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3))
6959 	  || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3))
6960 	  || lookup_attribute ("omp simd inscan exclusive",
6961 			       DECL_ATTRIBUTES (var3)))
6962 	goto fail;
6963     }
6964 
6965   dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
6966   if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
6967       || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0)))
6968     goto fail;
6969 
6970   tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6971   tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0);
6972   if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1))
6973       || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2))
6974       || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6975 	 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2))))
6976     goto fail;
6977 
6978   if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6979     std::swap (var1, var2);
6980 
6981   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6982     {
6983       if (!lookup_attribute ("omp simd inscan exclusive",
6984 			     DECL_ATTRIBUTES (var1)))
6985 	goto fail;
6986       var1 = var3;
6987     }
6988 
6989   if (loop_vinfo->scan_map == NULL)
6990     goto fail;
6991   tree *init = loop_vinfo->scan_map->get (var1);
6992   if (init == NULL)
6993     goto fail;
6994 
6995   /* The IL is as expected, now check if we can actually vectorize it.
6996      Inclusive scan:
6997        _26 = D.2043[_25];
6998        _27 = D.2042[_25];
6999        _28 = _26 + _27;
7000        D.2043[_25] = _28;
7001        D.2042[_25] = _28;
7002      should be vectorized as (where _40 is the vectorized rhs
7003      from the D.2042[_21] = 0; store):
7004        _30 = MEM <vector(8) int> [(int *)&D.2043];
7005        _31 = MEM <vector(8) int> [(int *)&D.2042];
7006        _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7007        _33 = _31 + _32;
7008        // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
7009        _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7010        _35 = _33 + _34;
7011        // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7012        //         _31[1]+.._31[4], ... _31[4]+.._31[7] };
7013        _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7014        _37 = _35 + _36;
7015        // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7016        //         _31[0]+.._31[4], ... _31[0]+.._31[7] };
7017        _38 = _30 + _37;
7018        _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7019        MEM <vector(8) int> [(int *)&D.2043] = _39;
7020        MEM <vector(8) int> [(int *)&D.2042] = _38;
7021      Exclusive scan:
7022        _26 = D.2043[_25];
7023        D.2044[_25] = _26;
7024        _27 = D.2042[_25];
7025        _28 = _26 + _27;
7026        D.2043[_25] = _28;
7027      should be vectorized as (where _40 is the vectorized rhs
7028      from the D.2042[_21] = 0; store):
7029        _30 = MEM <vector(8) int> [(int *)&D.2043];
7030        _31 = MEM <vector(8) int> [(int *)&D.2042];
7031        _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7032        _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7033        _34 = _32 + _33;
7034        // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
7035        //         _31[3]+_31[4], ... _31[5]+.._31[6] };
7036        _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7037        _36 = _34 + _35;
7038        // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7039        //         _31[1]+.._31[4], ... _31[3]+.._31[6] };
7040        _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7041        _38 = _36 + _37;
7042        // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7043        //         _31[0]+.._31[4], ... _31[0]+.._31[6] };
7044        _39 = _30 + _38;
7045        _50 = _31 + _39;
7046        _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7047        MEM <vector(8) int> [(int *)&D.2044] = _39;
7048        MEM <vector(8) int> [(int *)&D.2042] = _51;  */
7049   enum machine_mode vec_mode = TYPE_MODE (vectype);
7050   optab optab = optab_for_tree_code (code, vectype, optab_default);
7051   if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
7052     goto fail;
7053 
7054   int units_log2 = scan_store_can_perm_p (vectype, *init);
7055   if (units_log2 == -1)
7056     goto fail;
7057 
7058   return true;
7059 }
7060 
7061 
7062 /* Function vectorizable_scan_store.
7063 
7064    Helper of vectorizable_store; the arguments are as for vectorizable_store.
7065    Handle only the transformation; the checking is done in check_scan_store.  */
7066 
7067 static bool
7068 vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7069 			 stmt_vec_info *vec_stmt, int ncopies)
7070 {
7071   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7072   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
7073   tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
7074   vec_info *vinfo = stmt_info->vinfo;
7075   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7076 
7077   if (dump_enabled_p ())
7078     dump_printf_loc (MSG_NOTE, vect_location,
7079 		     "transform scan store. ncopies = %d\n", ncopies);
7080 
7081   gimple *stmt = STMT_VINFO_STMT (stmt_info);
7082   tree rhs = gimple_assign_rhs1 (stmt);
7083   gcc_assert (TREE_CODE (rhs) == SSA_NAME);
7084 
7085   tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7086   bool inscan_var_store
7087     = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
7088 
7089   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7090     {
7091       use_operand_p use_p;
7092       imm_use_iterator iter;
7093       FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7094 	{
7095 	  gimple *use_stmt = USE_STMT (use_p);
7096 	  if (use_stmt == stmt || is_gimple_debug (use_stmt))
7097 	    continue;
7098 	  rhs = gimple_assign_lhs (use_stmt);
7099 	  break;
7100 	}
7101     }
7102 
7103   gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
7104   enum tree_code code = gimple_assign_rhs_code (def_stmt);
7105   if (code == POINTER_PLUS_EXPR)
7106     code = PLUS_EXPR;
7107   gcc_assert (TREE_CODE_LENGTH (code) == binary_op
7108 	      && commutative_tree_code (code));
7109   tree rhs1 = gimple_assign_rhs1 (def_stmt);
7110   tree rhs2 = gimple_assign_rhs2 (def_stmt);
7111   gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME);
7112   gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
7113   gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
7114   stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
7115   stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
7116   dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7117   dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7118   tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7119   tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7120 
7121   if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7122     {
7123       std::swap (rhs1, rhs2);
7124       std::swap (var1, var2);
7125       std::swap (load1_dr_info, load2_dr_info);
7126     }
7127 
7128   tree *init = loop_vinfo->scan_map->get (var1);
7129   gcc_assert (init);
7130 
7131   unsigned HOST_WIDE_INT nunits;
7132   if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
7133     gcc_unreachable ();
7134   auto_vec<enum scan_store_kind, 16> use_whole_vector;
7135   int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector);
7136   gcc_assert (units_log2 > 0);
7137   auto_vec<tree, 16> perms;
7138   perms.quick_grow (units_log2 + 1);
7139   tree zero_vec = NULL_TREE, masktype = NULL_TREE;
7140   for (int i = 0; i <= units_log2; ++i)
7141     {
7142       unsigned HOST_WIDE_INT j, k;
7143       vec_perm_builder sel (nunits, nunits, 1);
7144       sel.quick_grow (nunits);
7145       if (i == units_log2)
7146 	for (j = 0; j < nunits; ++j)
7147 	  sel[j] = nunits - 1;
7148       else
7149 	{
7150 	  for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
7151 	    sel[j] = j;
7152 	  for (k = 0; j < nunits; ++j, ++k)
7153 	    sel[j] = nunits + k;
7154 	}
7155       vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
7156       if (!use_whole_vector.is_empty ()
7157 	  && use_whole_vector[i] != scan_store_kind_perm)
7158 	{
7159 	  if (zero_vec == NULL_TREE)
7160 	    zero_vec = build_zero_cst (vectype);
7161 	  if (masktype == NULL_TREE
7162 	      && use_whole_vector[i] == scan_store_kind_lshift_cond)
7163 	    masktype = truth_type_for (vectype);
7164 	  perms[i] = vect_gen_perm_mask_any (vectype, indices);
7165 	}
7166       else
7167 	perms[i] = vect_gen_perm_mask_checked (vectype, indices);
7168     }
7169 
7170   stmt_vec_info prev_stmt_info = NULL;
7171   tree vec_oprnd1 = NULL_TREE;
7172   tree vec_oprnd2 = NULL_TREE;
7173   tree vec_oprnd3 = NULL_TREE;
7174   tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
7175   tree dataref_offset = build_int_cst (ref_type, 0);
7176   tree bump = vect_get_data_ptr_increment (dr_info, vectype, VMAT_CONTIGUOUS);
7177   tree ldataref_ptr = NULL_TREE;
7178   tree orig = NULL_TREE;
7179   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7180     ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
7181   for (int j = 0; j < ncopies; j++)
7182     {
7183       stmt_vec_info new_stmt_info;
7184       if (j == 0)
7185 	{
7186 	  vec_oprnd1 = vect_get_vec_def_for_operand (*init, stmt_info);
7187 	  if (ldataref_ptr == NULL)
7188 	    vec_oprnd2 = vect_get_vec_def_for_operand (rhs1, stmt_info);
7189 	  vec_oprnd3 = vect_get_vec_def_for_operand (rhs2, stmt_info);
7190 	  orig = vec_oprnd3;
7191 	}
7192       else
7193 	{
7194 	  vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1);
7195 	  if (ldataref_ptr == NULL)
7196 	    vec_oprnd2 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd2);
7197 	  vec_oprnd3 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd3);
7198 	  if (!inscan_var_store)
7199 	    dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7200 	}
7201 
7202       if (ldataref_ptr)
7203 	{
7204 	  vec_oprnd2 = make_ssa_name (vectype);
7205 	  tree data_ref = fold_build2 (MEM_REF, vectype,
7206 				       unshare_expr (ldataref_ptr),
7207 				       dataref_offset);
7208 	  vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
7209 	  gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
7210 	  new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
7211 	  if (prev_stmt_info == NULL)
7212 	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
7213 	  else
7214 	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7215 	  prev_stmt_info = new_stmt_info;
7216 	}
7217 
7218       tree v = vec_oprnd2;
7219       for (int i = 0; i < units_log2; ++i)
7220 	{
7221 	  tree new_temp = make_ssa_name (vectype);
7222 	  gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR,
7223 					   (zero_vec
7224 					    && (use_whole_vector[i]
7225 						!= scan_store_kind_perm))
7226 					   ? zero_vec : vec_oprnd1, v,
7227 					   perms[i]);
7228 	  new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
7229 	  if (prev_stmt_info == NULL)
7230 	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
7231 	  else
7232 	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7233 	  prev_stmt_info = new_stmt_info;
7234 
7235 	  if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond)
7236 	    {
7237 	      /* The whole-vector shift shifted in zero elements, but if *init
7238 		 is not initializer_zerop, we need to replace those elements
7239 		 with elements from vec_oprnd1.  */
7240 	      tree_vector_builder vb (masktype, nunits, 1);
7241 	      for (unsigned HOST_WIDE_INT k = 0; k < nunits; ++k)
7242 		vb.quick_push (k < (HOST_WIDE_INT_1U << i)
7243 			       ? boolean_false_node : boolean_true_node);
7244 
7245 	      tree new_temp2 = make_ssa_name (vectype);
7246 	      g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (),
7247 				       new_temp, vec_oprnd1);
7248 	      new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
7249 	      STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7250 	      prev_stmt_info = new_stmt_info;
7251 	      new_temp = new_temp2;
7252 	    }
7253 
7254 	  /* For exclusive scan, perform the perms[i] permutation once
7255 	     more.  */
7256 	  if (i == 0
7257 	      && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4
7258 	      && v == vec_oprnd2)
7259 	    {
7260 	      v = new_temp;
7261 	      --i;
7262 	      continue;
7263 	    }
7264 
7265 	  tree new_temp2 = make_ssa_name (vectype);
7266 	  g = gimple_build_assign (new_temp2, code, v, new_temp);
7267 	  new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
7268 	  STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7269 	  prev_stmt_info = new_stmt_info;
7270 
7271 	  v = new_temp2;
7272 	}
7273 
7274       tree new_temp = make_ssa_name (vectype);
7275       gimple *g = gimple_build_assign (new_temp, code, orig, v);
7276       new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
7277       STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7278       prev_stmt_info = new_stmt_info;
7279 
7280       tree last_perm_arg = new_temp;
7281       /* For exclusive scan, new_temp computed above is the exclusive scan
7282 	 prefix sum.  Turn it into the inclusive prefix sum needed for the
7283 	 broadcast of the last element into orig.  */
7284       if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7285 	{
7286 	  last_perm_arg = make_ssa_name (vectype);
7287 	  g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
7288 	  new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
7289 	  STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7290 	  prev_stmt_info = new_stmt_info;
7291 	}
7292 
7293       orig = make_ssa_name (vectype);
7294       g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
7295 			       last_perm_arg, perms[units_log2]);
7296       new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
7297       STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7298       prev_stmt_info = new_stmt_info;
7299 
7300       if (!inscan_var_store)
7301 	{
7302 	  tree data_ref = fold_build2 (MEM_REF, vectype,
7303 				       unshare_expr (dataref_ptr),
7304 				       dataref_offset);
7305 	  vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7306 	  g = gimple_build_assign (data_ref, new_temp);
7307 	  new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
7308 	  STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7309 	  prev_stmt_info = new_stmt_info;
7310 	}
7311     }
7312 
7313   if (inscan_var_store)
7314     for (int j = 0; j < ncopies; j++)
7315       {
7316 	if (j != 0)
7317 	  dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7318 
7319 	tree data_ref = fold_build2 (MEM_REF, vectype,
7320 				     unshare_expr (dataref_ptr),
7321 				     dataref_offset);
7322 	vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7323 	gimple *g = gimple_build_assign (data_ref, orig);
7324 	stmt_vec_info new_stmt_info
7325 	  = vect_finish_stmt_generation (stmt_info, g, gsi);
7326 	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7327 	prev_stmt_info = new_stmt_info;
7328       }
7329   return true;
7330 }
7331 
7332 
7333 /* Function vectorizable_store.
7334 
7335    Check if STMT_INFO defines a non-scalar data-ref (array/pointer/structure)
7336    that can be vectorized.
7337    If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
7338    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7339    Return true if STMT_INFO is vectorizable in this way.  */
7340 
7341 static bool
7342 vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7343 		    stmt_vec_info *vec_stmt, slp_tree slp_node,
7344 		    stmt_vector_for_cost *cost_vec)
7345 {
7346   tree data_ref;
7347   tree op;
7348   tree vec_oprnd = NULL_TREE;
7349   tree elem_type;
7350   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7351   class loop *loop = NULL;
7352   machine_mode vec_mode;
7353   tree dummy;
7354   enum dr_alignment_support alignment_support_scheme;
7355   enum vect_def_type rhs_dt = vect_unknown_def_type;
7356   enum vect_def_type mask_dt = vect_unknown_def_type;
7357   stmt_vec_info prev_stmt_info = NULL;
7358   tree dataref_ptr = NULL_TREE;
7359   tree dataref_offset = NULL_TREE;
7360   gimple *ptr_incr = NULL;
7361   int ncopies;
7362   int j;
7363   stmt_vec_info first_stmt_info;
7364   bool grouped_store;
7365   unsigned int group_size, i;
7366   vec<tree> oprnds = vNULL;
7367   vec<tree> result_chain = vNULL;
7368   tree offset = NULL_TREE;
7369   vec<tree> vec_oprnds = vNULL;
7370   bool slp = (slp_node != NULL);
7371   unsigned int vec_num;
7372   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7373   vec_info *vinfo = stmt_info->vinfo;
7374   tree aggr_type;
7375   gather_scatter_info gs_info;
7376   poly_uint64 vf;
7377   vec_load_store_type vls_type;
7378   tree ref_type;
7379 
7380   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7381     return false;
7382 
7383   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7384       && ! vec_stmt)
7385     return false;
7386 
7387   /* Is vectorizable store? */
7388 
7389   tree mask = NULL_TREE, mask_vectype = NULL_TREE;
7390   if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
7391     {
7392       tree scalar_dest = gimple_assign_lhs (assign);
7393       if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
7394 	  && is_pattern_stmt_p (stmt_info))
7395 	scalar_dest = TREE_OPERAND (scalar_dest, 0);
7396       if (TREE_CODE (scalar_dest) != ARRAY_REF
7397 	  && TREE_CODE (scalar_dest) != BIT_FIELD_REF
7398 	  && TREE_CODE (scalar_dest) != INDIRECT_REF
7399 	  && TREE_CODE (scalar_dest) != COMPONENT_REF
7400 	  && TREE_CODE (scalar_dest) != IMAGPART_EXPR
7401 	  && TREE_CODE (scalar_dest) != REALPART_EXPR
7402 	  && TREE_CODE (scalar_dest) != MEM_REF)
7403 	return false;
7404     }
7405   else
7406     {
7407       gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
7408       if (!call || !gimple_call_internal_p (call))
7409 	return false;
7410 
7411       internal_fn ifn = gimple_call_internal_fn (call);
7412       if (!internal_store_fn_p (ifn))
7413 	return false;
7414 
7415       if (slp_node != NULL)
7416 	{
7417 	  if (dump_enabled_p ())
7418 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7419 			     "SLP of masked stores not supported.\n");
7420 	  return false;
7421 	}
7422 
7423       int mask_index = internal_fn_mask_index (ifn);
7424       if (mask_index >= 0)
7425 	{
7426 	  mask = gimple_call_arg (call, mask_index);
7427 	  if (!vect_check_scalar_mask (stmt_info, mask, &mask_dt,
7428 				       &mask_vectype))
7429 	    return false;
7430 	}
7431     }
7432 
7433   op = vect_get_store_rhs (stmt_info);
7434 
7435   /* Cannot have hybrid store SLP -- that would mean storing to the
7436      same location twice.  */
7437   gcc_assert (slp == PURE_SLP_STMT (stmt_info));
7438 
7439   tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
7440   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7441 
7442   if (loop_vinfo)
7443     {
7444       loop = LOOP_VINFO_LOOP (loop_vinfo);
7445       vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7446     }
7447   else
7448     vf = 1;
7449 
7450   /* Multiple types in SLP are handled by creating the appropriate number of
7451      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
7452      case of SLP.  */
7453   if (slp)
7454     ncopies = 1;
7455   else
7456     ncopies = vect_get_num_copies (loop_vinfo, vectype);
7457 
7458   gcc_assert (ncopies >= 1);
7459 
7460   /* FORNOW.  This restriction should be relaxed.  */
7461   if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
7462     {
7463       if (dump_enabled_p ())
7464 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7465 			 "multiple types in nested loop.\n");
7466       return false;
7467     }
7468 
7469   if (!vect_check_store_rhs (stmt_info, op, &rhs_dt, &rhs_vectype, &vls_type))
7470     return false;
7471 
7472   elem_type = TREE_TYPE (vectype);
7473   vec_mode = TYPE_MODE (vectype);
7474 
7475   if (!STMT_VINFO_DATA_REF (stmt_info))
7476     return false;
7477 
7478   vect_memory_access_type memory_access_type;
7479   if (!get_load_store_type (stmt_info, vectype, slp, mask, vls_type, ncopies,
7480 			    &memory_access_type, &gs_info))
7481     return false;
7482 
7483   if (mask)
7484     {
7485       if (memory_access_type == VMAT_CONTIGUOUS)
7486 	{
7487 	  if (!VECTOR_MODE_P (vec_mode)
7488 	      || !can_vec_mask_load_store_p (vec_mode,
7489 					     TYPE_MODE (mask_vectype), false))
7490 	    return false;
7491 	}
7492       else if (memory_access_type != VMAT_LOAD_STORE_LANES
7493 	       && (memory_access_type != VMAT_GATHER_SCATTER
7494 		   || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
7495 	{
7496 	  if (dump_enabled_p ())
7497 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7498 			     "unsupported access type for masked store.\n");
7499 	  return false;
7500 	}
7501     }
7502   else
7503     {
7504       /* FORNOW.  In some cases we can vectorize even if the data-type is not
7505 	 supported (e.g. array initialization with 0).  */
7506       if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
7507 	return false;
7508     }
7509 
7510   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
7511   grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
7512 		   && memory_access_type != VMAT_GATHER_SCATTER
7513 		   && (slp || memory_access_type != VMAT_CONTIGUOUS));
7514   if (grouped_store)
7515     {
7516       first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7517       first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7518       group_size = DR_GROUP_SIZE (first_stmt_info);
7519     }
7520   else
7521     {
7522       first_stmt_info = stmt_info;
7523       first_dr_info = dr_info;
7524       group_size = vec_num = 1;
7525     }
7526 
7527   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && !vec_stmt)
7528     {
7529       if (!check_scan_store (stmt_info, vectype, rhs_dt, slp, mask,
7530 			     memory_access_type))
7531 	return false;
7532     }
7533 
7534   if (!vec_stmt) /* transformation not required.  */
7535     {
7536       STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7537 
7538       if (loop_vinfo
7539 	  && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
7540 	check_load_store_masking (loop_vinfo, vectype, vls_type, group_size,
7541 				  memory_access_type, &gs_info, mask);
7542 
7543       STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
7544       vect_model_store_cost (stmt_info, ncopies, rhs_dt, memory_access_type,
7545 			     vls_type, slp_node, cost_vec);
7546       return true;
7547     }
7548   gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7549 
7550   /* Transform.  */
7551 
7552   ensure_base_align (dr_info);
7553 
7554   if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7555     {
7556       tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
7557       tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
7558       tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
7559       tree ptr, var, scale, vec_mask;
7560       tree mask_arg = NULL_TREE, mask_op = NULL_TREE, perm_mask = NULL_TREE;
7561       tree mask_halfvectype = mask_vectype;
7562       edge pe = loop_preheader_edge (loop);
7563       gimple_seq seq;
7564       basic_block new_bb;
7565       enum { NARROW, NONE, WIDEN } modifier;
7566       poly_uint64 scatter_off_nunits
7567 	= TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
7568 
7569       if (known_eq (nunits, scatter_off_nunits))
7570 	modifier = NONE;
7571       else if (known_eq (nunits * 2, scatter_off_nunits))
7572 	{
7573 	  modifier = WIDEN;
7574 
7575 	  /* Currently gathers and scatters are only supported for
7576 	     fixed-length vectors.  */
7577 	  unsigned int count = scatter_off_nunits.to_constant ();
7578 	  vec_perm_builder sel (count, count, 1);
7579 	  for (i = 0; i < (unsigned int) count; ++i)
7580 	    sel.quick_push (i | (count / 2));
7581 
7582 	  vec_perm_indices indices (sel, 1, count);
7583 	  perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
7584 						  indices);
7585 	  gcc_assert (perm_mask != NULL_TREE);
7586 	}
7587       else if (known_eq (nunits, scatter_off_nunits * 2))
7588 	{
7589 	  modifier = NARROW;
7590 
7591 	  /* Currently gathers and scatters are only supported for
7592 	     fixed-length vectors.  */
7593 	  unsigned int count = nunits.to_constant ();
7594 	  vec_perm_builder sel (count, count, 1);
7595 	  for (i = 0; i < (unsigned int) count; ++i)
7596 	    sel.quick_push (i | (count / 2));
7597 
7598 	  vec_perm_indices indices (sel, 2, count);
7599 	  perm_mask = vect_gen_perm_mask_checked (vectype, indices);
7600 	  gcc_assert (perm_mask != NULL_TREE);
7601 	  ncopies *= 2;
7602 
7603 	  if (mask)
7604 	    mask_halfvectype = truth_type_for (gs_info.offset_vectype);
7605 	}
7606       else
7607 	gcc_unreachable ();
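      /* To summarize the selection above: with NONE the data and offset
	 vectors have the same number of elements; with WIDEN the offset
	 vector is twice as long and odd-numbered copies use its upper half
	 (moved into place via PERM_MASK); with NARROW the data vector is
	 twice as long, so NCOPIES is doubled and odd-numbered copies
	 scatter its upper half.  */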
7608 
7609       rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
7610       ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7611       masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7612       idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7613       srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7614       scaletype = TREE_VALUE (arglist);
7615 
7616       gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
7617 			   && TREE_CODE (rettype) == VOID_TYPE);
7618 
7619       ptr = fold_convert (ptrtype, gs_info.base);
7620       if (!is_gimple_min_invariant (ptr))
7621 	{
7622 	  ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
7623 	  new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
7624 	  gcc_assert (!new_bb);
7625 	}
7626 
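      /* If the scatter is unconditional, use an all-ones mask of MASKTYPE
	 (an integer type for this builtin, see the assert above).  */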
7627       if (mask == NULL_TREE)
7628 	{
7629 	  mask_arg = build_int_cst (masktype, -1);
7630 	  mask_arg = vect_init_vector (stmt_info, mask_arg, masktype, NULL);
7631 	}
7632 
7633       scale = build_int_cst (scaletype, gs_info.scale);
7634 
7635       prev_stmt_info = NULL;
7636       for (j = 0; j < ncopies; ++j)
7637 	{
7638 	  if (j == 0)
7639 	    {
7640 	      src = vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt_info);
7641 	      op = vec_oprnd0 = vect_get_vec_def_for_operand (gs_info.offset,
7642 							      stmt_info);
7643 	      if (mask)
7644 		{
7645 		  tree mask_vectype = truth_type_for (vectype);
7646 		  mask_op = vec_mask
7647 		    = vect_get_vec_def_for_operand (mask,
7648 						    stmt_info, mask_vectype);
7649 		}
7650 	    }
7651 	  else if (modifier != NONE && (j & 1))
7652 	    {
7653 	      if (modifier == WIDEN)
7654 		{
7655 		  src
7656 		    = vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo,
7657 								   vec_oprnd1);
7658 		  op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
7659 					     stmt_info, gsi);
7660 		  if (mask)
7661 		    mask_op
7662 		      = vec_mask = vect_get_vec_def_for_stmt_copy (vinfo,
7663 								   vec_mask);
7664 		}
7665 	      else if (modifier == NARROW)
7666 		{
7667 		  src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
7668 					      stmt_info, gsi);
7669 		  op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo,
7670 								    vec_oprnd0);
7671 		}
7672 	      else
7673 		gcc_unreachable ();
7674 	    }
7675 	  else
7676 	    {
7677 	      src = vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo,
7678 								 vec_oprnd1);
7679 	      op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo,
7680 								vec_oprnd0);
7681 	      if (mask)
7682 		mask_op = vec_mask = vect_get_vec_def_for_stmt_copy (vinfo,
7683 								     vec_mask);
7684 	    }
7685 
7686 	  if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
7687 	    {
7688 	      gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
7689 				    TYPE_VECTOR_SUBPARTS (srctype)));
7690 	      var = vect_get_new_ssa_name (srctype, vect_simple_var);
7691 	      src = build1 (VIEW_CONVERT_EXPR, srctype, src);
7692 	      gassign *new_stmt
7693 		= gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
7694 	      vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7695 	      src = var;
7696 	    }
7697 
7698 	  if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
7699 	    {
7700 	      gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
7701 				    TYPE_VECTOR_SUBPARTS (idxtype)));
7702 	      var = vect_get_new_ssa_name (idxtype, vect_simple_var);
7703 	      op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
7704 	      gassign *new_stmt
7705 		= gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
7706 	      vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7707 	      op = var;
7708 	    }
7709 
7710 	  if (mask)
7711 	    {
7712 	      tree utype;
7713 	      mask_arg = mask_op;
7714 	      if (modifier == NARROW)
7715 		{
7716 		  var = vect_get_new_ssa_name (mask_halfvectype,
7717 					       vect_simple_var);
7718 		  gassign *new_stmt
7719 		    = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
7720 							: VEC_UNPACK_LO_EXPR,
7721 					   mask_op);
7722 		  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7723 		  mask_arg = var;
7724 		}
7725 	      tree optype = TREE_TYPE (mask_arg);
7726 	      if (TYPE_MODE (masktype) == TYPE_MODE (optype))
7727 		utype = masktype;
7728 	      else
7729 		utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
7730 	      var = vect_get_new_ssa_name (utype, vect_scalar_var);
7731 	      mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
7732 	      gassign *new_stmt
7733 		= gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
7734 	      vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7735 	      mask_arg = var;
7736 	      if (!useless_type_conversion_p (masktype, utype))
7737 		{
7738 		  gcc_assert (TYPE_PRECISION (utype)
7739 			      <= TYPE_PRECISION (masktype));
7740 		  var = vect_get_new_ssa_name (masktype, vect_scalar_var);
7741 		  new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
7742 		  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7743 		  mask_arg = var;
7744 		}
7745 	    }
7746 
7747 	  gcall *new_stmt
7748 	    = gimple_build_call (gs_info.decl, 5, ptr, mask_arg, op, src, scale);
7749 	  stmt_vec_info new_stmt_info
7750 	    = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7751 
7752 	  if (prev_stmt_info == NULL)
7753 	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
7754 	  else
7755 	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7756 	  prev_stmt_info = new_stmt_info;
7757 	}
7758       return true;
7759     }
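  /* SIMD_LANE_ACCESS_P values of 3 or higher mark stores that are part of
     a scan (prefix-sum) computation; those are handled entirely by
     vectorizable_scan_store.  */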
7760   else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
7761     return vectorizable_scan_store (stmt_info, gsi, vec_stmt, ncopies);
7762 
7763   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
7764     DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++;
7765 
7766   if (grouped_store)
7767     {
7768       /* FORNOW */
7769       gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));
7770 
7771       /* We vectorize all the stmts of the interleaving group when we
7772 	 reach the last stmt in the group.  */
7773       if (DR_GROUP_STORE_COUNT (first_stmt_info)
7774 	  < DR_GROUP_SIZE (first_stmt_info)
7775 	  && !slp)
7776 	{
7777 	  *vec_stmt = NULL;
7778 	  return true;
7779 	}
7780 
7781       if (slp)
7782         {
7783           grouped_store = false;
7784           /* VEC_NUM is the number of vect stmts to be created for this
7785              group.  */
7786           vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7787 	  first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7788 	  gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
7789 		      == first_stmt_info);
7790 	  first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7791 	  op = vect_get_store_rhs (first_stmt_info);
7792         }
7793       else
7794         /* VEC_NUM is the number of vect stmts to be created for this
7795            group.  */
7796 	vec_num = group_size;
7797 
7798       ref_type = get_group_alias_ptr_type (first_stmt_info);
7799     }
7800   else
7801     ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
7802 
7803   if (dump_enabled_p ())
7804     dump_printf_loc (MSG_NOTE, vect_location,
7805                      "transform store. ncopies = %d\n", ncopies);
7806 
7807   if (memory_access_type == VMAT_ELEMENTWISE
7808       || memory_access_type == VMAT_STRIDED_SLP)
7809     {
7810       gimple_stmt_iterator incr_gsi;
7811       bool insert_after;
7812       gimple *incr;
7813       tree offvar;
7814       tree ivstep;
7815       tree running_off;
7816       tree stride_base, stride_step, alias_off;
7817       tree vec_oprnd;
7818       tree dr_offset;
7819       unsigned int g;
7820       /* Checked by get_load_store_type.  */
7821       unsigned int const_nunits = nunits.to_constant ();
7822 
7823       gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
7824       gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
7825 
7826       dr_offset = get_dr_vinfo_offset (first_dr_info);
7827       stride_base
7828 	= fold_build_pointer_plus
7829 	    (DR_BASE_ADDRESS (first_dr_info->dr),
7830 	     size_binop (PLUS_EXPR,
7831 			 convert_to_ptrofftype (dr_offset),
7832 			 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
7833       stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
7834 
7835       /* For a store with loop-invariant (but other than power-of-2)
7836          stride (i.e. not a grouped access) like so:
7837 
7838 	   for (i = 0; i < n; i += stride)
7839 	     array[i] = ...;
7840 
7841 	 we generate a new induction variable and new stores from
7842 	 the components of the (vectorized) rhs:
7843 
7844 	   for (j = 0; ; j += VF*stride)
7845 	     vectemp = ...;
7846 	     tmp1 = vectemp[0];
7847 	     array[j] = tmp1;
7848 	     tmp2 = vectemp[1];
7849 	     array[j + stride] = tmp2;
7850 	     ...
7851          */
7852 
7853       unsigned nstores = const_nunits;
7854       unsigned lnel = 1;
7855       tree ltype = elem_type;
7856       tree lvectype = vectype;
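      /* NSTORES pieces of LNEL elements each are stored per input vector;
	 LTYPE is the type of each stored piece and LVECTYPE the type the
	 rhs vector is viewed as when extracting.  The defaults store every
	 element individually.  */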
7857       if (slp)
7858 	{
7859 	  if (group_size < const_nunits
7860 	      && const_nunits % group_size == 0)
7861 	    {
7862 	      nstores = const_nunits / group_size;
7863 	      lnel = group_size;
7864 	      ltype = build_vector_type (elem_type, group_size);
7865 	      lvectype = vectype;
7866 
7867 	      /* First check whether the vec_extract optab supports extraction
7868 		 of vector elts directly; if not, try the fallbacks below.  */
7869 	      scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
7870 	      machine_mode vmode;
7871 	      if (!VECTOR_MODE_P (TYPE_MODE (vectype))
7872 		  || !related_vector_mode (TYPE_MODE (vectype), elmode,
7873 					   group_size).exists (&vmode)
7874 		  || (convert_optab_handler (vec_extract_optab,
7875 					     TYPE_MODE (vectype), vmode)
7876 		      == CODE_FOR_nothing))
7877 		{
7878 		  /* Try to avoid emitting an extract of vector elements
7879 		     by performing the extracts using an integer type of the
7880 		     same size, extracting from a vector of those and then
7881 		     re-interpreting it as the original vector type if
7882 		     supported.  */
7883 		  unsigned lsize
7884 		    = group_size * GET_MODE_BITSIZE (elmode);
7885 		  unsigned int lnunits = const_nunits / group_size;
7886 		  /* If we can't construct such a vector fall back to
7887 		     element extracts from the original vector type and
7888 		     element size stores.  */
7889 		  if (int_mode_for_size (lsize, 0).exists (&elmode)
7890 		      && VECTOR_MODE_P (TYPE_MODE (vectype))
7891 		      && related_vector_mode (TYPE_MODE (vectype), elmode,
7892 					      lnunits).exists (&vmode)
7893 		      && (convert_optab_handler (vec_extract_optab,
7894 						 vmode, elmode)
7895 			  != CODE_FOR_nothing))
7896 		    {
7897 		      nstores = lnunits;
7898 		      lnel = group_size;
7899 		      ltype = build_nonstandard_integer_type (lsize, 1);
7900 		      lvectype = build_vector_type (ltype, nstores);
7901 		    }
7902 		  /* Else fall back to vector extraction anyway.
7903 		     Fewer stores are more important than avoiding spilling
7904 		     of the vector we extract from.  Compared to the
7905 		     construction case in vectorizable_load, no store-forwarding
7906 		     issue exists here for reasonable archs.  */
7907 		}
7908 	    }
7909 	  else if (group_size >= const_nunits
7910 		   && group_size % const_nunits == 0)
7911 	    {
7912 	      nstores = 1;
7913 	      lnel = const_nunits;
7914 	      ltype = vectype;
7915 	      lvectype = vectype;
7916 	    }
7917 	  ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
7918 	  ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7919 	}
7920 
7921       ivstep = stride_step;
7922       ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
7923 			    build_int_cst (TREE_TYPE (ivstep), vf));
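      /* The offset IV advances by VF scalar iterations' worth of stride per
	 vectorized loop iteration, matching the example above.  */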
7924 
7925       standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7926 
7927       stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
7928       ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
7929       create_iv (stride_base, ivstep, NULL,
7930 		 loop, &incr_gsi, insert_after,
7931 		 &offvar, NULL);
7932       incr = gsi_stmt (incr_gsi);
7933       loop_vinfo->add_stmt (incr);
7934 
7935       stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
7936 
7937       prev_stmt_info = NULL;
7938       alias_off = build_int_cst (ref_type, 0);
7939       stmt_vec_info next_stmt_info = first_stmt_info;
7940       for (g = 0; g < group_size; g++)
7941 	{
7942 	  running_off = offvar;
7943 	  if (g)
7944 	    {
7945 	      tree size = TYPE_SIZE_UNIT (ltype);
7946 	      tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
7947 				      size);
7948 	      tree newoff = copy_ssa_name (running_off, NULL);
7949 	      incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7950 					  running_off, pos);
7951 	      vect_finish_stmt_generation (stmt_info, incr, gsi);
7952 	      running_off = newoff;
7953 	    }
7954 	  unsigned int group_el = 0;
7955 	  unsigned HOST_WIDE_INT
7956 	    elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7957 	  for (j = 0; j < ncopies; j++)
7958 	    {
7959 	      /* We've set op and dt above, from vect_get_store_rhs,
7960 		 and first_stmt_info == stmt_info.  */
7961 	      if (j == 0)
7962 		{
7963 		  if (slp)
7964 		    {
7965 		      vect_get_vec_defs (op, NULL_TREE, stmt_info,
7966 					 &vec_oprnds, NULL, slp_node);
7967 		      vec_oprnd = vec_oprnds[0];
7968 		    }
7969 		  else
7970 		    {
7971 		      op = vect_get_store_rhs (next_stmt_info);
7972 		      vec_oprnd = vect_get_vec_def_for_operand
7973 			(op, next_stmt_info);
7974 		    }
7975 		}
7976 	      else
7977 		{
7978 		  if (slp)
7979 		    vec_oprnd = vec_oprnds[j];
7980 		  else
7981 		    vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo,
7982 								vec_oprnd);
7983 		}
7984 	      /* Pun the vector to extract from if necessary.  */
7985 	      if (lvectype != vectype)
7986 		{
7987 		  tree tem = make_ssa_name (lvectype);
7988 		  gimple *pun
7989 		    = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
7990 							lvectype, vec_oprnd));
7991 		  vect_finish_stmt_generation (stmt_info, pun, gsi);
7992 		  vec_oprnd = tem;
7993 		}
7994 	      for (i = 0; i < nstores; i++)
7995 		{
7996 		  tree newref, newoff;
7997 		  gimple *incr, *assign;
7998 		  tree size = TYPE_SIZE (ltype);
7999 		  /* Extract the i'th component.  */
8000 		  tree pos = fold_build2 (MULT_EXPR, bitsizetype,
8001 					  bitsize_int (i), size);
8002 		  tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
8003 					   size, pos);
8004 
8005 		  elem = force_gimple_operand_gsi (gsi, elem, true,
8006 						   NULL_TREE, true,
8007 						   GSI_SAME_STMT);
8008 
8009 		  tree this_off = build_int_cst (TREE_TYPE (alias_off),
8010 						 group_el * elsz);
8011 		  newref = build2 (MEM_REF, ltype,
8012 				   running_off, this_off);
8013 		  vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
8014 
8015 		  /* And store it to *running_off.  */
8016 		  assign = gimple_build_assign (newref, elem);
8017 		  stmt_vec_info assign_info
8018 		    = vect_finish_stmt_generation (stmt_info, assign, gsi);
8019 
8020 		  group_el += lnel;
8021 		  if (! slp
8022 		      || group_el == group_size)
8023 		    {
8024 		      newoff = copy_ssa_name (running_off, NULL);
8025 		      incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8026 						  running_off, stride_step);
8027 		      vect_finish_stmt_generation (stmt_info, incr, gsi);
8028 
8029 		      running_off = newoff;
8030 		      group_el = 0;
8031 		    }
8032 		  if (g == group_size - 1
8033 		      && !slp)
8034 		    {
8035 		      if (j == 0 && i == 0)
8036 			STMT_VINFO_VEC_STMT (stmt_info)
8037 			    = *vec_stmt = assign_info;
8038 		      else
8039 			STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign_info;
8040 		      prev_stmt_info = assign_info;
8041 		    }
8042 		}
8043 	    }
8044 	  next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8045 	  if (slp)
8046 	    break;
8047 	}
8048 
8049       vec_oprnds.release ();
8050       return true;
8051     }
8052 
8053   auto_vec<tree> dr_chain (group_size);
8054   oprnds.create (group_size);
8055 
8056   /* Gather-scatter accesses perform only component accesses; alignment
8057      is irrelevant for them.  */
8058   if (memory_access_type == VMAT_GATHER_SCATTER)
8059     alignment_support_scheme = dr_unaligned_supported;
8060   else
8061     alignment_support_scheme
8062       = vect_supportable_dr_alignment (first_dr_info, false);
8063 
8064   gcc_assert (alignment_support_scheme);
8065   vec_loop_masks *loop_masks
8066     = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
8067        ? &LOOP_VINFO_MASKS (loop_vinfo)
8068        : NULL);
8069   /* Targets with store-lane instructions must not require explicit
8070      realignment.  vect_supportable_dr_alignment always returns either
8071      dr_aligned or dr_unaligned_supported for masked operations.  */
8072   gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
8073 	       && !mask
8074 	       && !loop_masks)
8075 	      || alignment_support_scheme == dr_aligned
8076 	      || alignment_support_scheme == dr_unaligned_supported);
8077 
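  /* With a negative step the scalar access of the first iteration addresses
     the *last* element of the first vector, so the vector access has to
     start NUNITS-1 elements earlier; the OFFSET set here requests that
     adjustment from vect_create_data_ref_ptr.  */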
8078   if (memory_access_type == VMAT_CONTIGUOUS_DOWN
8079       || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8080     offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
8081 
8082   tree bump;
8083   tree vec_offset = NULL_TREE;
8084   if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8085     {
8086       aggr_type = NULL_TREE;
8087       bump = NULL_TREE;
8088     }
8089   else if (memory_access_type == VMAT_GATHER_SCATTER)
8090     {
8091       aggr_type = elem_type;
8092       vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
8093 				       &bump, &vec_offset);
8094     }
8095   else
8096     {
8097       if (memory_access_type == VMAT_LOAD_STORE_LANES)
8098 	aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
8099       else
8100 	aggr_type = vectype;
8101       bump = vect_get_data_ptr_increment (dr_info, aggr_type,
8102 					  memory_access_type);
8103     }
8104 
8105   if (mask)
8106     LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
8107 
8108   /* In case the vectorization factor (VF) is bigger than the number
8109      of elements that we can fit in a vectype (nunits), we have to generate
8110      more than one vector stmt, i.e. we need to "unroll" the
8111      vector stmt by a factor of VF/nunits.  For more details see the
8112      documentation of vect_get_vec_def_for_stmt_copy.  */
8113 
8114   /* In case of interleaving (non-unit grouped access):
8115 
8116         S1:  &base + 2 = x2
8117         S2:  &base = x0
8118         S3:  &base + 1 = x1
8119         S4:  &base + 3 = x3
8120 
8121      We create vectorized stores starting from the base address (the access of
8122      the first stmt in the chain, S2 in the above example), when the last store
8123      stmt of the chain (S4) is reached:
8124 
8125         VS1: &base = vx2
8126 	VS2: &base + vec_size*1 = vx0
8127 	VS3: &base + vec_size*2 = vx1
8128 	VS4: &base + vec_size*3 = vx3
8129 
8130      Then permutation statements are generated:
8131 
8132 	VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
8133 	VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
8134 	...
8135 
8136      And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8137      (the order of the data-refs in the output of vect_permute_store_chain
8138      corresponds to the order of scalar stmts in the interleaving chain - see
8139      the documentation of vect_permute_store_chain()).
8140 
8141      In case of both multiple types and interleaving, above vector stores and
8142      permutation stmts are created for every copy.  The result vector stmts are
8143      put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
8144      STMT_VINFO_RELATED_STMT for the next copies.
8145   */
8146 
8147   prev_stmt_info = NULL;
8148   tree vec_mask = NULL_TREE;
8149   for (j = 0; j < ncopies; j++)
8150     {
8151       stmt_vec_info new_stmt_info;
8152       if (j == 0)
8153 	{
8154           if (slp)
8155             {
8156 	      /* Get vectorized arguments for SLP_NODE.  */
8157 	      vect_get_vec_defs (op, NULL_TREE, stmt_info, &vec_oprnds,
8158 				 NULL, slp_node);
8159 
8160               vec_oprnd = vec_oprnds[0];
8161             }
8162           else
8163             {
8164 	      /* For interleaved stores we collect vectorized defs for all the
8165 		 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
8166 		 used as an input to vect_permute_store_chain(), and OPRNDS as
8167 		 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
8168 
8169 		 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8170 		 OPRNDS are of size 1.  */
8171 	      stmt_vec_info next_stmt_info = first_stmt_info;
8172 	      for (i = 0; i < group_size; i++)
8173 		{
8174 		  /* Since gaps are not supported for interleaved stores,
8175 		     DR_GROUP_SIZE is the exact number of stmts in the chain.
8176 		     Therefore, NEXT_STMT_INFO can't be NULL.  In case there
8177 		     is no interleaving, DR_GROUP_SIZE is 1, and only one
8178 		     iteration of the loop will be executed.  */
8179 		  op = vect_get_store_rhs (next_stmt_info);
8180 		  vec_oprnd = vect_get_vec_def_for_operand
8181 		    (op, next_stmt_info);
8182 		  dr_chain.quick_push (vec_oprnd);
8183 		  oprnds.quick_push (vec_oprnd);
8184 		  next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8185 		}
8186 	      if (mask)
8187 		vec_mask = vect_get_vec_def_for_operand (mask, stmt_info,
8188 							 mask_vectype);
8189 	    }
8190 
8191 	  /* We should have caught mismatched types earlier.  */
8192 	  gcc_assert (useless_type_conversion_p (vectype,
8193 						 TREE_TYPE (vec_oprnd)));
8194 	  bool simd_lane_access_p
8195 	    = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
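	  /* For a simd-lane access whose base is directly the address of a
	     variable with zero offset (and compatible alias sets), reuse the
	     base as DATAREF_PTR with a constant DATAREF_OFFSET instead of
	     creating a pointer induction variable.  */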
8196 	  if (simd_lane_access_p
8197 	      && !loop_masks
8198 	      && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
8199 	      && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
8200 	      && integer_zerop (get_dr_vinfo_offset (first_dr_info))
8201 	      && integer_zerop (DR_INIT (first_dr_info->dr))
8202 	      && alias_sets_conflict_p (get_alias_set (aggr_type),
8203 					get_alias_set (TREE_TYPE (ref_type))))
8204 	    {
8205 	      dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
8206 	      dataref_offset = build_int_cst (ref_type, 0);
8207 	    }
8208 	  else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8209 	    vect_get_gather_scatter_ops (loop, stmt_info, &gs_info,
8210 					 &dataref_ptr, &vec_offset);
8211 	  else
8212 	    dataref_ptr
8213 	      = vect_create_data_ref_ptr (first_stmt_info, aggr_type,
8214 					  simd_lane_access_p ? loop : NULL,
8215 					  offset, &dummy, gsi, &ptr_incr,
8216 					  simd_lane_access_p, NULL_TREE, bump);
8217 	}
8218       else
8219 	{
8220 	  /* For interleaved stores we created vectorized defs for all the
8221 	     defs stored in OPRNDS in the previous iteration (previous copy).
8222 	     DR_CHAIN is then used as an input to vect_permute_store_chain(),
8223 	     and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
8224 	     next copy.
8225 	     If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8226 	     OPRNDS are of size 1.  */
8227 	  for (i = 0; i < group_size; i++)
8228 	    {
8229 	      op = oprnds[i];
8230 	      vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, op);
8231 	      dr_chain[i] = vec_oprnd;
8232 	      oprnds[i] = vec_oprnd;
8233 	    }
8234 	  if (mask)
8235 	    vec_mask = vect_get_vec_def_for_stmt_copy (vinfo, vec_mask);
8236 	  if (dataref_offset)
8237 	    dataref_offset
8238 	      = int_const_binop (PLUS_EXPR, dataref_offset, bump);
8239 	  else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8240 	    vec_offset = vect_get_vec_def_for_stmt_copy (vinfo, vec_offset);
8241 	  else
8242 	    dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8243 					   stmt_info, bump);
8244 	}
8245 
8246       if (memory_access_type == VMAT_LOAD_STORE_LANES)
8247 	{
8248 	  tree vec_array;
8249 
8250 	  /* Get an array into which we can store the individual vectors.  */
8251 	  vec_array = create_vector_array (vectype, vec_num);
8252 
8253 	  /* Invalidate the current contents of VEC_ARRAY.  This should
8254 	     become an RTL clobber too, which prevents the vector registers
8255 	     from being upward-exposed.  */
8256 	  vect_clobber_variable (stmt_info, gsi, vec_array);
8257 
8258 	  /* Store the individual vectors into the array.  */
8259 	  for (i = 0; i < vec_num; i++)
8260 	    {
8261 	      vec_oprnd = dr_chain[i];
8262 	      write_vector_array (stmt_info, gsi, vec_oprnd, vec_array, i);
8263 	    }
8264 
8265 	  tree final_mask = NULL;
8266 	  if (loop_masks)
8267 	    final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
8268 					     vectype, j);
8269 	  if (vec_mask)
8270 	    final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8271 						  vec_mask, gsi);
8272 
8273 	  gcall *call;
8274 	  if (final_mask)
8275 	    {
8276 	      /* Emit:
8277 		   MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8278 				     VEC_ARRAY).  */
8279 	      unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
8280 	      tree alias_ptr = build_int_cst (ref_type, align);
8281 	      call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
8282 						 dataref_ptr, alias_ptr,
8283 						 final_mask, vec_array);
8284 	    }
8285 	  else
8286 	    {
8287 	      /* Emit:
8288 		   MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY).  */
8289 	      data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8290 	      call = gimple_build_call_internal (IFN_STORE_LANES, 1,
8291 						 vec_array);
8292 	      gimple_call_set_lhs (call, data_ref);
8293 	    }
8294 	  gimple_call_set_nothrow (call, true);
8295 	  new_stmt_info = vect_finish_stmt_generation (stmt_info, call, gsi);
8296 
8297 	  /* Record that VEC_ARRAY is now dead.  */
8298 	  vect_clobber_variable (stmt_info, gsi, vec_array);
8299 	}
8300       else
8301 	{
8302 	  new_stmt_info = NULL;
8303 	  if (grouped_store)
8304 	    {
8305 	      if (j == 0)
8306 		result_chain.create (group_size);
8307 	      /* Permute.  */
8308 	      vect_permute_store_chain (dr_chain, group_size, stmt_info, gsi,
8309 					&result_chain);
8310 	    }
8311 
8312 	  stmt_vec_info next_stmt_info = first_stmt_info;
8313 	  for (i = 0; i < vec_num; i++)
8314 	    {
8315 	      unsigned misalign;
8316 	      unsigned HOST_WIDE_INT align;
8317 
8318 	      tree final_mask = NULL_TREE;
8319 	      if (loop_masks)
8320 		final_mask = vect_get_loop_mask (gsi, loop_masks,
8321 						 vec_num * ncopies,
8322 						 vectype, vec_num * j + i);
8323 	      if (vec_mask)
8324 		final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8325 						      vec_mask, gsi);
8326 
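	      /* Scatter stores are emitted as (possibly masked) internal
		 function calls; a single call handles the whole vector, so
		 leave the element loop right after it.  */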
8327 	      if (memory_access_type == VMAT_GATHER_SCATTER)
8328 		{
8329 		  tree scale = size_int (gs_info.scale);
8330 		  gcall *call;
8331 		  if (loop_masks)
8332 		    call = gimple_build_call_internal
8333 		      (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
8334 		       scale, vec_oprnd, final_mask);
8335 		  else
8336 		    call = gimple_build_call_internal
8337 		      (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
8338 		       scale, vec_oprnd);
8339 		  gimple_call_set_nothrow (call, true);
8340 		  new_stmt_info
8341 		    = vect_finish_stmt_generation (stmt_info, call, gsi);
8342 		  break;
8343 		}
8344 
8345 	      if (i > 0)
8346 		/* Bump the vector pointer.  */
8347 		dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8348 					       stmt_info, bump);
8349 
8350 	      if (slp)
8351 		vec_oprnd = vec_oprnds[i];
8352 	      else if (grouped_store)
8353 		/* For grouped stores vectorized defs are interleaved in
8354 		   vect_permute_store_chain().  */
8355 		vec_oprnd = result_chain[i];
8356 
8357 	      align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
8358 	      if (aligned_access_p (first_dr_info))
8359 		misalign = 0;
8360 	      else if (DR_MISALIGNMENT (first_dr_info) == -1)
8361 		{
8362 		  align = dr_alignment (vect_dr_behavior (first_dr_info));
8363 		  misalign = 0;
8364 		}
8365 	      else
8366 		misalign = DR_MISALIGNMENT (first_dr_info);
8367 	      if (dataref_offset == NULL_TREE
8368 		  && TREE_CODE (dataref_ptr) == SSA_NAME)
8369 		set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
8370 					misalign);
8371 
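	      /* A reverse (negative step) store must write the elements in
		 reversed order, e.g. {a0,a1,a2,a3} is stored as
		 {a3,a2,a1,a0}, so permute the rhs vector first.  */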
8372 	      if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8373 		{
8374 		  tree perm_mask = perm_mask_for_reverse (vectype);
8375 		  tree perm_dest = vect_create_destination_var
8376 		    (vect_get_store_rhs (stmt_info), vectype);
8377 		  tree new_temp = make_ssa_name (perm_dest);
8378 
8379 		  /* Generate the permute statement.  */
8380 		  gimple *perm_stmt
8381 		    = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
8382 					   vec_oprnd, perm_mask);
8383 		  vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
8384 
8385 		  perm_stmt = SSA_NAME_DEF_STMT (new_temp);
8386 		  vec_oprnd = new_temp;
8387 		}
8388 
8389 	      /* Arguments are ready.  Create the new vector stmt.  */
8390 	      if (final_mask)
8391 		{
8392 		  align = least_bit_hwi (misalign | align);
8393 		  tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
8394 		  gcall *call
8395 		    = gimple_build_call_internal (IFN_MASK_STORE, 4,
8396 						  dataref_ptr, ptr,
8397 						  final_mask, vec_oprnd);
8398 		  gimple_call_set_nothrow (call, true);
8399 		  new_stmt_info
8400 		    = vect_finish_stmt_generation (stmt_info, call, gsi);
8401 		}
8402 	      else
8403 		{
8404 		  data_ref = fold_build2 (MEM_REF, vectype,
8405 					  dataref_ptr,
8406 					  dataref_offset
8407 					  ? dataref_offset
8408 					  : build_int_cst (ref_type, 0));
8409 		  if (aligned_access_p (first_dr_info))
8410 		    ;
8411 		  else if (DR_MISALIGNMENT (first_dr_info) == -1)
8412 		    TREE_TYPE (data_ref)
8413 		      = build_aligned_type (TREE_TYPE (data_ref),
8414 					    align * BITS_PER_UNIT);
8415 		  else
8416 		    TREE_TYPE (data_ref)
8417 		      = build_aligned_type (TREE_TYPE (data_ref),
8418 					    TYPE_ALIGN (elem_type));
8419 		  vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8420 		  gassign *new_stmt
8421 		    = gimple_build_assign (data_ref, vec_oprnd);
8422 		  new_stmt_info
8423 		    = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8424 		}
8425 
8426 	      if (slp)
8427 		continue;
8428 
8429 	      next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8430 	      if (!next_stmt_info)
8431 		break;
8432 	    }
8433 	}
8434       if (!slp)
8435 	{
8436 	  if (j == 0)
8437 	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
8438 	  else
8439 	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
8440 	  prev_stmt_info = new_stmt_info;
8441 	}
8442     }
8443 
8444   oprnds.release ();
8445   result_chain.release ();
8446   vec_oprnds.release ();
8447 
8448   return true;
8449 }
8450 
8451 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
8452    VECTOR_CST mask.  No checks are made that the target platform supports the
8453    mask, so callers may wish to test can_vec_perm_const_p separately, or use
8454    vect_gen_perm_mask_checked.  */
8455 
8456 tree
8457 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
8458 {
8459   tree mask_type;
8460 
8461   poly_uint64 nunits = sel.length ();
8462   gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
8463 
8464   mask_type = build_vector_type (ssizetype, nunits);
8465   return vec_perm_indices_to_tree (mask_type, sel);
8466 }
8467 
8468 /* Checked version of vect_gen_perm_mask_any.  Asserts can_vec_perm_const_p,
8469    i.e. that the target supports the pattern _for arbitrary input vectors_.  */
8470 
8471 tree
8472 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
8473 {
8474   gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
8475   return vect_gen_perm_mask_any (vectype, sel);
8476 }
8477 
8478 /* Given vector variables X and Y that were generated for the scalar
8479    STMT_INFO, generate instructions to permute the vector elements of X and Y
8480    using permutation mask MASK_VEC, insert them at *GSI and return the
8481    permuted vector variable.  */
8482 
8483 static tree
8484 permute_vec_elements (tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
8485 		      gimple_stmt_iterator *gsi)
8486 {
8487   tree vectype = TREE_TYPE (x);
8488   tree perm_dest, data_ref;
8489   gimple *perm_stmt;
8490 
8491   tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
8492   if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
8493     perm_dest = vect_create_destination_var (scalar_dest, vectype);
8494   else
8495     perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
8496   data_ref = make_ssa_name (perm_dest);
8497 
8498   /* Generate the permute statement.  */
8499   perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
8500   vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
8501 
8502   return data_ref;
8503 }
8504 
8505 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
8506    inserting them on the loop's preheader edge.  Returns true if we
8507    were successful in doing so (and thus STMT_INFO can be moved then),
8508    otherwise returns false.  */
8509 
8510 static bool
8511 hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop)
8512 {
8513   ssa_op_iter i;
8514   tree op;
8515   bool any = false;
8516 
8517   FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8518     {
8519       gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8520       if (!gimple_nop_p (def_stmt)
8521 	  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8522 	{
8523 	  /* Make sure we don't need to recurse.  While we could do
8524 	     so in simple cases, for more complex use webs we don't have
8525 	     an easy way to preserve stmt order to fulfil
8526 	     dependencies within them.  */
8527 	  tree op2;
8528 	  ssa_op_iter i2;
8529 	  if (gimple_code (def_stmt) == GIMPLE_PHI)
8530 	    return false;
8531 	  FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
8532 	    {
8533 	      gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
8534 	      if (!gimple_nop_p (def_stmt2)
8535 		  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
8536 		return false;
8537 	    }
8538 	  any = true;
8539 	}
8540     }
8541 
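  /* If none of the uses is defined inside LOOP there is nothing to hoist
     and STMT_INFO can be moved as-is.  */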
8542   if (!any)
8543     return true;
8544 
8545   FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8546     {
8547       gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8548       if (!gimple_nop_p (def_stmt)
8549 	  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8550 	{
8551 	  gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
8552 	  gsi_remove (&gsi, false);
8553 	  gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
8554 	}
8555     }
8556 
8557   return true;
8558 }
8559 
8560 /* vectorizable_load.
8561 
8562    Check if STMT_INFO reads a non-scalar data-ref (array/pointer/structure)
8563    that can be vectorized.
8564    If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8565    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
8566    Return true if STMT_INFO is vectorizable in this way.  */
8567 
8568 static bool
8569 vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
8570 		   stmt_vec_info *vec_stmt, slp_tree slp_node,
8571 		   slp_instance slp_node_instance,
8572 		   stmt_vector_for_cost *cost_vec)
8573 {
8574   tree scalar_dest;
8575   tree vec_dest = NULL;
8576   tree data_ref = NULL;
8577   stmt_vec_info prev_stmt_info;
8578   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
8579   class loop *loop = NULL;
8580   class loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
8581   bool nested_in_vect_loop = false;
8582   tree elem_type;
8583   tree new_temp;
8584   machine_mode mode;
8585   tree dummy;
8586   enum dr_alignment_support alignment_support_scheme;
8587   tree dataref_ptr = NULL_TREE;
8588   tree dataref_offset = NULL_TREE;
8589   gimple *ptr_incr = NULL;
8590   int ncopies;
8591   int i, j;
8592   unsigned int group_size;
8593   poly_uint64 group_gap_adj;
8594   tree msq = NULL_TREE, lsq;
8595   tree offset = NULL_TREE;
8596   tree byte_offset = NULL_TREE;
8597   tree realignment_token = NULL_TREE;
8598   gphi *phi = NULL;
8599   vec<tree> dr_chain = vNULL;
8600   bool grouped_load = false;
8601   stmt_vec_info first_stmt_info;
8602   stmt_vec_info first_stmt_info_for_drptr = NULL;
8603   bool compute_in_loop = false;
8604   class loop *at_loop;
8605   int vec_num;
8606   bool slp = (slp_node != NULL);
8607   bool slp_perm = false;
8608   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8609   poly_uint64 vf;
8610   tree aggr_type;
8611   gather_scatter_info gs_info;
8612   vec_info *vinfo = stmt_info->vinfo;
8613   tree ref_type;
8614   enum vect_def_type mask_dt = vect_unknown_def_type;
8615 
8616   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8617     return false;
8618 
8619   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8620       && ! vec_stmt)
8621     return false;
8622 
8623   tree mask = NULL_TREE, mask_vectype = NULL_TREE;
8624   if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
8625     {
8626       scalar_dest = gimple_assign_lhs (assign);
8627       if (TREE_CODE (scalar_dest) != SSA_NAME)
8628 	return false;
8629 
8630       tree_code code = gimple_assign_rhs_code (assign);
8631       if (code != ARRAY_REF
8632 	  && code != BIT_FIELD_REF
8633 	  && code != INDIRECT_REF
8634 	  && code != COMPONENT_REF
8635 	  && code != IMAGPART_EXPR
8636 	  && code != REALPART_EXPR
8637 	  && code != MEM_REF
8638 	  && TREE_CODE_CLASS (code) != tcc_declaration)
8639 	return false;
8640     }
8641   else
8642     {
8643       gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
8644       if (!call || !gimple_call_internal_p (call))
8645 	return false;
8646 
8647       internal_fn ifn = gimple_call_internal_fn (call);
8648       if (!internal_load_fn_p (ifn))
8649 	return false;
8650 
8651       scalar_dest = gimple_call_lhs (call);
8652       if (!scalar_dest)
8653 	return false;
8654 
8655       int mask_index = internal_fn_mask_index (ifn);
8656       if (mask_index >= 0)
8657 	{
8658 	  mask = gimple_call_arg (call, mask_index);
8659 	  if (!vect_check_scalar_mask (stmt_info, mask, &mask_dt,
8660 				       &mask_vectype))
8661 	    return false;
8662 	}
8663     }
8664 
8665   if (!STMT_VINFO_DATA_REF (stmt_info))
8666     return false;
8667 
8668   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8669   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8670 
8671   if (loop_vinfo)
8672     {
8673       loop = LOOP_VINFO_LOOP (loop_vinfo);
8674       nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
8675       vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
8676     }
8677   else
8678     vf = 1;
8679 
8680   /* Multiple types in SLP are handled by creating the appropriate number of
8681      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
8682      case of SLP.  */
8683   if (slp)
8684     ncopies = 1;
8685   else
8686     ncopies = vect_get_num_copies (loop_vinfo, vectype);
8687 
8688   gcc_assert (ncopies >= 1);
8689 
8690   /* FORNOW. This restriction should be relaxed.  */
8691   if (nested_in_vect_loop && ncopies > 1)
8692     {
8693       if (dump_enabled_p ())
8694         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8695                          "multiple types in nested loop.\n");
8696       return false;
8697     }
8698 
8699   /* Invalidate assumptions made by dependence analysis when vectorization
8700      on the unrolled body effectively re-orders stmts.  */
8701   if (ncopies > 1
8702       && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8703       && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8704 		   STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8705     {
8706       if (dump_enabled_p ())
8707 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8708 			 "cannot perform implicit CSE when unrolling "
8709 			 "with negative dependence distance\n");
8710       return false;
8711     }
8712 
8713   elem_type = TREE_TYPE (vectype);
8714   mode = TYPE_MODE (vectype);
8715 
8716   /* FORNOW.  In some cases we can vectorize even if the data-type is not
8717      supported (e.g. data copies).  */
8718   if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
8719     {
8720       if (dump_enabled_p ())
8721         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8722                          "Aligned load, but unsupported type.\n");
8723       return false;
8724     }
8725 
8726   /* Check if the load is a part of an interleaving chain.  */
8727   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
8728     {
8729       grouped_load = true;
8730       /* FORNOW */
8731       gcc_assert (!nested_in_vect_loop);
8732       gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
8733 
8734       first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8735       group_size = DR_GROUP_SIZE (first_stmt_info);
8736 
8737       /* Refuse non-SLP vectorization of SLP-only groups.  */
8738       if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info))
8739 	{
8740 	  if (dump_enabled_p ())
8741 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8742 			     "cannot vectorize load in non-SLP mode.\n");
8743 	  return false;
8744 	}
8745 
8746       if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
8747 	slp_perm = true;
8748 
8749       /* Invalidate assumptions made by dependence analysis when vectorization
8750 	 on the unrolled body effectively re-orders stmts.  */
8751       if (!PURE_SLP_STMT (stmt_info)
8752 	  && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8753 	  && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8754 		       STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8755 	{
8756 	  if (dump_enabled_p ())
8757 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8758 			     "cannot perform implicit CSE when performing "
8759 			     "group loads with negative dependence distance\n");
8760 	  return false;
8761 	}
8762     }
8763   else
8764     group_size = 1;
8765 
8766   vect_memory_access_type memory_access_type;
8767   if (!get_load_store_type (stmt_info, vectype, slp, mask, VLS_LOAD, ncopies,
8768 			    &memory_access_type, &gs_info))
8769     return false;
8770 
8771   if (mask)
8772     {
8773       if (memory_access_type == VMAT_CONTIGUOUS)
8774 	{
8775 	  machine_mode vec_mode = TYPE_MODE (vectype);
8776 	  if (!VECTOR_MODE_P (vec_mode)
8777 	      || !can_vec_mask_load_store_p (vec_mode,
8778 					     TYPE_MODE (mask_vectype), true))
8779 	    return false;
8780 	}
8781       else if (memory_access_type != VMAT_LOAD_STORE_LANES
8782 	       && memory_access_type != VMAT_GATHER_SCATTER)
8783 	{
8784 	  if (dump_enabled_p ())
8785 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8786 			     "unsupported access type for masked load.\n");
8787 	  return false;
8788 	}
8789     }
8790 
8791   if (!vec_stmt) /* transformation not required.  */
8792     {
8793       if (!slp)
8794 	STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
8795 
8796       if (loop_vinfo
8797 	  && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
8798 	check_load_store_masking (loop_vinfo, vectype, VLS_LOAD, group_size,
8799 				  memory_access_type, &gs_info, mask);
8800 
8801       STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
8802       vect_model_load_cost (stmt_info, ncopies, memory_access_type,
8803 			    slp_node_instance, slp_node, cost_vec);
8804       return true;
8805     }
8806 
8807   if (!slp)
8808     gcc_assert (memory_access_type
8809 		== STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
8810 
8811   if (dump_enabled_p ())
8812     dump_printf_loc (MSG_NOTE, vect_location,
8813                      "transform load. ncopies = %d\n", ncopies);
8814 
8815   /* Transform.  */
8816 
8817   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
8818   ensure_base_align (dr_info);
8819 
8820   if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
8821     {
8822       vect_build_gather_load_calls (stmt_info, gsi, vec_stmt, &gs_info, mask);
8823       return true;
8824     }
8825 
8826   if (memory_access_type == VMAT_INVARIANT)
8827     {
8828       gcc_assert (!grouped_load && !mask && !bb_vinfo);
8829       /* If we have versioned for aliasing or the loop doesn't
8830 	 have any data dependencies that would preclude this,
8831 	 then we are sure this is a loop invariant load and
8832 	 thus we can insert it on the preheader edge.  */
8833       bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
8834 		      && !nested_in_vect_loop
8835 		      && hoist_defs_of_uses (stmt_info, loop));
8836       if (hoist_p)
8837 	{
8838 	  gassign *stmt = as_a <gassign *> (stmt_info->stmt);
8839 	  if (dump_enabled_p ())
8840 	    dump_printf_loc (MSG_NOTE, vect_location,
8841 			     "hoisting out of the vectorized loop: %G", stmt);
8842 	  scalar_dest = copy_ssa_name (scalar_dest);
8843 	  tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
8844 	  gsi_insert_on_edge_immediate
8845 	    (loop_preheader_edge (loop),
8846 	     gimple_build_assign (scalar_dest, rhs));
8847 	}
8848       /* These copies are all equivalent, but currently the representation
8849 	 requires a separate STMT_VINFO_VEC_STMT for each one.  */
8850       prev_stmt_info = NULL;
8851       gimple_stmt_iterator gsi2 = *gsi;
8852       gsi_next (&gsi2);
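      /* GSI2 points just past the scalar stmt, so that in the non-hoisted
	 case the vector initialization is inserted right after it (new
	 stmts are emitted before the iterator) and the use of SCALAR_DEST
	 stays after its definition.  */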
8853       for (j = 0; j < ncopies; j++)
8854 	{
8855 	  stmt_vec_info new_stmt_info;
8856 	  if (hoist_p)
8857 	    {
8858 	      new_temp = vect_init_vector (stmt_info, scalar_dest,
8859 					   vectype, NULL);
8860 	      gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
8861 	      new_stmt_info = vinfo->add_stmt (new_stmt);
8862 	    }
8863 	  else
8864 	    {
8865 	      new_temp = vect_init_vector (stmt_info, scalar_dest,
8866 					   vectype, &gsi2);
8867 	      new_stmt_info = vinfo->lookup_def (new_temp);
8868 	    }
8869 	  if (slp)
8870 	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
8871 	  else if (j == 0)
8872 	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
8873 	  else
8874 	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
8875 	  prev_stmt_info = new_stmt_info;
8876 	}
8877       return true;
8878     }
8879 
8880   if (memory_access_type == VMAT_ELEMENTWISE
8881       || memory_access_type == VMAT_STRIDED_SLP)
8882     {
8883       gimple_stmt_iterator incr_gsi;
8884       bool insert_after;
8885       gimple *incr;
8886       tree offvar;
8887       tree ivstep;
8888       tree running_off;
8889       vec<constructor_elt, va_gc> *v = NULL;
8890       tree stride_base, stride_step, alias_off;
8891       /* Checked by get_load_store_type.  */
8892       unsigned int const_nunits = nunits.to_constant ();
8893       unsigned HOST_WIDE_INT cst_offset = 0;
8894       tree dr_offset;
8895 
8896       gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
8897       gcc_assert (!nested_in_vect_loop);
8898 
8899       if (grouped_load)
8900 	{
8901 	  first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8902 	  first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8903 	}
8904       else
8905 	{
8906 	  first_stmt_info = stmt_info;
8907 	  first_dr_info = dr_info;
8908 	}
8909       if (slp && grouped_load)
8910 	{
8911 	  group_size = DR_GROUP_SIZE (first_stmt_info);
8912 	  ref_type = get_group_alias_ptr_type (first_stmt_info);
8913 	}
8914       else
8915 	{
8916 	  if (grouped_load)
8917 	    cst_offset
8918 	      = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
8919 		 * vect_get_place_in_interleaving_chain (stmt_info,
8920 							 first_stmt_info));
8921 	  group_size = 1;
8922 	  ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
8923 	}
8924 
8925       dr_offset = get_dr_vinfo_offset (first_dr_info);
8926       stride_base
8927 	= fold_build_pointer_plus
8928 	    (DR_BASE_ADDRESS (first_dr_info->dr),
8929 	     size_binop (PLUS_EXPR,
8930 			 convert_to_ptrofftype (dr_offset),
8931 			 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
8932       stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
8933 
8934       /* For a load with loop-invariant (but other than power-of-2)
8935          stride (i.e. not a grouped access) like so:
8936 
8937 	   for (i = 0; i < n; i += stride)
8938 	     ... = array[i];
8939 
8940 	 we generate a new induction variable and new accesses to
8941 	 form a new vector (or vectors, depending on ncopies):
8942 
8943 	   for (j = 0; ; j += VF*stride)
8944 	     tmp1 = array[j];
8945 	     tmp2 = array[j + stride];
8946 	     ...
8947 	     vectemp = {tmp1, tmp2, ...}
8948          */
8949 
8950       ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
8951 			    build_int_cst (TREE_TYPE (stride_step), vf));
8952 
8953       standard_iv_increment_position (loop, &incr_gsi, &insert_after);
8954 
8955       stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
8956       ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
8957       create_iv (stride_base, ivstep, NULL,
8958 		 loop, &incr_gsi, insert_after,
8959 		 &offvar, NULL);
8960       incr = gsi_stmt (incr_gsi);
8961       loop_vinfo->add_stmt (incr);
8962 
8963       stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
8964 
8965       prev_stmt_info = NULL;
8966       running_off = offvar;
8967       alias_off = build_int_cst (ref_type, 0);
8968       int nloads = const_nunits;
8969       int lnel = 1;
8970       tree ltype = TREE_TYPE (vectype);
8971       tree lvectype = vectype;
8972       auto_vec<tree> dr_chain;
8973       if (memory_access_type == VMAT_STRIDED_SLP)
8974 	{
8975 	  if (group_size < const_nunits)
8976 	    {
8977 	      /* First check if vec_init optab supports construction from vector
8978 		 elts directly.  Otherwise avoid emitting a constructor of
8979 		 vector elements by performing the loads using an integer type
8980 		 of the same size, constructing a vector of those and then
8981 		 re-interpreting it as the original vector type.  This avoids a
8982 		 huge runtime penalty due to the general inability to perform
8983 		 store forwarding from smaller stores to a larger load.  */
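	      /* A conceptual sketch (illustrative only, SSA names invented):
		 if the target cannot build the vector from sub-vectors
		 directly, then with a group of two ints and a V4SI vectype
		 each pair is loaded as a single 64-bit integer, the two
		 integers feed a two-element constructor and the result is
		 view-converted back to the original vector type:

		   _1 = MEM <long long int> [off_0];
		   _2 = MEM <long long int> [off_1];
		   _3 = {_1, _2};
		   vect_x = VIEW_CONVERT_EXPR<vector(4) int>(_3);  */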
8984 	      tree ptype;
8985 	      tree vtype
8986 		= vector_vector_composition_type (vectype,
8987 						  const_nunits / group_size,
8988 						  &ptype);
8989 	      if (vtype != NULL_TREE)
8990 		{
8991 		  nloads = const_nunits / group_size;
8992 		  lnel = group_size;
8993 		  lvectype = vtype;
8994 		  ltype = ptype;
8995 		}
8996 	    }
8997 	  else
8998 	    {
8999 	      nloads = 1;
9000 	      lnel = const_nunits;
9001 	      ltype = vectype;
9002 	    }
9003 	  ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
9004 	}
9005       /* Load as vector(1) scalar_type if the vectype has a single element.  */
9006       else if (nloads == 1)
9007 	ltype = vectype;
9008 
9009       if (slp)
9010 	{
9011 	  /* For SLP permutation support we need to load the whole group,
9012 	     not only the number of vector stmts the permutation result
9013 	     fits in.  */
9014 	  if (slp_perm)
9015 	    {
9016 	      /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
9017 		 variable VF.  */
9018 	      unsigned int const_vf = vf.to_constant ();
9019 	      ncopies = CEIL (group_size * const_vf, const_nunits);
9020 	      dr_chain.create (ncopies);
9021 	    }
9022 	  else
9023 	    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9024 	}
9025       unsigned int group_el = 0;
9026       unsigned HOST_WIDE_INT
9027 	elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
9028       for (j = 0; j < ncopies; j++)
9029 	{
9030 	  if (nloads > 1)
9031 	    vec_alloc (v, nloads);
9032 	  stmt_vec_info new_stmt_info = NULL;
9033 	  for (i = 0; i < nloads; i++)
9034 	    {
9035 	      tree this_off = build_int_cst (TREE_TYPE (alias_off),
9036 					     group_el * elsz + cst_offset);
9037 	      tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
9038 	      vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9039 	      gassign *new_stmt
9040 		= gimple_build_assign (make_ssa_name (ltype), data_ref);
9041 	      new_stmt_info
9042 		= vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9043 	      if (nloads > 1)
9044 		CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9045 					gimple_assign_lhs (new_stmt));
9046 
9047 	      group_el += lnel;
9048 	      if (! slp
9049 		  || group_el == group_size)
9050 		{
9051 		  tree newoff = copy_ssa_name (running_off);
9052 		  gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
9053 						      running_off, stride_step);
9054 		  vect_finish_stmt_generation (stmt_info, incr, gsi);
9055 
9056 		  running_off = newoff;
9057 		  group_el = 0;
9058 		}
9059 	    }
9060 	  if (nloads > 1)
9061 	    {
9062 	      tree vec_inv = build_constructor (lvectype, v);
9063 	      new_temp = vect_init_vector (stmt_info, vec_inv, lvectype, gsi);
9064 	      new_stmt_info = vinfo->lookup_def (new_temp);
9065 	      if (lvectype != vectype)
9066 		{
9067 		  gassign *new_stmt
9068 		    = gimple_build_assign (make_ssa_name (vectype),
9069 					   VIEW_CONVERT_EXPR,
9070 					   build1 (VIEW_CONVERT_EXPR,
9071 						   vectype, new_temp));
9072 		  new_stmt_info
9073 		    = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9074 		}
9075 	    }
9076 
9077 	  if (slp)
9078 	    {
9079 	      if (slp_perm)
9080 		dr_chain.quick_push (gimple_assign_lhs (new_stmt_info->stmt));
9081 	      else
9082 		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
9083 	    }
9084 	  else
9085 	    {
9086 	      if (j == 0)
9087 		STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
9088 	      else
9089 		STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
9090 	      prev_stmt_info = new_stmt_info;
9091 	    }
9092 	}
9093       if (slp_perm)
9094 	{
9095 	  unsigned n_perms;
9096 	  vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
9097 					slp_node_instance, false, &n_perms);
9098 	}
9099       return true;
9100     }
9101 
9102   if (memory_access_type == VMAT_GATHER_SCATTER
9103       || (!slp && memory_access_type == VMAT_CONTIGUOUS))
9104     grouped_load = false;
9105 
9106   if (grouped_load)
9107     {
9108       first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9109       group_size = DR_GROUP_SIZE (first_stmt_info);
9110       /* For SLP vectorization we directly vectorize a subchain
9111          without permutation.  */
9112       if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
9113 	first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
9114       /* For BB vectorization always use the first stmt to base
9115 	 the data ref pointer on.  */
9116       if (bb_vinfo)
9117 	first_stmt_info_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
9118 
9119       /* Check if the chain of loads is already vectorized.  */
9120       if (STMT_VINFO_VEC_STMT (first_stmt_info)
9121 	  /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
9122 	     ???  But we can only do so if there is exactly one
9123 	     as we have no way to get at the rest.  Leave the CSE
9124 	     opportunity alone.
9125 	     ???  With the group load eventually participating
9126 	     in multiple different permutations (having multiple
9127 	     slp nodes which refer to the same group) the CSE
9128 	     is even wrong code.  See PR56270.  */
9129 	  && !slp)
9130 	{
9131 	  *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
9132 	  return true;
9133 	}
9134       first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
9135       group_gap_adj = 0;
9136 
9137       /* VEC_NUM is the number of vect stmts to be created for this group.  */
9138       if (slp)
9139 	{
9140 	  grouped_load = false;
9141 	  /* If an SLP permutation is from N elements to N elements,
9142 	     and if one vector holds a whole number of N, we can load
9143 	     the inputs to the permutation in the same way as an
9144 	     unpermuted sequence.  In other cases we need to load the
9145 	     whole group, not only the number of vector stmts the
9146 	     permutation result fits in.  */
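	  /* Illustrative numbers (assuming a constant VF): with
	     group_size == 2 and nunits == 4 one vector holds two whole
	     groups, so an N-to-N permutation can work on the unpermuted
	     loads; with group_size == 3 and nunits == 4 the whole group
	     must be loaded and vec_num becomes CEIL (3 * VF, 4).  */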
9147 	  if (slp_perm
9148 	      && (group_size != SLP_INSTANCE_GROUP_SIZE (slp_node_instance)
9149 		  || !multiple_p (nunits, group_size)))
9150 	    {
9151 	      /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
9152 		 variable VF; see vect_transform_slp_perm_load.  */
9153 	      unsigned int const_vf = vf.to_constant ();
9154 	      unsigned int const_nunits = nunits.to_constant ();
9155 	      vec_num = CEIL (group_size * const_vf, const_nunits);
9156 	      group_gap_adj = vf * group_size - nunits * vec_num;
9157 	    }
9158 	  else
9159 	    {
9160 	      vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9161 	      group_gap_adj
9162 		= group_size - SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
9163 	    }
9164     	}
9165       else
9166 	vec_num = group_size;
9167 
9168       ref_type = get_group_alias_ptr_type (first_stmt_info);
9169     }
9170   else
9171     {
9172       first_stmt_info = stmt_info;
9173       first_dr_info = dr_info;
9174       group_size = vec_num = 1;
9175       group_gap_adj = 0;
9176       ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
9177     }
9178 
9179   /* Gather-scatter accesses perform only component accesses, so alignment
9180      is irrelevant for them.  */
9181   if (memory_access_type == VMAT_GATHER_SCATTER)
9182     alignment_support_scheme = dr_unaligned_supported;
9183   else
9184     alignment_support_scheme
9185       = vect_supportable_dr_alignment (first_dr_info, false);
9186 
9187   gcc_assert (alignment_support_scheme);
9188   vec_loop_masks *loop_masks
9189     = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
9190        ? &LOOP_VINFO_MASKS (loop_vinfo)
9191        : NULL);
9192   /* Targets with load-lane instructions must not require explicit
9193      realignment.  vect_supportable_dr_alignment always returns either
9194      dr_aligned or dr_unaligned_supported for masked operations.  */
9195   gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
9196 	       && !mask
9197 	       && !loop_masks)
9198 	      || alignment_support_scheme == dr_aligned
9199 	      || alignment_support_scheme == dr_unaligned_supported);
9200 
9201   /* In case the vectorization factor (VF) is bigger than the number
9202      of elements that we can fit in a vectype (nunits), we have to generate
9203      more than one vector stmt - i.e - we need to "unroll" the
9204      vector stmt by a factor VF/nunits.  In doing so, we record a pointer
9205      from one copy of the vector stmt to the next, in the field
9206      STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
9207      stages to find the correct vector defs to be used when vectorizing
9208      stmts that use the defs of the current stmt.  The example below
9209      illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
9210      need to create 4 vectorized stmts):
9211 
9212      before vectorization:
9213                                 RELATED_STMT    VEC_STMT
9214         S1:     x = memref      -               -
9215         S2:     z = x + 1       -               -
9216 
9217      step 1: vectorize stmt S1:
9218         We first create the vector stmt VS1_0, and, as usual, record a
9219         pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
9220         Next, we create the vector stmt VS1_1, and record a pointer to
9221         it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
9222         Similarly, for VS1_2 and VS1_3.  This is the resulting chain of
9223         stmts and pointers:
9224                                 RELATED_STMT    VEC_STMT
9225         VS1_0:  vx0 = memref0   VS1_1           -
9226         VS1_1:  vx1 = memref1   VS1_2           -
9227         VS1_2:  vx2 = memref2   VS1_3           -
9228         VS1_3:  vx3 = memref3   -               -
9229         S1:     x = load        -               VS1_0
9230         S2:     z = x + 1       -               -
9231 
9232      See the documentation of vect_get_vec_def_for_stmt_copy for how the
9233      information we recorded in the RELATED_STMT field is used to vectorize
9234      stmt S2.  */
9235 
9236   /* In case of interleaving (non-unit grouped access):
9237 
9238      S1:  x2 = &base + 2
9239      S2:  x0 = &base
9240      S3:  x1 = &base + 1
9241      S4:  x3 = &base + 3
9242 
9243      Vectorized loads are created in the order of memory accesses
9244      starting from the access of the first stmt of the chain:
9245 
9246      VS1: vx0 = &base
9247      VS2: vx1 = &base + vec_size*1
9248      VS3: vx3 = &base + vec_size*2
9249      VS4: vx4 = &base + vec_size*3
9250 
9251      Then permutation statements are generated:
9252 
9253      VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
9254      VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
9255        ...
9256 
9257      And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
9258      (the order of the data-refs in the output of vect_permute_load_chain
9259      corresponds to the order of scalar stmts in the interleaving chain - see
9260      the documentation of vect_permute_load_chain()).
9261      The generation of permutation stmts and recording them in
9262      STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
9263 
9264      In case of both multiple types and interleaving, the vector loads and
9265      permutation stmts above are created for every copy.  The result vector
9266      stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
9267      corresponding STMT_VINFO_RELATED_STMT for the next copies.  */
9268 
9269   /* If the data reference is aligned (dr_aligned) or potentially unaligned
9270      on a target that supports unaligned accesses (dr_unaligned_supported)
9271      we generate the following code:
9272          p = initial_addr;
9273          indx = 0;
9274          loop {
9275 	   p = p + indx * vectype_size;
9276            vec_dest = *(p);
9277            indx = indx + 1;
9278          }
9279 
9280      Otherwise, the data reference is potentially unaligned on a target that
9281      does not support unaligned accesses (dr_explicit_realign_optimized) -
9282      then generate the following code, in which the data in each iteration is
9283      obtained by two vector loads, one from the previous iteration, and one
9284      from the current iteration:
9285          p1 = initial_addr;
9286          msq_init = *(floor(p1))
9287          p2 = initial_addr + VS - 1;
9288          realignment_token = call target_builtin;
9289          indx = 0;
9290          loop {
9291            p2 = p2 + indx * vectype_size
9292            lsq = *(floor(p2))
9293            vec_dest = realign_load (msq, lsq, realignment_token)
9294            indx = indx + 1;
9295            msq = lsq;
9296          }   */
9297 
9298   /* If the misalignment remains the same throughout the execution of the
9299      loop, we can create the init_addr and permutation mask at the loop
9300      preheader.  Otherwise, it needs to be created inside the loop.
9301      This can only occur when vectorizing memory accesses in the inner-loop
9302      nested within an outer-loop that is being vectorized.  */
9303 
9304   if (nested_in_vect_loop
9305       && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
9306 		      GET_MODE_SIZE (TYPE_MODE (vectype))))
9307     {
9308       gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
9309       compute_in_loop = true;
9310     }
9311 
9312   bool diff_first_stmt_info
9313     = first_stmt_info_for_drptr && first_stmt_info != first_stmt_info_for_drptr;
9314 
9315   if ((alignment_support_scheme == dr_explicit_realign_optimized
9316        || alignment_support_scheme == dr_explicit_realign)
9317       && !compute_in_loop)
9318     {
9319       /* If we have a different first_stmt_info, we can't set up realignment
9320 	 here, since we can't guarantee that the first_stmt_info DR has been
9321 	 initialized yet; instead use the first_stmt_info_for_drptr DR, bumped
9322 	 by its distance from the first_stmt_info DR, as below.  */
9323       if (!diff_first_stmt_info)
9324 	msq = vect_setup_realignment (first_stmt_info, gsi, &realignment_token,
9325 				      alignment_support_scheme, NULL_TREE,
9326 				      &at_loop);
9327       if (alignment_support_scheme == dr_explicit_realign_optimized)
9328 	{
9329 	  phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
9330 	  byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
9331 				    size_one_node);
9332 	  gcc_assert (!first_stmt_info_for_drptr);
9333 	}
9334     }
9335   else
9336     at_loop = loop;
9337 
9338   if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9339     offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
9340 
9341   tree bump;
9342   tree vec_offset = NULL_TREE;
9343   if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9344     {
9345       aggr_type = NULL_TREE;
9346       bump = NULL_TREE;
9347     }
9348   else if (memory_access_type == VMAT_GATHER_SCATTER)
9349     {
9350       aggr_type = elem_type;
9351       vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
9352 				       &bump, &vec_offset);
9353     }
9354   else
9355     {
9356       if (memory_access_type == VMAT_LOAD_STORE_LANES)
9357 	aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
9358       else
9359 	aggr_type = vectype;
9360       bump = vect_get_data_ptr_increment (dr_info, aggr_type,
9361 					  memory_access_type);
9362     }
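  /* Informally (a simplification of vect_get_data_ptr_increment): for a
     plain contiguous V4SI access the bump is 16 bytes, so each copy below
     effectively performs

       dataref_ptr_2 = dataref_ptr_1 p+ 16;

     before the next vector load, while reversed accesses use a negative
     bump.  */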
9363 
9364   tree vec_mask = NULL_TREE;
9365   prev_stmt_info = NULL;
9366   poly_uint64 group_elt = 0;
9367   for (j = 0; j < ncopies; j++)
9368     {
9369       stmt_vec_info new_stmt_info = NULL;
9370       /* 1. Create the vector or array pointer update chain.  */
9371       if (j == 0)
9372 	{
9373 	  bool simd_lane_access_p
9374 	    = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
9375 	  if (simd_lane_access_p
9376 	      && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
9377 	      && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
9378 	      && integer_zerop (get_dr_vinfo_offset (first_dr_info))
9379 	      && integer_zerop (DR_INIT (first_dr_info->dr))
9380 	      && alias_sets_conflict_p (get_alias_set (aggr_type),
9381 					get_alias_set (TREE_TYPE (ref_type)))
9382 	      && (alignment_support_scheme == dr_aligned
9383 		  || alignment_support_scheme == dr_unaligned_supported))
9384 	    {
9385 	      dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
9386 	      dataref_offset = build_int_cst (ref_type, 0);
9387 	    }
9388 	  else if (diff_first_stmt_info)
9389 	    {
9390 	      dataref_ptr
9391 		= vect_create_data_ref_ptr (first_stmt_info_for_drptr,
9392 					    aggr_type, at_loop, offset, &dummy,
9393 					    gsi, &ptr_incr, simd_lane_access_p,
9394 					    byte_offset, bump);
9395 	      /* Adjust the pointer by the difference to first_stmt.  */
9396 	      data_reference_p ptrdr
9397 		= STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
9398 	      tree diff
9399 		= fold_convert (sizetype,
9400 				size_binop (MINUS_EXPR,
9401 					    DR_INIT (first_dr_info->dr),
9402 					    DR_INIT (ptrdr)));
9403 	      dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
9404 					     stmt_info, diff);
9405 	      if (alignment_support_scheme == dr_explicit_realign)
9406 		{
9407 		  msq = vect_setup_realignment (first_stmt_info_for_drptr, gsi,
9408 						&realignment_token,
9409 						alignment_support_scheme,
9410 						dataref_ptr, &at_loop);
9411 		  gcc_assert (!compute_in_loop);
9412 		}
9413 	    }
9414 	  else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9415 	    vect_get_gather_scatter_ops (loop, stmt_info, &gs_info,
9416 					 &dataref_ptr, &vec_offset);
9417 	  else
9418 	    dataref_ptr
9419 	      = vect_create_data_ref_ptr (first_stmt_info, aggr_type, at_loop,
9420 					  offset, &dummy, gsi, &ptr_incr,
9421 					  simd_lane_access_p,
9422 					  byte_offset, bump);
9423 	  if (mask)
9424 	    {
9425 	      if (slp_node)
9426 		{
9427 		  auto_vec<vec<tree> > vec_defs (1);
9428 		  vect_get_slp_defs (slp_node, &vec_defs);
9429 		  vec_mask = vec_defs[0][0];
9430 		}
9431 	      else
9432 		vec_mask = vect_get_vec_def_for_operand (mask, stmt_info,
9433 							 mask_vectype);
9434 	    }
9435 	}
9436       else
9437 	{
9438 	  if (dataref_offset)
9439 	    dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
9440 					      bump);
9441 	  else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9442 	    vec_offset = vect_get_vec_def_for_stmt_copy (vinfo, vec_offset);
9443 	  else
9444 	    dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
9445 					   stmt_info, bump);
9446 	  if (mask)
9447 	    vec_mask = vect_get_vec_def_for_stmt_copy (vinfo, vec_mask);
9448 	}
9449 
9450       if (grouped_load || slp_perm)
9451 	dr_chain.create (vec_num);
9452 
9453       if (memory_access_type == VMAT_LOAD_STORE_LANES)
9454 	{
9455 	  tree vec_array;
9456 
9457 	  vec_array = create_vector_array (vectype, vec_num);
9458 
9459 	  tree final_mask = NULL_TREE;
9460 	  if (loop_masks)
9461 	    final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
9462 					     vectype, j);
9463 	  if (vec_mask)
9464 	    final_mask = prepare_load_store_mask (mask_vectype, final_mask,
9465 						  vec_mask, gsi);
9466 
9467 	  gcall *call;
9468 	  if (final_mask)
9469 	    {
9470 	      /* Emit:
9471 		   VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
9472 		                                VEC_MASK).  */
9473 	      unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
9474 	      tree alias_ptr = build_int_cst (ref_type, align);
9475 	      call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
9476 						 dataref_ptr, alias_ptr,
9477 						 final_mask);
9478 	    }
9479 	  else
9480 	    {
9481 	      /* Emit:
9482 		   VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]).  */
9483 	      data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
9484 	      call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
9485 	    }
9486 	  gimple_call_set_lhs (call, vec_array);
9487 	  gimple_call_set_nothrow (call, true);
9488 	  new_stmt_info = vect_finish_stmt_generation (stmt_info, call, gsi);
9489 
9490 	  /* Extract each vector into an SSA_NAME.  */
9491 	  for (i = 0; i < vec_num; i++)
9492 	    {
9493 	      new_temp = read_vector_array (stmt_info, gsi, scalar_dest,
9494 					    vec_array, i);
9495 	      dr_chain.quick_push (new_temp);
9496 	    }
9497 
9498 	  /* Record the mapping between SSA_NAMEs and statements.  */
9499 	  vect_record_grouped_load_vectors (stmt_info, dr_chain);
9500 
9501 	  /* Record that VEC_ARRAY is now dead.  */
9502 	  vect_clobber_variable (stmt_info, gsi, vec_array);
9503 	}
9504       else
9505 	{
9506 	  for (i = 0; i < vec_num; i++)
9507 	    {
9508 	      tree final_mask = NULL_TREE;
9509 	      if (loop_masks
9510 		  && memory_access_type != VMAT_INVARIANT)
9511 		final_mask = vect_get_loop_mask (gsi, loop_masks,
9512 						 vec_num * ncopies,
9513 						 vectype, vec_num * j + i);
9514 	      if (vec_mask)
9515 		final_mask = prepare_load_store_mask (mask_vectype, final_mask,
9516 						      vec_mask, gsi);
9517 
9518 	      if (i > 0)
9519 		dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
9520 					       stmt_info, bump);
9521 
9522 	      /* 2. Create the vector-load in the loop.  */
9523 	      gimple *new_stmt = NULL;
9524 	      switch (alignment_support_scheme)
9525 		{
9526 		case dr_aligned:
9527 		case dr_unaligned_supported:
9528 		  {
9529 		    unsigned int misalign;
9530 		    unsigned HOST_WIDE_INT align;
9531 
9532 		    if (memory_access_type == VMAT_GATHER_SCATTER)
9533 		      {
9534 			tree zero = build_zero_cst (vectype);
9535 			tree scale = size_int (gs_info.scale);
9536 			gcall *call;
9537 			if (loop_masks)
9538 			  call = gimple_build_call_internal
9539 			    (IFN_MASK_GATHER_LOAD, 5, dataref_ptr,
9540 			     vec_offset, scale, zero, final_mask);
9541 			else
9542 			  call = gimple_build_call_internal
9543 			    (IFN_GATHER_LOAD, 4, dataref_ptr,
9544 			     vec_offset, scale, zero);
9545 			gimple_call_set_nothrow (call, true);
9546 			new_stmt = call;
9547 			data_ref = NULL_TREE;
9548 			break;
9549 		      }
9550 
9551 		    align =
9552 		      known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
9553 		    if (alignment_support_scheme == dr_aligned)
9554 		      {
9555 			gcc_assert (aligned_access_p (first_dr_info));
9556 			misalign = 0;
9557 		      }
9558 		    else if (DR_MISALIGNMENT (first_dr_info) == -1)
9559 		      {
9560 			align = dr_alignment
9561 			  (vect_dr_behavior (first_dr_info));
9562 			misalign = 0;
9563 		      }
9564 		    else
9565 		      misalign = DR_MISALIGNMENT (first_dr_info);
9566 		    if (dataref_offset == NULL_TREE
9567 			&& TREE_CODE (dataref_ptr) == SSA_NAME)
9568 		      set_ptr_info_alignment (get_ptr_info (dataref_ptr),
9569 					      align, misalign);
9570 
9571 		    if (final_mask)
9572 		      {
9573 			align = least_bit_hwi (misalign | align);
9574 			tree ptr = build_int_cst (ref_type,
9575 						  align * BITS_PER_UNIT);
9576 			gcall *call
9577 			  = gimple_build_call_internal (IFN_MASK_LOAD, 3,
9578 							dataref_ptr, ptr,
9579 							final_mask);
9580 			gimple_call_set_nothrow (call, true);
9581 			new_stmt = call;
9582 			data_ref = NULL_TREE;
9583 		      }
9584 		    else
9585 		      {
9586 			tree ltype = vectype;
9587 			tree new_vtype = NULL_TREE;
9588 			/* If there's no peeling for gaps but we have a gap
9589 			   with SLP loads, then load only the lower half of
9590 			   the vector.  See get_group_load_store_type for
9591 			   when we apply this optimization.  */
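			/* An illustrative sketch (names invented): with
			   group_size == nunits == 4 and a gap of 2 only the
			   low half is loaded and the rest is zero-filled,
			   roughly

			     _1 = MEM <vector(2) int> [ptr];
			     vect_x = {_1, { 0, 0 }};

			   with a VIEW_CONVERT_EXPR added when the
			   constructor type differs from VECTYPE, and the
			   constructor operands swapped for
			   VMAT_CONTIGUOUS_REVERSE.  */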
9592 			if (slp
9593 			    && loop_vinfo
9594 			    && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
9595 			    && DR_GROUP_GAP (first_stmt_info) != 0
9596 			    && known_eq (nunits,
9597 					 (group_size
9598 					  - DR_GROUP_GAP (first_stmt_info)) * 2)
9599 			    && known_eq (nunits, group_size))
9600 			  {
9601 			    tree half_vtype;
9602 			    new_vtype
9603 			      = vector_vector_composition_type (vectype, 2,
9604 								&half_vtype);
9605 			    if (new_vtype != NULL_TREE)
9606 			      ltype = half_vtype;
9607 			  }
9608 			tree offset
9609 			  = (dataref_offset ? dataref_offset
9610 					    : build_int_cst (ref_type, 0));
9611 			if (ltype != vectype
9612 			    && memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9613 			  {
9614 			    unsigned HOST_WIDE_INT gap
9615 			      = DR_GROUP_GAP (first_stmt_info);
9616 			    gap *= tree_to_uhwi (TYPE_SIZE_UNIT (elem_type));
9617 			    tree gapcst = build_int_cst (ref_type, gap);
9618 			    offset = size_binop (PLUS_EXPR, offset, gapcst);
9619 			  }
9620 			data_ref
9621 			  = fold_build2 (MEM_REF, ltype, dataref_ptr, offset);
9622 			if (alignment_support_scheme == dr_aligned)
9623 			  ;
9624 			else if (DR_MISALIGNMENT (first_dr_info) == -1)
9625 			  TREE_TYPE (data_ref)
9626 			    = build_aligned_type (TREE_TYPE (data_ref),
9627 						  align * BITS_PER_UNIT);
9628 			else
9629 			  TREE_TYPE (data_ref)
9630 			    = build_aligned_type (TREE_TYPE (data_ref),
9631 						  TYPE_ALIGN (elem_type));
9632 			if (ltype != vectype)
9633 			  {
9634 			    vect_copy_ref_info (data_ref,
9635 						DR_REF (first_dr_info->dr));
9636 			    tree tem = make_ssa_name (ltype);
9637 			    new_stmt = gimple_build_assign (tem, data_ref);
9638 			    vect_finish_stmt_generation (stmt_info, new_stmt,
9639 							 gsi);
9640 			    data_ref = NULL;
9641 			    vec<constructor_elt, va_gc> *v;
9642 			    vec_alloc (v, 2);
9643 			    if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9644 			      {
9645 				CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9646 							build_zero_cst (ltype));
9647 				CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9648 			      }
9649 			    else
9650 			      {
9651 				CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9652 				CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9653 							build_zero_cst (ltype));
9654 			      }
9655 			    gcc_assert (new_vtype != NULL_TREE);
9656 			    if (new_vtype == vectype)
9657 			      new_stmt = gimple_build_assign (
9658 				vec_dest, build_constructor (vectype, v));
9659 			    else
9660 			      {
9661 				tree new_vname = make_ssa_name (new_vtype);
9662 				new_stmt = gimple_build_assign (
9663 				  new_vname, build_constructor (new_vtype, v));
9664 				vect_finish_stmt_generation (stmt_info,
9665 							     new_stmt, gsi);
9666 				new_stmt = gimple_build_assign (
9667 				  vec_dest, build1 (VIEW_CONVERT_EXPR, vectype,
9668 						    new_vname));
9669 			      }
9670 			  }
9671 		      }
9672 		    break;
9673 		  }
9674 		case dr_explicit_realign:
9675 		  {
9676 		    tree ptr, bump;
9677 
9678 		    tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
9679 
9680 		    if (compute_in_loop)
9681 		      msq = vect_setup_realignment (first_stmt_info, gsi,
9682 						    &realignment_token,
9683 						    dr_explicit_realign,
9684 						    dataref_ptr, NULL);
9685 
9686 		    if (TREE_CODE (dataref_ptr) == SSA_NAME)
9687 		      ptr = copy_ssa_name (dataref_ptr);
9688 		    else
9689 		      ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
9690 		    // For explicit realign the target alignment should be
9691 		    // known at compile time.
9692 		    unsigned HOST_WIDE_INT align =
9693 		      DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
9694 		    new_stmt = gimple_build_assign
9695 				 (ptr, BIT_AND_EXPR, dataref_ptr,
9696 				  build_int_cst
9697 				  (TREE_TYPE (dataref_ptr),
9698 				   -(HOST_WIDE_INT) align));
9699 		    vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9700 		    data_ref
9701 		      = build2 (MEM_REF, vectype, ptr,
9702 				build_int_cst (ref_type, 0));
9703 		    vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9704 		    vec_dest = vect_create_destination_var (scalar_dest,
9705 							    vectype);
9706 		    new_stmt = gimple_build_assign (vec_dest, data_ref);
9707 		    new_temp = make_ssa_name (vec_dest, new_stmt);
9708 		    gimple_assign_set_lhs (new_stmt, new_temp);
9709 		    gimple_move_vops (new_stmt, stmt_info->stmt);
9710 		    vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9711 		    msq = new_temp;
9712 
9713 		    bump = size_binop (MULT_EXPR, vs,
9714 				       TYPE_SIZE_UNIT (elem_type));
9715 		    bump = size_binop (MINUS_EXPR, bump, size_one_node);
9716 		    ptr = bump_vector_ptr (dataref_ptr, NULL, gsi,
9717 					   stmt_info, bump);
9718 		    new_stmt = gimple_build_assign
9719 				 (NULL_TREE, BIT_AND_EXPR, ptr,
9720 				  build_int_cst
9721 				  (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
9722 		    ptr = copy_ssa_name (ptr, new_stmt);
9723 		    gimple_assign_set_lhs (new_stmt, ptr);
9724 		    vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9725 		    data_ref
9726 		      = build2 (MEM_REF, vectype, ptr,
9727 				build_int_cst (ref_type, 0));
9728 		    break;
9729 		  }
9730 		case dr_explicit_realign_optimized:
9731 		  {
9732 		    if (TREE_CODE (dataref_ptr) == SSA_NAME)
9733 		      new_temp = copy_ssa_name (dataref_ptr);
9734 		    else
9735 		      new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
9736 		    // We should only be doing this if we know the target
9737 		    // alignment at compile time.
9738 		    unsigned HOST_WIDE_INT align =
9739 		      DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
9740 		    new_stmt = gimple_build_assign
9741 		      (new_temp, BIT_AND_EXPR, dataref_ptr,
9742 		       build_int_cst (TREE_TYPE (dataref_ptr),
9743 				     -(HOST_WIDE_INT) align));
9744 		    vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9745 		    data_ref
9746 		      = build2 (MEM_REF, vectype, new_temp,
9747 				build_int_cst (ref_type, 0));
9748 		    break;
9749 		  }
9750 		default:
9751 		  gcc_unreachable ();
9752 		}
9753 	      vec_dest = vect_create_destination_var (scalar_dest, vectype);
9754 	      /* DATA_REF is null if we've already built the statement.  */
9755 	      if (data_ref)
9756 		{
9757 		  vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9758 		  new_stmt = gimple_build_assign (vec_dest, data_ref);
9759 		}
9760 	      new_temp = make_ssa_name (vec_dest, new_stmt);
9761 	      gimple_set_lhs (new_stmt, new_temp);
9762 	      new_stmt_info
9763 		= vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9764 
9765 	      /* 3. Handle explicit realignment if necessary/supported.
9766 		 Create in loop:
9767 		   vec_dest = realign_load (msq, lsq, realignment_token)  */
9768 	      if (alignment_support_scheme == dr_explicit_realign_optimized
9769 		  || alignment_support_scheme == dr_explicit_realign)
9770 		{
9771 		  lsq = gimple_assign_lhs (new_stmt);
9772 		  if (!realignment_token)
9773 		    realignment_token = dataref_ptr;
9774 		  vec_dest = vect_create_destination_var (scalar_dest, vectype);
9775 		  new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
9776 						  msq, lsq, realignment_token);
9777 		  new_temp = make_ssa_name (vec_dest, new_stmt);
9778 		  gimple_assign_set_lhs (new_stmt, new_temp);
9779 		  new_stmt_info
9780 		    = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9781 
9782 		  if (alignment_support_scheme == dr_explicit_realign_optimized)
9783 		    {
9784 		      gcc_assert (phi);
9785 		      if (i == vec_num - 1 && j == ncopies - 1)
9786 			add_phi_arg (phi, lsq,
9787 				     loop_latch_edge (containing_loop),
9788 				     UNKNOWN_LOCATION);
9789 		      msq = lsq;
9790 		    }
9791 		}
9792 
9793 	      if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9794 		{
9795 		  tree perm_mask = perm_mask_for_reverse (vectype);
9796 		  new_temp = permute_vec_elements (new_temp, new_temp,
9797 						   perm_mask, stmt_info, gsi);
9798 		  new_stmt_info = vinfo->lookup_def (new_temp);
9799 		}
9800 
9801 	      /* Collect vector loads and later create their permutation in
9802 		 vect_transform_grouped_load ().  */
9803 	      if (grouped_load || slp_perm)
9804 		dr_chain.quick_push (new_temp);
9805 
9806 	      /* Store vector loads in the corresponding SLP_NODE.  */
9807 	      if (slp && !slp_perm)
9808 		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
9809 
9810 	      /* With an SLP permutation we load the gaps as well; without
9811 	         one we need to skip the gaps once we have fully loaded
9812 		 all elements.  group_gap_adj is DR_GROUP_SIZE here.  */
9813 	      group_elt += nunits;
9814 	      if (maybe_ne (group_gap_adj, 0U)
9815 		  && !slp_perm
9816 		  && known_eq (group_elt, group_size - group_gap_adj))
9817 		{
9818 		  poly_wide_int bump_val
9819 		    = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
9820 		       * group_gap_adj);
9821 		  tree bump = wide_int_to_tree (sizetype, bump_val);
9822 		  dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
9823 						 stmt_info, bump);
9824 		  group_elt = 0;
9825 		}
9826 	    }
9827 	  /* Bump the vector pointer to account for a gap or for excess
9828 	     elements loaded for a permuted SLP load.  */
9829 	  if (maybe_ne (group_gap_adj, 0U) && slp_perm)
9830 	    {
9831 	      poly_wide_int bump_val
9832 		= (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
9833 		   * group_gap_adj);
9834 	      tree bump = wide_int_to_tree (sizetype, bump_val);
9835 	      dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
9836 					     stmt_info, bump);
9837 	    }
9838 	}
9839 
9840       if (slp && !slp_perm)
9841 	continue;
9842 
9843       if (slp_perm)
9844         {
9845 	  unsigned n_perms;
9846           if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
9847                                              slp_node_instance, false,
9848 					     &n_perms))
9849             {
9850               dr_chain.release ();
9851               return false;
9852             }
9853         }
9854       else
9855         {
9856           if (grouped_load)
9857   	    {
9858 	      if (memory_access_type != VMAT_LOAD_STORE_LANES)
9859 		vect_transform_grouped_load (stmt_info, dr_chain,
9860 					     group_size, gsi);
9861 	      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
9862 	    }
9863           else
9864 	    {
9865 	      if (j == 0)
9866 	        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
9867 	      else
9868 	        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
9869 	      prev_stmt_info = new_stmt_info;
9870 	    }
9871         }
9872       dr_chain.release ();
9873     }
9874 
9875   return true;
9876 }
9877 
9878 /* Function vect_is_simple_cond.
9879 
9880    Input:
9881    VINFO - the vectorization info for the loop or basic block.
9882    COND - Condition that is checked for simple use.
9883 
9884    Output:
9885    *COMP_VECTYPE - the vector type for the comparison.
9886    *DTS - The def types for the arguments of the comparison.
9887 
9888    Returns whether a COND can be vectorized.  Checks whether
9889    condition operands are supportable using vect_is_simple_use.  */
9890 
9891 static bool
9892 vect_is_simple_cond (tree cond, vec_info *vinfo, slp_tree slp_node,
9893 		     tree *comp_vectype, enum vect_def_type *dts,
9894 		     tree vectype)
9895 {
9896   tree lhs, rhs;
9897   tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
9898 
9899   /* Mask case.  */
9900   if (TREE_CODE (cond) == SSA_NAME
9901       && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
9902     {
9903       if (!vect_is_simple_use (cond, vinfo, &dts[0], comp_vectype)
9904 	  || !*comp_vectype
9905 	  || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
9906 	return false;
9907       return true;
9908     }
9909 
9910   if (!COMPARISON_CLASS_P (cond))
9911     return false;
9912 
9913   lhs = TREE_OPERAND (cond, 0);
9914   rhs = TREE_OPERAND (cond, 1);
9915 
9916   if (TREE_CODE (lhs) == SSA_NAME)
9917     {
9918       if (!vect_is_simple_use (lhs, vinfo, &dts[0], &vectype1))
9919 	return false;
9920     }
9921   else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
9922 	   || TREE_CODE (lhs) == FIXED_CST)
9923     dts[0] = vect_constant_def;
9924   else
9925     return false;
9926 
9927   if (TREE_CODE (rhs) == SSA_NAME)
9928     {
9929       if (!vect_is_simple_use (rhs, vinfo, &dts[1], &vectype2))
9930 	return false;
9931     }
9932   else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
9933 	   || TREE_CODE (rhs) == FIXED_CST)
9934     dts[1] = vect_constant_def;
9935   else
9936     return false;
9937 
9938   if (vectype1 && vectype2
9939       && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
9940 		   TYPE_VECTOR_SUBPARTS (vectype2)))
9941     return false;
9942 
9943   *comp_vectype = vectype1 ? vectype1 : vectype2;
9944   /* Invariant comparison.  */
9945   if (! *comp_vectype)
9946     {
9947       tree scalar_type = TREE_TYPE (lhs);
9948       if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
9949 	*comp_vectype = truth_type_for (vectype);
9950       else
9951 	{
9952 	  /* If we can widen the comparison to match vectype do so.  */
9953 	  if (INTEGRAL_TYPE_P (scalar_type)
9954 	      && !slp_node
9955 	      && tree_int_cst_lt (TYPE_SIZE (scalar_type),
9956 				  TYPE_SIZE (TREE_TYPE (vectype))))
9957 	    scalar_type = build_nonstandard_integer_type
9958 	      (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))),
9959 	       TYPE_UNSIGNED (scalar_type));
9960 	  *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
9961 						       slp_node);
9962 	}
9963     }
9964 
9965   return true;
9966 }
9967 
9968 /* vectorizable_condition.
9969 
9970    Check if STMT_INFO is a conditional modify expression that can be vectorized.
9971    If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
9972    stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
9973    at GSI.
9974 
9975    When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
9976 
9977    Return true if STMT_INFO is vectorizable in this way.  */
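/* For example (an illustrative sketch, not taken from an actual dump):
   the scalar statement

     x_1 = a_2 < b_3 ? c_4 : d_5;

   is vectorized into something of the form

     vect_x = VEC_COND_EXPR <vect_a < vect_b, vect_c, vect_d>;

   with the comparison possibly emitted as a separate vector statement
   when it has to be an SSA name, e.g. when a loop mask is applied.  */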
9978 
9979 static bool
9980 vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
9981 			stmt_vec_info *vec_stmt,
9982 			slp_tree slp_node, stmt_vector_for_cost *cost_vec)
9983 {
9984   vec_info *vinfo = stmt_info->vinfo;
9985   tree scalar_dest = NULL_TREE;
9986   tree vec_dest = NULL_TREE;
9987   tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
9988   tree then_clause, else_clause;
9989   tree comp_vectype = NULL_TREE;
9990   tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
9991   tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
9992   tree vec_compare;
9993   tree new_temp;
9994   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
9995   enum vect_def_type dts[4]
9996     = {vect_unknown_def_type, vect_unknown_def_type,
9997        vect_unknown_def_type, vect_unknown_def_type};
9998   int ndts = 4;
9999   int ncopies;
10000   int vec_num;
10001   enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
10002   stmt_vec_info prev_stmt_info = NULL;
10003   int i, j;
10004   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
10005   vec<tree> vec_oprnds0 = vNULL;
10006   vec<tree> vec_oprnds1 = vNULL;
10007   vec<tree> vec_oprnds2 = vNULL;
10008   vec<tree> vec_oprnds3 = vNULL;
10009   tree vec_cmp_type;
10010   bool masked = false;
10011 
10012   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
10013     return false;
10014 
10015   /* Is this a vectorizable conditional operation?  */
10016   gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
10017   if (!stmt)
10018     return false;
10019 
10020   code = gimple_assign_rhs_code (stmt);
10021   if (code != COND_EXPR)
10022     return false;
10023 
10024   stmt_vec_info reduc_info = NULL;
10025   int reduc_index = -1;
10026   vect_reduction_type reduction_type = TREE_CODE_REDUCTION;
10027   bool for_reduction
10028     = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL;
10029   if (for_reduction)
10030     {
10031       if (STMT_SLP_TYPE (stmt_info))
10032 	return false;
10033       reduc_info = info_for_reduction (stmt_info);
10034       reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
10035       reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
10036       gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION
10037 		  || reduc_index != -1);
10038     }
10039   else
10040     {
10041       if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
10042 	return false;
10043 
10044       /* FORNOW: only supported as part of a reduction.  */
10045       if (STMT_VINFO_LIVE_P (stmt_info))
10046 	{
10047 	  if (dump_enabled_p ())
10048 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10049 			     "value used after loop.\n");
10050 	  return false;
10051 	}
10052     }
10053 
10054   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
10055   tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10056 
10057   if (slp_node)
10058     {
10059       ncopies = 1;
10060       vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10061     }
10062   else
10063     {
10064       ncopies = vect_get_num_copies (loop_vinfo, vectype);
10065       vec_num = 1;
10066     }
10067 
10068   gcc_assert (ncopies >= 1);
10069   if (for_reduction && ncopies > 1)
10070     return false; /* FORNOW */
10071 
10072   cond_expr = gimple_assign_rhs1 (stmt);
10073   then_clause = gimple_assign_rhs2 (stmt);
10074   else_clause = gimple_assign_rhs3 (stmt);
10075 
10076   if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo, slp_node,
10077 			    &comp_vectype, &dts[0], vectype)
10078       || !comp_vectype)
10079     return false;
10080 
10081   if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &dts[2], &vectype1))
10082     return false;
10083   if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &dts[3], &vectype2))
10084     return false;
10085 
10086   if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
10087     return false;
10088 
10089   if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
10090     return false;
10091 
10092   masked = !COMPARISON_CLASS_P (cond_expr);
10093   vec_cmp_type = truth_type_for (comp_vectype);
10094 
10095   if (vec_cmp_type == NULL_TREE)
10096     return false;
10097 
10098   cond_code = TREE_CODE (cond_expr);
10099   if (!masked)
10100     {
10101       cond_expr0 = TREE_OPERAND (cond_expr, 0);
10102       cond_expr1 = TREE_OPERAND (cond_expr, 1);
10103     }
10104 
10105   /* For conditional reductions, the "then" value needs to be the candidate
10106      value calculated by this iteration while the "else" value needs to be
10107      the result carried over from previous iterations.  If the COND_EXPR
10108      is the other way around, we need to swap it.  */
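  /* For instance (an illustrative sketch): in

       res_1 = x_2 < 0.0 ? res_3 : x_2;

     the carried value res_3 sits in the "then" position, so the clauses
     are swapped and the comparison inverted, giving the equivalent

       res_1 = x_2 >= 0.0 ? x_2 : res_3;

     or, when the comparison cannot be inverted (masks, NaN-honouring
     float compares), the comparison result itself is inverted via
     must_invert_cmp_result below.  */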
10109   bool must_invert_cmp_result = false;
10110   if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1)
10111     {
10112       if (masked)
10113 	must_invert_cmp_result = true;
10114       else
10115 	{
10116 	  bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0));
10117 	  tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
10118 	  if (new_code == ERROR_MARK)
10119 	    must_invert_cmp_result = true;
10120 	  else
10121 	    {
10122 	      cond_code = new_code;
10123 	      /* Make sure we don't accidentally use the old condition.  */
10124 	      cond_expr = NULL_TREE;
10125 	    }
10126 	}
10127       std::swap (then_clause, else_clause);
10128     }
10129 
10130   if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
10131     {
10132       /* Boolean values may have another representation in vectors
10133 	 and therefore we prefer bit operations over comparison for
10134 	 them (which also works for scalar masks).  We store opcodes
10135 	 to use in bitop1 and bitop2.  The statement is vectorized as
10136 	 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2),
10137 	 depending on the arity of bitop1 and bitop2.  */
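      /* As a concrete instance (illustrative): for boolean operands
	 x > y is computed as x & ~y, i.e. bitop1 = BIT_NOT_EXPR is
	 applied to rhs2 and bitop2 = BIT_AND_EXPR combines the result
	 with rhs1.  */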
10138       switch (cond_code)
10139 	{
10140 	case GT_EXPR:
10141 	  bitop1 = BIT_NOT_EXPR;
10142 	  bitop2 = BIT_AND_EXPR;
10143 	  break;
10144 	case GE_EXPR:
10145 	  bitop1 = BIT_NOT_EXPR;
10146 	  bitop2 = BIT_IOR_EXPR;
10147 	  break;
10148 	case LT_EXPR:
10149 	  bitop1 = BIT_NOT_EXPR;
10150 	  bitop2 = BIT_AND_EXPR;
10151 	  std::swap (cond_expr0, cond_expr1);
10152 	  break;
10153 	case LE_EXPR:
10154 	  bitop1 = BIT_NOT_EXPR;
10155 	  bitop2 = BIT_IOR_EXPR;
10156 	  std::swap (cond_expr0, cond_expr1);
10157 	  break;
10158 	case NE_EXPR:
10159 	  bitop1 = BIT_XOR_EXPR;
10160 	  break;
10161 	case EQ_EXPR:
10162 	  bitop1 = BIT_XOR_EXPR;
10163 	  bitop2 = BIT_NOT_EXPR;
10164 	  break;
10165 	default:
10166 	  return false;
10167 	}
10168       cond_code = SSA_NAME;
10169     }
10170 
10171   if (TREE_CODE_CLASS (cond_code) == tcc_comparison
10172       && reduction_type == EXTRACT_LAST_REDUCTION
10173       && !expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type, cond_code))
10174     {
10175       if (dump_enabled_p ())
10176 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10177 			 "reduction comparison operation not supported.\n");
10178       return false;
10179     }
10180 
10181   if (!vec_stmt)
10182     {
10183       if (bitop1 != NOP_EXPR)
10184 	{
10185 	  machine_mode mode = TYPE_MODE (comp_vectype);
10186 	  optab optab;
10187 
10188 	  optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
10189 	  if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10190 	    return false;
10191 
10192 	  if (bitop2 != NOP_EXPR)
10193 	    {
10194 	      optab = optab_for_tree_code (bitop2, comp_vectype,
10195 					   optab_default);
10196 	      if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10197 		return false;
10198 	    }
10199 	}
10200 
10201       if (loop_vinfo
10202 	  && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)
10203 	  && reduction_type == EXTRACT_LAST_REDUCTION)
10204 	vect_record_loop_mask (loop_vinfo, &LOOP_VINFO_MASKS (loop_vinfo),
10205 			       ncopies * vec_num, vectype, NULL);
10206 
10207       vect_cost_for_stmt kind = vector_stmt;
10208       if (reduction_type == EXTRACT_LAST_REDUCTION)
10209 	/* Count one reduction-like operation per vector.  */
10210 	kind = vec_to_scalar;
10211       else if (!expand_vec_cond_expr_p (vectype, comp_vectype, cond_code))
10212 	return false;
10213 
10214       STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
10215       vect_model_simple_cost (stmt_info, ncopies, dts, ndts, slp_node,
10216 			      cost_vec, kind);
10217       return true;
10218     }
10219 
10220   /* Transform.  */
10221 
10222   if (!slp_node)
10223     {
10224       vec_oprnds0.create (1);
10225       vec_oprnds1.create (1);
10226       vec_oprnds2.create (1);
10227       vec_oprnds3.create (1);
10228     }
10229 
10230   /* Handle def.  */
10231   scalar_dest = gimple_assign_lhs (stmt);
10232   if (reduction_type != EXTRACT_LAST_REDUCTION)
10233     vec_dest = vect_create_destination_var (scalar_dest, vectype);
10234 
10235   /* Handle cond expr.  */
10236   for (j = 0; j < ncopies; j++)
10237     {
10238       bool swap_cond_operands = false;
10239 
10240       /* See whether another part of the vectorized code applies a loop
10241 	 mask to the condition, or to its inverse.  */
10242 
10243       vec_loop_masks *masks = NULL;
10244       if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
10245 	{
10246 	  if (reduction_type == EXTRACT_LAST_REDUCTION)
10247 	    masks = &LOOP_VINFO_MASKS (loop_vinfo);
10248 	  else
10249 	    {
10250 	      scalar_cond_masked_key cond (cond_expr, ncopies);
10251 	      if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10252 		masks = &LOOP_VINFO_MASKS (loop_vinfo);
10253 	      else
10254 		{
10255 		  bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
10256 		  cond.code = invert_tree_comparison (cond.code, honor_nans);
10257 		  if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10258 		    {
10259 		      masks = &LOOP_VINFO_MASKS (loop_vinfo);
10260 		      cond_code = cond.code;
10261 		      swap_cond_operands = true;
10262 		    }
10263 		}
10264 	    }
10265 	}
10266 
10267       stmt_vec_info new_stmt_info = NULL;
10268       if (j == 0)
10269 	{
10270           if (slp_node)
10271             {
10272 	      auto_vec<vec<tree>, 4> vec_defs;
10273               vect_get_slp_defs (slp_node, &vec_defs);
10274 	      vec_oprnds3 = vec_defs.pop ();
10275 	      vec_oprnds2 = vec_defs.pop ();
10276 	      if (!masked)
10277 		vec_oprnds1 = vec_defs.pop ();
10278 	      vec_oprnds0 = vec_defs.pop ();
10279             }
10280           else
10281             {
10282 	      if (masked)
10283 		{
10284 		  vec_cond_lhs
10285 		    = vect_get_vec_def_for_operand (cond_expr, stmt_info,
10286 						    comp_vectype);
10287 		}
10288 	      else
10289 		{
10290 		  vec_cond_lhs
10291 		    = vect_get_vec_def_for_operand (cond_expr0,
10292 						    stmt_info, comp_vectype);
10293 		  vec_cond_rhs
10294 		    = vect_get_vec_def_for_operand (cond_expr1,
10295 						    stmt_info, comp_vectype);
10296 		}
10297 	      vec_then_clause = vect_get_vec_def_for_operand (then_clause,
10298 							      stmt_info);
10299 	      if (reduction_type != EXTRACT_LAST_REDUCTION)
10300 		vec_else_clause = vect_get_vec_def_for_operand (else_clause,
10301 								stmt_info);
10302 	    }
10303 	}
10304       else
10305 	{
10306 	  vec_cond_lhs
10307 	    = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnds0.pop ());
10308 	  if (!masked)
10309 	    vec_cond_rhs
10310 	      = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnds1.pop ());
10311 
10312 	  vec_then_clause = vect_get_vec_def_for_stmt_copy (vinfo,
10313 							    vec_oprnds2.pop ());
10314 	  vec_else_clause = vect_get_vec_def_for_stmt_copy (vinfo,
10315 							    vec_oprnds3.pop ());
10316 	}
10317 
10318       if (!slp_node)
10319         {
10320 	  vec_oprnds0.quick_push (vec_cond_lhs);
10321 	  if (!masked)
10322 	    vec_oprnds1.quick_push (vec_cond_rhs);
10323 	  vec_oprnds2.quick_push (vec_then_clause);
10324 	  vec_oprnds3.quick_push (vec_else_clause);
10325 	}
10326 
10327       /* Arguments are ready.  Create the new vector stmt.  */
10328       FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
10329         {
10330           vec_then_clause = vec_oprnds2[i];
10331           vec_else_clause = vec_oprnds3[i];
10332 
10333 	  if (swap_cond_operands)
10334 	    std::swap (vec_then_clause, vec_else_clause);
10335 
10336 	  if (masked)
10337 	    vec_compare = vec_cond_lhs;
10338 	  else
10339 	    {
10340 	      vec_cond_rhs = vec_oprnds1[i];
10341 	      if (bitop1 == NOP_EXPR)
10342 		vec_compare = build2 (cond_code, vec_cmp_type,
10343 				      vec_cond_lhs, vec_cond_rhs);
10344 	      else
10345 		{
10346 		  new_temp = make_ssa_name (vec_cmp_type);
10347 		  gassign *new_stmt;
10348 		  if (bitop1 == BIT_NOT_EXPR)
10349 		    new_stmt = gimple_build_assign (new_temp, bitop1,
10350 						    vec_cond_rhs);
10351 		  else
10352 		    new_stmt
10353 		      = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
10354 					     vec_cond_rhs);
10355 		  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10356 		  if (bitop2 == NOP_EXPR)
10357 		    vec_compare = new_temp;
10358 		  else if (bitop2 == BIT_NOT_EXPR)
10359 		    {
10360 		      /* Instead of doing ~x ? y : z do x ? z : y.  */
10361 		      vec_compare = new_temp;
10362 		      std::swap (vec_then_clause, vec_else_clause);
10363 		    }
10364 		  else
10365 		    {
10366 		      vec_compare = make_ssa_name (vec_cmp_type);
10367 		      new_stmt
10368 			= gimple_build_assign (vec_compare, bitop2,
10369 					       vec_cond_lhs, new_temp);
10370 		      vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10371 		    }
10372 		}
10373 	    }
10374 
10375 	  /* If we decided to apply a loop mask to the result of the vector
10376              comparison, AND the comparison with the mask now.  Later passes
10377              should then be able to reuse the AND results between multiple
10378              vector statements.
10379 
10380 	     For example:
10381 	     for (int i = 0; i < 100; ++i)
10382 	       x[i] = y[i] ? z[i] : 10;
10383 
10384 	     results in following optimized GIMPLE:
10385 
10386 	     mask__35.8_43 = vect__4.7_41 != { 0, ... };
10387 	     vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
10388 	     _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
10389 	     vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
10390 	     vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
10391 					       vect_iftmp.11_47, { 10, ... }>;
10392 
10393 	     instead of using both masked and unmasked forms of
10394 	     vec != { 0, ... } (masked in the MASK_LOAD,
10395 	     unmasked in the VEC_COND_EXPR).  */
10396 
10397 	  /* Force vec_compare to be an SSA_NAME rather than a comparison,
10398 	     in cases where that's necessary.  */
10399 
10400 	  if (masks || reduction_type == EXTRACT_LAST_REDUCTION)
10401 	    {
10402 	      if (!is_gimple_val (vec_compare))
10403 		{
10404 		  tree vec_compare_name = make_ssa_name (vec_cmp_type);
10405 		  gassign *new_stmt = gimple_build_assign (vec_compare_name,
10406 							   vec_compare);
10407 		  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10408 		  vec_compare = vec_compare_name;
10409 		}
10410 
10411 	      if (must_invert_cmp_result)
10412 		{
10413 		  tree vec_compare_name = make_ssa_name (vec_cmp_type);
10414 		  gassign *new_stmt = gimple_build_assign (vec_compare_name,
10415 							   BIT_NOT_EXPR,
10416 							   vec_compare);
10417 		  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10418 		  vec_compare = vec_compare_name;
10419 		}
10420 
10421 	      if (masks)
10422 		{
10423 		  unsigned vec_num = vec_oprnds0.length ();
10424 		  tree loop_mask
10425 		    = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
10426 					  vectype, vec_num * j + i);
10427 		  tree tmp2 = make_ssa_name (vec_cmp_type);
10428 		  gassign *g
10429 		    = gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare,
10430 					   loop_mask);
10431 		  vect_finish_stmt_generation (stmt_info, g, gsi);
10432 		  vec_compare = tmp2;
10433 		}
10434 	    }
10435 
10436 	  if (reduction_type == EXTRACT_LAST_REDUCTION)
10437 	    {
10438 	      gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt;
10439 	      tree lhs = gimple_get_lhs (old_stmt);
10440 	      gcall *new_stmt = gimple_build_call_internal
10441 		(IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
10442 		 vec_then_clause);
10443 	      gimple_call_set_lhs (new_stmt, lhs);
10444 	      SSA_NAME_DEF_STMT (lhs) = new_stmt;
10445 	      if (old_stmt == gsi_stmt (*gsi))
10446 		new_stmt_info = vect_finish_replace_stmt (stmt_info, new_stmt);
10447 	      else
10448 		{
10449 		  /* In this case we're moving the definition to later in the
10450 		     block.  That doesn't matter because the only uses of the
10451 		     lhs are in phi statements.  */
10452 		  gimple_stmt_iterator old_gsi = gsi_for_stmt (old_stmt);
10453 		  gsi_remove (&old_gsi, true);
10454 		  new_stmt_info
10455 		    = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10456 		}
10457 	    }
10458 	  else
10459 	    {
10460 	      new_temp = make_ssa_name (vec_dest);
10461 	      gassign *new_stmt
10462 		= gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
10463 				       vec_then_clause, vec_else_clause);
10464 	      new_stmt_info
10465 		= vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10466 	    }
10467           if (slp_node)
10468 	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
10469         }
10470 
10471         if (slp_node)
10472           continue;
10473 
10474 	if (j == 0)
10475 	  STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
10476 	else
10477 	  STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
10478 
10479 	prev_stmt_info = new_stmt_info;
10480     }
10481 
10482   vec_oprnds0.release ();
10483   vec_oprnds1.release ();
10484   vec_oprnds2.release ();
10485   vec_oprnds3.release ();
10486 
10487   return true;
10488 }
10489 
10490 /* vectorizable_comparison.
10491 
10492    Check if STMT_INFO is a comparison expression that can be vectorized.
10493    If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10494    comparison, put it in VEC_STMT, and insert it at GSI.
10495 
10496    Return true if STMT_INFO is vectorizable in this way.  */
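/* An illustrative sketch (not taken from the sources; the exact GIMPLE and
   the mask type depend on the target and the chosen vector type): a scalar
   comparison

     _1 = a_2 < b_3;

   is replaced by a vector comparison producing a boolean mask vector, e.g.

     mask_10 = vect_a_8 < vect_b_9;

   where the type of mask_10 satisfies VECTOR_BOOLEAN_TYPE_P (for instance
   vector(4) <signed-boolean:32>).  */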
10497 
10498 static bool
10499 vectorizable_comparison (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10500 			 stmt_vec_info *vec_stmt,
10501 			 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
10502 {
10503   vec_info *vinfo = stmt_info->vinfo;
10504   tree lhs, rhs1, rhs2;
10505   tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10506   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
10507   tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
10508   tree new_temp;
10509   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
10510   enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
10511   int ndts = 2;
10512   poly_uint64 nunits;
10513   int ncopies;
10514   enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
10515   stmt_vec_info prev_stmt_info = NULL;
10516   int i, j;
10517   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
10518   vec<tree> vec_oprnds0 = vNULL;
10519   vec<tree> vec_oprnds1 = vNULL;
10520   tree mask_type;
10521   tree mask;
10522 
10523   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
10524     return false;
10525 
10526   if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
10527     return false;
10528 
10529   mask_type = vectype;
10530   nunits = TYPE_VECTOR_SUBPARTS (vectype);
10531 
10532   if (slp_node)
10533     ncopies = 1;
10534   else
10535     ncopies = vect_get_num_copies (loop_vinfo, vectype);
10536 
10537   gcc_assert (ncopies >= 1);
10538   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
10539     return false;
10540 
10541   if (STMT_VINFO_LIVE_P (stmt_info))
10542     {
10543       if (dump_enabled_p ())
10544 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10545 			 "value used after loop.\n");
10546       return false;
10547     }
10548 
10549   gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
10550   if (!stmt)
10551     return false;
10552 
10553   code = gimple_assign_rhs_code (stmt);
10554 
10555   if (TREE_CODE_CLASS (code) != tcc_comparison)
10556     return false;
10557 
10558   rhs1 = gimple_assign_rhs1 (stmt);
10559   rhs2 = gimple_assign_rhs2 (stmt);
10560 
10561   if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &dts[0], &vectype1))
10562     return false;
10563 
10564   if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &dts[1], &vectype2))
10565     return false;
10566 
10567   if (vectype1 && vectype2
10568       && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
10569 		   TYPE_VECTOR_SUBPARTS (vectype2)))
10570     return false;
10571 
10572   vectype = vectype1 ? vectype1 : vectype2;
10573 
10574   /* Invariant comparison.  */
10575   if (!vectype)
10576     {
10577       if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
10578 	vectype = mask_type;
10579       else
10580 	vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1),
10581 					       slp_node);
10582       if (!vectype || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
10583 	return false;
10584     }
10585   else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
10586     return false;
10587 
10588   /* Can't compare mask and non-mask types.  */
10589   if (vectype1 && vectype2
10590       && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
10591     return false;
10592 
10593   /* Boolean values may have another representation in vectors
10594      and therefore we prefer bit operations over comparison for
10595      them (which also works for scalar masks).  We store opcodes
10596      to use in bitop1 and bitop2.  Statement is vectorized as
10597        BITOP2 (rhs1 BITOP1 rhs2) or
10598        rhs1 BITOP2 (BITOP1 rhs2)
10599      depending on bitop1 and bitop2 arity.  */
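  /* For example, with boolean (0/1) elements the mapping chosen just below
     computes (a sketch of what the code underneath sets up):
       a >  b  as  a & ~b
       a >= b  as  a | ~b
       a <  b  as  b & ~a	(operands swapped via swap_p)
       a <= b  as  b | ~a	(operands swapped via swap_p)
       a == b  as  ~(a ^ b)
       a != b  as  a ^ b  */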
10600   bool swap_p = false;
10601   if (VECTOR_BOOLEAN_TYPE_P (vectype))
10602     {
10603       if (code == GT_EXPR)
10604 	{
10605 	  bitop1 = BIT_NOT_EXPR;
10606 	  bitop2 = BIT_AND_EXPR;
10607 	}
10608       else if (code == GE_EXPR)
10609 	{
10610 	  bitop1 = BIT_NOT_EXPR;
10611 	  bitop2 = BIT_IOR_EXPR;
10612 	}
10613       else if (code == LT_EXPR)
10614 	{
10615 	  bitop1 = BIT_NOT_EXPR;
10616 	  bitop2 = BIT_AND_EXPR;
10617 	  swap_p = true;
10618 	}
10619       else if (code == LE_EXPR)
10620 	{
10621 	  bitop1 = BIT_NOT_EXPR;
10622 	  bitop2 = BIT_IOR_EXPR;
10623 	  swap_p = true;
10624 	}
10625       else
10626 	{
10627 	  bitop1 = BIT_XOR_EXPR;
10628 	  if (code == EQ_EXPR)
10629 	    bitop2 = BIT_NOT_EXPR;
10630 	}
10631     }
10632 
10633   if (!vec_stmt)
10634     {
10635       if (bitop1 == NOP_EXPR)
10636 	{
10637 	  if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
10638 	    return false;
10639 	}
10640       else
10641 	{
10642 	  machine_mode mode = TYPE_MODE (vectype);
10643 	  optab optab;
10644 
10645 	  optab = optab_for_tree_code (bitop1, vectype, optab_default);
10646 	  if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10647 	    return false;
10648 
10649 	  if (bitop2 != NOP_EXPR)
10650 	    {
10651 	      optab = optab_for_tree_code (bitop2, vectype, optab_default);
10652 	      if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10653 		return false;
10654 	    }
10655 	}
10656 
10657       STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
10658       vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
10659 			      dts, ndts, slp_node, cost_vec);
10660       return true;
10661     }
10662 
10663   /* Transform.  */
10664   if (!slp_node)
10665     {
10666       vec_oprnds0.create (1);
10667       vec_oprnds1.create (1);
10668     }
10669 
10670   /* Handle def.  */
10671   lhs = gimple_assign_lhs (stmt);
10672   mask = vect_create_destination_var (lhs, mask_type);
10673 
10674   /* Handle cmp expr.  */
10675   for (j = 0; j < ncopies; j++)
10676     {
10677       stmt_vec_info new_stmt_info = NULL;
10678       if (j == 0)
10679 	{
10680 	  if (slp_node)
10681 	    {
10682 	      auto_vec<vec<tree>, 2> vec_defs;
10683 	      vect_get_slp_defs (slp_node, &vec_defs);
10684 	      vec_oprnds1 = vec_defs.pop ();
10685 	      vec_oprnds0 = vec_defs.pop ();
10686 	      if (swap_p)
10687 		std::swap (vec_oprnds0, vec_oprnds1);
10688 	    }
10689 	  else
10690 	    {
10691 	      vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt_info,
10692 						       vectype);
10693 	      vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt_info,
10694 						       vectype);
10695 	    }
10696 	}
10697       else
10698 	{
10699 	  vec_rhs1 = vect_get_vec_def_for_stmt_copy (vinfo,
10700 						     vec_oprnds0.pop ());
10701 	  vec_rhs2 = vect_get_vec_def_for_stmt_copy (vinfo,
10702 						     vec_oprnds1.pop ());
10703 	}
10704 
10705       if (!slp_node)
10706 	{
10707 	  if (swap_p && j == 0)
10708 	    std::swap (vec_rhs1, vec_rhs2);
10709 	  vec_oprnds0.quick_push (vec_rhs1);
10710 	  vec_oprnds1.quick_push (vec_rhs2);
10711 	}
10712 
10713       /* Arguments are ready.  Create the new vector stmt.  */
10714       FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
10715 	{
10716 	  vec_rhs2 = vec_oprnds1[i];
10717 
10718 	  new_temp = make_ssa_name (mask);
10719 	  if (bitop1 == NOP_EXPR)
10720 	    {
10721 	      gassign *new_stmt = gimple_build_assign (new_temp, code,
10722 						       vec_rhs1, vec_rhs2);
10723 	      new_stmt_info
10724 		= vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10725 	    }
10726 	  else
10727 	    {
10728 	      gassign *new_stmt;
10729 	      if (bitop1 == BIT_NOT_EXPR)
10730 		new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
10731 	      else
10732 		new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
10733 						vec_rhs2);
10734 	      new_stmt_info
10735 		= vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10736 	      if (bitop2 != NOP_EXPR)
10737 		{
10738 		  tree res = make_ssa_name (mask);
10739 		  if (bitop2 == BIT_NOT_EXPR)
10740 		    new_stmt = gimple_build_assign (res, bitop2, new_temp);
10741 		  else
10742 		    new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
10743 						    new_temp);
10744 		  new_stmt_info
10745 		    = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10746 		}
10747 	    }
10748 	  if (slp_node)
10749 	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
10750 	}
10751 
10752       if (slp_node)
10753 	continue;
10754 
10755       if (j == 0)
10756 	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
10757       else
10758 	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
10759 
10760       prev_stmt_info = new_stmt_info;
10761     }
10762 
10763   vec_oprnds0.release ();
10764   vec_oprnds1.release ();
10765 
10766   return true;
10767 }
10768 
10769 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
10770    can handle all live statements in the node.  Otherwise return true
10771    if STMT_INFO is not live or if vectorizable_live_operation can handle it.
10772    GSI and VEC_STMT_P are as for vectorizable_live_operation.  */
10773 
10774 static bool
10775 can_vectorize_live_stmts (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10776 			  slp_tree slp_node, slp_instance slp_node_instance,
10777 			  bool vec_stmt_p,
10778 			  stmt_vector_for_cost *cost_vec)
10779 {
10780   if (slp_node)
10781     {
10782       stmt_vec_info slp_stmt_info;
10783       unsigned int i;
10784       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
10785 	{
10786 	  if (STMT_VINFO_LIVE_P (slp_stmt_info)
10787 	      && !vectorizable_live_operation (slp_stmt_info, gsi, slp_node,
10788 					       slp_node_instance, i,
10789 					       vec_stmt_p, cost_vec))
10790 	    return false;
10791 	}
10792     }
10793   else if (STMT_VINFO_LIVE_P (stmt_info)
10794 	   && !vectorizable_live_operation (stmt_info, gsi, slp_node,
10795 					    slp_node_instance, -1,
10796 					    vec_stmt_p, cost_vec))
10797     return false;
10798 
10799   return true;
10800 }
10801 
10802 /* Make sure the statement is vectorizable.  */
10803 
10804 opt_result
10805 vect_analyze_stmt (stmt_vec_info stmt_info, bool *need_to_vectorize,
10806 		   slp_tree node, slp_instance node_instance,
10807 		   stmt_vector_for_cost *cost_vec)
10808 {
10809   vec_info *vinfo = stmt_info->vinfo;
10810   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
10811   enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
10812   bool ok;
10813   gimple_seq pattern_def_seq;
10814 
10815   if (dump_enabled_p ())
10816     dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
10817 		     stmt_info->stmt);
10818 
10819   if (gimple_has_volatile_ops (stmt_info->stmt))
10820     return opt_result::failure_at (stmt_info->stmt,
10821 				   "not vectorized:"
10822 				   " stmt has volatile operands: %G\n",
10823 				   stmt_info->stmt);
10824 
10825   if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10826       && node == NULL
10827       && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
10828     {
10829       gimple_stmt_iterator si;
10830 
10831       for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
10832 	{
10833 	  stmt_vec_info pattern_def_stmt_info
10834 	    = vinfo->lookup_stmt (gsi_stmt (si));
10835 	  if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
10836 	      || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
10837 	    {
10838 	      /* Analyze def stmt of STMT if it's a pattern stmt.  */
10839 	      if (dump_enabled_p ())
10840 		dump_printf_loc (MSG_NOTE, vect_location,
10841 				 "==> examining pattern def statement: %G",
10842 				 pattern_def_stmt_info->stmt);
10843 
10844 	      opt_result res
10845 		= vect_analyze_stmt (pattern_def_stmt_info,
10846 				     need_to_vectorize, node, node_instance,
10847 				     cost_vec);
10848 	      if (!res)
10849 		return res;
10850 	    }
10851 	}
10852     }
10853 
10854   /* Skip stmts that do not need to be vectorized. In loops this is expected
10855      to include:
10856      - the COND_EXPR which is the loop exit condition
10857      - any LABEL_EXPRs in the loop
10858      - computations that are used only for array indexing or loop control.
10859      In basic blocks we only analyze statements that are part of some SLP
10860      instance; therefore, all the statements are relevant.
10861 
10862      A pattern statement needs to be analyzed instead of the original statement
10863      if the original statement is not relevant.  Otherwise, we analyze both
10864      statements.  In basic blocks we are called from some SLP instance
10865      traversal, so we don't analyze pattern stmts here; the pattern stmts
10866      are already part of the SLP instance.  */
10867 
10868   stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
10869   if (!STMT_VINFO_RELEVANT_P (stmt_info)
10870       && !STMT_VINFO_LIVE_P (stmt_info))
10871     {
10872       if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10873 	  && pattern_stmt_info
10874 	  && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
10875 	      || STMT_VINFO_LIVE_P (pattern_stmt_info)))
10876         {
10877           /* Analyze PATTERN_STMT instead of the original stmt.  */
10878 	  stmt_info = pattern_stmt_info;
10879           if (dump_enabled_p ())
10880 	    dump_printf_loc (MSG_NOTE, vect_location,
10881 			     "==> examining pattern statement: %G",
10882 			     stmt_info->stmt);
10883         }
10884       else
10885         {
10886           if (dump_enabled_p ())
10887             dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
10888 
10889           return opt_result::success ();
10890         }
10891     }
10892   else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10893 	   && node == NULL
10894 	   && pattern_stmt_info
10895 	   && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
10896 	       || STMT_VINFO_LIVE_P (pattern_stmt_info)))
10897     {
10898       /* Analyze PATTERN_STMT too.  */
10899       if (dump_enabled_p ())
10900 	dump_printf_loc (MSG_NOTE, vect_location,
10901 			 "==> examining pattern statement: %G",
10902 			 pattern_stmt_info->stmt);
10903 
10904       opt_result res
10905 	= vect_analyze_stmt (pattern_stmt_info, need_to_vectorize, node,
10906 			     node_instance, cost_vec);
10907       if (!res)
10908 	return res;
10909    }
10910 
10911   switch (STMT_VINFO_DEF_TYPE (stmt_info))
10912     {
10913       case vect_internal_def:
10914         break;
10915 
10916       case vect_reduction_def:
10917       case vect_nested_cycle:
10918          gcc_assert (!bb_vinfo
10919 		     && (relevance == vect_used_in_outer
10920 			 || relevance == vect_used_in_outer_by_reduction
10921 			 || relevance == vect_used_by_reduction
10922 			 || relevance == vect_unused_in_scope
10923 			 || relevance == vect_used_only_live));
10924          break;
10925 
10926       case vect_induction_def:
10927 	gcc_assert (!bb_vinfo);
10928 	break;
10929 
10930       case vect_constant_def:
10931       case vect_external_def:
10932       case vect_unknown_def_type:
10933       default:
10934         gcc_unreachable ();
10935     }
10936 
10937   if (STMT_VINFO_RELEVANT_P (stmt_info))
10938     {
10939       tree type = gimple_expr_type (stmt_info->stmt);
10940       gcc_assert (!VECTOR_MODE_P (TYPE_MODE (type)));
10941       gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
10942       gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
10943 		  || (call && gimple_call_lhs (call) == NULL_TREE));
10944       *need_to_vectorize = true;
10945     }
10946 
10947   if (PURE_SLP_STMT (stmt_info) && !node)
10948     {
10949       if (dump_enabled_p ())
10950 	dump_printf_loc (MSG_NOTE, vect_location,
10951 			 "handled only by SLP analysis\n");
10952       return opt_result::success ();
10953     }
10954 
10955   ok = true;
10956   if (!bb_vinfo
10957       && (STMT_VINFO_RELEVANT_P (stmt_info)
10958 	  || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
10959     /* Prefer vectorizable_call over vectorizable_simd_clone_call so
10960        -mveclibabi= takes preference over library functions with
10961        the simd attribute.  */
10962     ok = (vectorizable_call (stmt_info, NULL, NULL, node, cost_vec)
10963 	  || vectorizable_simd_clone_call (stmt_info, NULL, NULL, node,
10964 					   cost_vec)
10965 	  || vectorizable_conversion (stmt_info, NULL, NULL, node, cost_vec)
10966 	  || vectorizable_operation (stmt_info, NULL, NULL, node, cost_vec)
10967 	  || vectorizable_assignment (stmt_info, NULL, NULL, node, cost_vec)
10968 	  || vectorizable_load (stmt_info, NULL, NULL, node, node_instance,
10969 				cost_vec)
10970 	  || vectorizable_store (stmt_info, NULL, NULL, node, cost_vec)
10971 	  || vectorizable_reduction (stmt_info, node, node_instance, cost_vec)
10972 	  || vectorizable_induction (stmt_info, NULL, NULL, node, cost_vec)
10973 	  || vectorizable_shift (stmt_info, NULL, NULL, node, cost_vec)
10974 	  || vectorizable_condition (stmt_info, NULL, NULL, node, cost_vec)
10975 	  || vectorizable_comparison (stmt_info, NULL, NULL, node,
10976 				      cost_vec)
10977 	  || vectorizable_lc_phi (stmt_info, NULL, node));
10978   else
10979     {
10980       if (bb_vinfo)
10981 	ok = (vectorizable_call (stmt_info, NULL, NULL, node, cost_vec)
10982 	      || vectorizable_simd_clone_call (stmt_info, NULL, NULL, node,
10983 					       cost_vec)
10984 	      || vectorizable_conversion (stmt_info, NULL, NULL, node,
10985 					  cost_vec)
10986 	      || vectorizable_shift (stmt_info, NULL, NULL, node, cost_vec)
10987 	      || vectorizable_operation (stmt_info, NULL, NULL, node, cost_vec)
10988 	      || vectorizable_assignment (stmt_info, NULL, NULL, node,
10989 					  cost_vec)
10990 	      || vectorizable_load (stmt_info, NULL, NULL, node, node_instance,
10991 				    cost_vec)
10992 	      || vectorizable_store (stmt_info, NULL, NULL, node, cost_vec)
10993 	      || vectorizable_condition (stmt_info, NULL, NULL, node, cost_vec)
10994 	      || vectorizable_comparison (stmt_info, NULL, NULL, node,
10995 					  cost_vec));
10996     }
10997 
10998   if (!ok)
10999     return opt_result::failure_at (stmt_info->stmt,
11000 				   "not vectorized:"
11001 				   " relevant stmt not supported: %G",
11002 				   stmt_info->stmt);
11003 
11004   /* Stmts that are (also) "live" (i.e. that are used outside of the loop)
11005      need extra handling, except for vectorizable reductions.  */
11006   if (!bb_vinfo
11007       && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
11008       && STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type
11009       && !can_vectorize_live_stmts (stmt_info, NULL, node, node_instance,
11010 				    false, cost_vec))
11011     return opt_result::failure_at (stmt_info->stmt,
11012 				   "not vectorized:"
11013 				   " live stmt not supported: %G",
11014 				   stmt_info->stmt);
11015 
11016   return opt_result::success ();
11017 }
11018 
11019 
11020 /* Function vect_transform_stmt.
11021 
11022    Create a vectorized stmt to replace STMT_INFO, and insert it at GSI.  */
11023 
11024 bool
11025 vect_transform_stmt (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
11026 		     slp_tree slp_node, slp_instance slp_node_instance)
11027 {
11028   vec_info *vinfo = stmt_info->vinfo;
11029   bool is_store = false;
11030   stmt_vec_info vec_stmt = NULL;
11031   bool done;
11032 
11033   gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
11034   stmt_vec_info old_vec_stmt_info = STMT_VINFO_VEC_STMT (stmt_info);
11035 
11036   bool nested_p = (STMT_VINFO_LOOP_VINFO (stmt_info)
11037 		   && nested_in_vect_loop_p
11038 		        (LOOP_VINFO_LOOP (STMT_VINFO_LOOP_VINFO (stmt_info)),
11039 			 stmt_info));
11040 
11041   gimple *stmt = stmt_info->stmt;
11042   switch (STMT_VINFO_TYPE (stmt_info))
11043     {
11044     case type_demotion_vec_info_type:
11045     case type_promotion_vec_info_type:
11046     case type_conversion_vec_info_type:
11047       done = vectorizable_conversion (stmt_info, gsi, &vec_stmt, slp_node,
11048 				      NULL);
11049       gcc_assert (done);
11050       break;
11051 
11052     case induc_vec_info_type:
11053       done = vectorizable_induction (stmt_info, gsi, &vec_stmt, slp_node,
11054 				     NULL);
11055       gcc_assert (done);
11056       break;
11057 
11058     case shift_vec_info_type:
11059       done = vectorizable_shift (stmt_info, gsi, &vec_stmt, slp_node, NULL);
11060       gcc_assert (done);
11061       break;
11062 
11063     case op_vec_info_type:
11064       done = vectorizable_operation (stmt_info, gsi, &vec_stmt, slp_node,
11065 				     NULL);
11066       gcc_assert (done);
11067       break;
11068 
11069     case assignment_vec_info_type:
11070       done = vectorizable_assignment (stmt_info, gsi, &vec_stmt, slp_node,
11071 				      NULL);
11072       gcc_assert (done);
11073       break;
11074 
11075     case load_vec_info_type:
11076       done = vectorizable_load (stmt_info, gsi, &vec_stmt, slp_node,
11077                                 slp_node_instance, NULL);
11078       gcc_assert (done);
11079       break;
11080 
11081     case store_vec_info_type:
11082       done = vectorizable_store (stmt_info, gsi, &vec_stmt, slp_node, NULL);
11083       gcc_assert (done);
11084       if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
11085 	{
11086 	  /* In case of interleaving, the whole chain is vectorized when the
11087 	     last store in the chain is reached.  Store stmts before the last
11088 	     one are skipped, and their vec_stmt_info shouldn't be freed
11089 	     meanwhile.  */
11090 	  stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
11091 	  if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
11092 	    is_store = true;
11093 	}
11094       else
11095 	is_store = true;
11096       break;
11097 
11098     case condition_vec_info_type:
11099       done = vectorizable_condition (stmt_info, gsi, &vec_stmt, slp_node, NULL);
11100       gcc_assert (done);
11101       break;
11102 
11103     case comparison_vec_info_type:
11104       done = vectorizable_comparison (stmt_info, gsi, &vec_stmt,
11105 				      slp_node, NULL);
11106       gcc_assert (done);
11107       break;
11108 
11109     case call_vec_info_type:
11110       done = vectorizable_call (stmt_info, gsi, &vec_stmt, slp_node, NULL);
11111       stmt = gsi_stmt (*gsi);
11112       break;
11113 
11114     case call_simd_clone_vec_info_type:
11115       done = vectorizable_simd_clone_call (stmt_info, gsi, &vec_stmt,
11116 					   slp_node, NULL);
11117       stmt = gsi_stmt (*gsi);
11118       break;
11119 
11120     case reduc_vec_info_type:
11121       done = vect_transform_reduction (stmt_info, gsi, &vec_stmt, slp_node);
11122       gcc_assert (done);
11123       break;
11124 
11125     case cycle_phi_info_type:
11126       done = vect_transform_cycle_phi (stmt_info, &vec_stmt, slp_node,
11127 				       slp_node_instance);
11128       gcc_assert (done);
11129       break;
11130 
11131     case lc_phi_info_type:
11132       done = vectorizable_lc_phi (stmt_info, &vec_stmt, slp_node);
11133       gcc_assert (done);
11134       break;
11135 
11136     default:
11137       if (!STMT_VINFO_LIVE_P (stmt_info))
11138 	{
11139 	  if (dump_enabled_p ())
11140 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11141                              "stmt not supported.\n");
11142 	  gcc_unreachable ();
11143 	}
11144     }
11145 
11146   /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
11147      This would break hybrid SLP vectorization.  */
11148   if (slp_node)
11149     gcc_assert (!vec_stmt
11150 		&& STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt_info);
11151 
11152   /* Handle inner-loop stmts whose DEF is used in the loop-nest that
11153      is being vectorized, but outside the immediately enclosing loop.  */
11154   if (vec_stmt
11155       && nested_p
11156       && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
11157       && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
11158           || STMT_VINFO_RELEVANT (stmt_info) ==
11159                                            vect_used_in_outer_by_reduction))
11160     {
11161       class loop *innerloop = LOOP_VINFO_LOOP (
11162                                 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
11163       imm_use_iterator imm_iter;
11164       use_operand_p use_p;
11165       tree scalar_dest;
11166 
11167       if (dump_enabled_p ())
11168         dump_printf_loc (MSG_NOTE, vect_location,
11169                          "Record the vdef for outer-loop vectorization.\n");
11170 
11171       /* Find the relevant loop-exit phi-node, and record the vec_stmt there
11172         (to be used when vectorizing outer-loop stmts that use the DEF of
11173         STMT).  */
11174       if (gimple_code (stmt) == GIMPLE_PHI)
11175         scalar_dest = PHI_RESULT (stmt);
11176       else
11177         scalar_dest = gimple_get_lhs (stmt);
11178 
11179       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
11180 	if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
11181 	  {
11182 	    stmt_vec_info exit_phi_info
11183 	      = vinfo->lookup_stmt (USE_STMT (use_p));
11184 	    STMT_VINFO_VEC_STMT (exit_phi_info) = vec_stmt;
11185 	  }
11186     }
11187 
11188   if (vec_stmt)
11189     STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
11190 
11191   if (STMT_VINFO_TYPE (stmt_info) == store_vec_info_type)
11192     return is_store;
11193 
11194   /* Handle stmts whose DEF is used outside the loop-nest that is
11195      being vectorized.  */
11196   done = can_vectorize_live_stmts (stmt_info, gsi, slp_node,
11197 				   slp_node_instance, true, NULL);
11198   gcc_assert (done);
11199 
11200   return false;
11201 }
11202 
11203 
11204 /* Remove a group of stores (for SLP or interleaving), free their
11205    stmt_vec_info.  */
11206 
11207 void
11208 vect_remove_stores (stmt_vec_info first_stmt_info)
11209 {
11210   vec_info *vinfo = first_stmt_info->vinfo;
11211   stmt_vec_info next_stmt_info = first_stmt_info;
11212 
11213   while (next_stmt_info)
11214     {
11215       stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
11216       next_stmt_info = vect_orig_stmt (next_stmt_info);
11217       /* Free the attached stmt_vec_info and remove the stmt.  */
11218       vinfo->remove_stmt (next_stmt_info);
11219       next_stmt_info = tmp;
11220     }
11221 }
11222 
11223 /* If NUNITS is nonzero, return a vector type that contains NUNITS
11224    elements of type SCALAR_TYPE, or null if the target doesn't support
11225    such a type.
11226 
11227    If NUNITS is zero, return a vector type that contains elements of
11228    type SCALAR_TYPE, choosing whichever vector size the target prefers.
11229 
11230    If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
11231    for this vectorization region and want to "autodetect" the best choice.
11232    Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
11233    and we want the new type to be interoperable with it.   PREVAILING_MODE
11234    in this case can be a scalar integer mode or a vector mode; when it
11235    is a vector mode, the function acts like a tree-level version of
11236    related_vector_mode.  */
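/* A hedged usage sketch (illustrative only; the concrete modes, and hence
   the resulting vector types, are target-dependent):

     tree v1 = get_related_vectype_for_scalar_type (VOIDmode,
						    intSI_type_node, 0);
	...autodetect: the target's preferred SIMD mode decides the size.
     tree v2 = get_related_vectype_for_scalar_type (V16QImode,
						    intSI_type_node, 4);
	...request a type interoperable with a previously chosen V16QImode,
	i.e. the same vector size, here four SImode elements (on a target
	that provides such modes).  */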
11237 
11238 tree
11239 get_related_vectype_for_scalar_type (machine_mode prevailing_mode,
11240 				     tree scalar_type, poly_uint64 nunits)
11241 {
11242   tree orig_scalar_type = scalar_type;
11243   scalar_mode inner_mode;
11244   machine_mode simd_mode;
11245   tree vectype;
11246 
11247   if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
11248       && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
11249     return NULL_TREE;
11250 
11251   unsigned int nbytes = GET_MODE_SIZE (inner_mode);
11252 
11253   /* For vector types of elements whose mode precision doesn't
11254      match their type's precision we use an element type of mode
11255      precision.  The vectorization routines will have to make sure
11256      they support the proper result truncation/extension.
11257      We also make sure to build vector types with INTEGER_TYPE
11258      component type only.  */
11259   if (INTEGRAL_TYPE_P (scalar_type)
11260       && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
11261 	  || TREE_CODE (scalar_type) != INTEGER_TYPE))
11262     scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
11263 						  TYPE_UNSIGNED (scalar_type));
11264 
11265   /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
11266      When the component mode passes the above test simply use a type
11267      corresponding to that mode.  The theory is that any use that
11268      would cause problems with this will disable vectorization anyway.  */
11269   else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
11270 	   && !INTEGRAL_TYPE_P (scalar_type))
11271     scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
11272 
11273   /* We can't build a vector type of elements with alignment bigger than
11274      their size.  */
11275   else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
11276     scalar_type = lang_hooks.types.type_for_mode (inner_mode,
11277 						  TYPE_UNSIGNED (scalar_type));
11278 
11279   /* If we fell back to using the mode, fail if there was
11280      no scalar type for it.  */
11281   if (scalar_type == NULL_TREE)
11282     return NULL_TREE;
11283 
11284   /* If no prevailing mode was supplied, use the mode the target prefers.
11285      Otherwise lookup a vector mode based on the prevailing mode.  */
11286   if (prevailing_mode == VOIDmode)
11287     {
11288       gcc_assert (known_eq (nunits, 0U));
11289       simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
11290       if (SCALAR_INT_MODE_P (simd_mode))
11291 	{
11292 	  /* Traditional behavior is not to take the integer mode
11293 	     literally, but simply to use it as a way of determining
11294 	     the vector size.  It is up to mode_for_vector to decide
11295 	     what the TYPE_MODE should be.
11296 
11297 	     Note that nunits == 1 is allowed in order to support single
11298 	     element vector types.  */
11299 	  if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits)
11300 	      || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11301 	    return NULL_TREE;
11302 	}
11303     }
11304   else if (SCALAR_INT_MODE_P (prevailing_mode)
11305 	   || !related_vector_mode (prevailing_mode,
11306 				    inner_mode, nunits).exists (&simd_mode))
11307     {
11308       /* Fall back to using mode_for_vector, mostly in the hope of being
11309 	 able to use an integer mode.  */
11310       if (known_eq (nunits, 0U)
11311 	  && !multiple_p (GET_MODE_SIZE (prevailing_mode), nbytes, &nunits))
11312 	return NULL_TREE;
11313 
11314       if (!mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11315 	return NULL_TREE;
11316     }
11317 
11318   vectype = build_vector_type_for_mode (scalar_type, simd_mode);
11319 
11320   /* In cases where the mode was chosen by mode_for_vector, check that
11321      the target actually supports the chosen mode, or that it at least
11322      allows the vector mode to be replaced by a like-sized integer.  */
11323   if (!VECTOR_MODE_P (TYPE_MODE (vectype))
11324       && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
11325     return NULL_TREE;
11326 
11327   /* Re-attach the address-space qualifier if we canonicalized the scalar
11328      type.  */
11329   if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
11330     return build_qualified_type
11331 	     (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
11332 
11333   return vectype;
11334 }
11335 
11336 /* Function get_vectype_for_scalar_type.
11337 
11338    Returns the vector type corresponding to SCALAR_TYPE as supported
11339    by the target.  If GROUP_SIZE is nonzero and we're performing BB
11340    vectorization, make sure that the number of elements in the vector
11341    is no bigger than GROUP_SIZE.  */
11342 
11343 tree
11344 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
11345 			     unsigned int group_size)
11346 {
11347   /* For BB vectorization, we should always have a group size once we've
11348      constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
11349      are tentative requests during things like early data reference
11350      analysis and pattern recognition.  */
11351   if (is_a <bb_vec_info> (vinfo))
11352     gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
11353   else
11354     group_size = 0;
11355 
11356   tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11357 						      scalar_type);
11358   if (vectype && vinfo->vector_mode == VOIDmode)
11359     vinfo->vector_mode = TYPE_MODE (vectype);
11360 
11361   /* Register the natural choice of vector type, before the group size
11362      has been applied.  */
11363   if (vectype)
11364     vinfo->used_vector_modes.add (TYPE_MODE (vectype));
11365 
11366   /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
11367      try again with an explicit number of elements.  */
11368   if (vectype
11369       && group_size
11370       && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size))
11371     {
11372       /* Start with the biggest number of units that fits within
11373 	 GROUP_SIZE and halve it until we find a valid vector type.
11374 	 Usually either the first attempt will succeed or all will
11375 	 fail (in the latter case because GROUP_SIZE is too small
11376 	 for the target), but it's possible that a target could have
11377 	 a hole between supported vector types.
11378 
11379 	 If GROUP_SIZE is not a power of 2, this has the effect of
11380 	 trying the largest power of 2 that fits within the group,
11381 	 even though the group is not a multiple of that vector size.
11382 	 The BB vectorizer will then try to carve up the group into
11383 	 smaller pieces.  */
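      /* For example, with GROUP_SIZE == 6 the loop below first tries
	 4-element vectors (1 << floor_log2 (6)) and then 2-element
	 vectors.  */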
11384       unsigned int nunits = 1 << floor_log2 (group_size);
11385       do
11386 	{
11387 	  vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11388 							 scalar_type, nunits);
11389 	  nunits /= 2;
11390 	}
11391       while (nunits > 1 && !vectype);
11392     }
11393 
11394   return vectype;
11395 }
11396 
11397 /* Return the vector type corresponding to SCALAR_TYPE as supported
11398    by the target.  NODE, if nonnull, is the SLP tree node that will
11399    use the returned vector type.  */
11400 
11401 tree
11402 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node)
11403 {
11404   unsigned int group_size = 0;
11405   if (node)
11406     {
11407       group_size = SLP_TREE_SCALAR_OPS (node).length ();
11408       if (group_size == 0)
11409 	group_size = SLP_TREE_SCALAR_STMTS (node).length ();
11410     }
11411   return get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11412 }
11413 
11414 /* Function get_mask_type_for_scalar_type.
11415 
11416    Returns the mask type corresponding to a result of comparison
11417    of vectors of the specified SCALAR_TYPE as supported by the target.
11418    If GROUP_SIZE is nonzero and we're performing BB vectorization,
11419    make sure that the number of elements in the vector is no bigger
11420    than GROUP_SIZE.  */
11421 
11422 tree
11423 get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
11424 			       unsigned int group_size)
11425 {
11426   tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11427 
11428   if (!vectype)
11429     return NULL;
11430 
11431   return truth_type_for (vectype);
11432 }
11433 
11434 /* Function get_same_sized_vectype
11435 
11436    Returns a vector type corresponding to SCALAR_TYPE of size
11437    VECTOR_TYPE if supported by the target.  */
11438 
11439 tree
11440 get_same_sized_vectype (tree scalar_type, tree vector_type)
11441 {
11442   if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
11443     return truth_type_for (vector_type);
11444 
11445   poly_uint64 nunits;
11446   if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)),
11447 		   GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits))
11448     return NULL_TREE;
11449 
11450   return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
11451 					      scalar_type, nunits);
11452 }
11453 
11454 /* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
11455    would not change the chosen vector modes.  */
11456 
11457 bool
11458 vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode)
11459 {
11460   for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin ();
11461        i != vinfo->used_vector_modes.end (); ++i)
11462     if (!VECTOR_MODE_P (*i)
11463 	|| related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i)
11464       return false;
11465   return true;
11466 }
11467 
11468 /* Function vect_is_simple_use.
11469 
11470    Input:
11471    VINFO - the vect info of the loop or basic block that is being vectorized.
11472    OPERAND - operand in the loop or bb.
11473    Output:
11474    DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
11475      case OPERAND is an SSA_NAME that is defined in the vectorizable region
11476    DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
11477      the definition could be anywhere in the function
11478    DT - the type of definition
11479 
11480    Returns whether a stmt with OPERAND can be vectorized.
11481    For loops, supportable operands are constants, loop invariants, and operands
11482    that are defined by the current iteration of the loop.  Unsupportable
11483    operands are those that are defined by a previous iteration of the loop (as
11484    is the case in reduction/induction computations).
11485    For basic blocks, supportable operands are constants and bb invariants.
11486    For now, operands defined outside the basic block are not supported.  */
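/* For example (a sketch; the names are illustrative): for the loop
   statement a_5 = b_4 + 7, calling vect_is_simple_use on the operand 7
   yields vect_constant_def, on an SSA name defined before the vectorized
   region it yields vect_external_def, and on b_4 defined by a statement
   inside the region it yields the definition type recorded for that
   statement (typically vect_internal_def).  */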
11487 
11488 bool
11489 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11490 		    stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
11491 {
11492   if (def_stmt_info_out)
11493     *def_stmt_info_out = NULL;
11494   if (def_stmt_out)
11495     *def_stmt_out = NULL;
11496   *dt = vect_unknown_def_type;
11497 
11498   if (dump_enabled_p ())
11499     {
11500       dump_printf_loc (MSG_NOTE, vect_location,
11501                        "vect_is_simple_use: operand ");
11502       if (TREE_CODE (operand) == SSA_NAME
11503 	  && !SSA_NAME_IS_DEFAULT_DEF (operand))
11504 	dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
11505       else
11506 	dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
11507     }
11508 
11509   if (CONSTANT_CLASS_P (operand))
11510     *dt = vect_constant_def;
11511   else if (is_gimple_min_invariant (operand))
11512     *dt = vect_external_def;
11513   else if (TREE_CODE (operand) != SSA_NAME)
11514     *dt = vect_unknown_def_type;
11515   else if (SSA_NAME_IS_DEFAULT_DEF (operand))
11516     *dt = vect_external_def;
11517   else
11518     {
11519       gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
11520       stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
11521       if (!stmt_vinfo)
11522 	*dt = vect_external_def;
11523       else
11524 	{
11525 	  stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
11526 	  def_stmt = stmt_vinfo->stmt;
11527 	  switch (gimple_code (def_stmt))
11528 	    {
11529 	    case GIMPLE_PHI:
11530 	    case GIMPLE_ASSIGN:
11531 	    case GIMPLE_CALL:
11532 	      *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
11533 	      break;
11534 	    default:
11535 	      *dt = vect_unknown_def_type;
11536 	      break;
11537 	    }
11538 	  if (def_stmt_info_out)
11539 	    *def_stmt_info_out = stmt_vinfo;
11540 	}
11541       if (def_stmt_out)
11542 	*def_stmt_out = def_stmt;
11543     }
11544 
11545   if (dump_enabled_p ())
11546     {
11547       dump_printf (MSG_NOTE, ", type of def: ");
11548       switch (*dt)
11549 	{
11550 	case vect_uninitialized_def:
11551 	  dump_printf (MSG_NOTE, "uninitialized\n");
11552 	  break;
11553 	case vect_constant_def:
11554 	  dump_printf (MSG_NOTE, "constant\n");
11555 	  break;
11556 	case vect_external_def:
11557 	  dump_printf (MSG_NOTE, "external\n");
11558 	  break;
11559 	case vect_internal_def:
11560 	  dump_printf (MSG_NOTE, "internal\n");
11561 	  break;
11562 	case vect_induction_def:
11563 	  dump_printf (MSG_NOTE, "induction\n");
11564 	  break;
11565 	case vect_reduction_def:
11566 	  dump_printf (MSG_NOTE, "reduction\n");
11567 	  break;
11568 	case vect_double_reduction_def:
11569 	  dump_printf (MSG_NOTE, "double reduction\n");
11570 	  break;
11571 	case vect_nested_cycle:
11572 	  dump_printf (MSG_NOTE, "nested cycle\n");
11573 	  break;
11574 	case vect_unknown_def_type:
11575 	  dump_printf (MSG_NOTE, "unknown\n");
11576 	  break;
11577 	}
11578     }
11579 
11580   if (*dt == vect_unknown_def_type)
11581     {
11582       if (dump_enabled_p ())
11583         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11584                          "Unsupported pattern.\n");
11585       return false;
11586     }
11587 
11588   return true;
11589 }
11590 
11591 /* Function vect_is_simple_use.
11592 
11593    Same as vect_is_simple_use but also determines the vector operand
11594    type of OPERAND and stores it to *VECTYPE.  If the definition of
11595    OPERAND is vect_uninitialized_def, vect_constant_def or
11596    vect_external_def *VECTYPE will be set to NULL_TREE and the caller
11597    vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
11598    is responsible for computing the best-suited vector type for the
11599 
11600 bool
11601 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11602 		    tree *vectype, stmt_vec_info *def_stmt_info_out,
11603 		    gimple **def_stmt_out)
11604 {
11605   stmt_vec_info def_stmt_info;
11606   gimple *def_stmt;
11607   if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
11608     return false;
11609 
11610   if (def_stmt_out)
11611     *def_stmt_out = def_stmt;
11612   if (def_stmt_info_out)
11613     *def_stmt_info_out = def_stmt_info;
11614 
11615   /* Now get a vector type if the def is internal, otherwise supply
11616      NULL_TREE and leave it up to the caller to figure out a proper
11617      type for the use stmt.  */
11618   if (*dt == vect_internal_def
11619       || *dt == vect_induction_def
11620       || *dt == vect_reduction_def
11621       || *dt == vect_double_reduction_def
11622       || *dt == vect_nested_cycle)
11623     {
11624       *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
11625       gcc_assert (*vectype != NULL_TREE);
11626       if (dump_enabled_p ())
11627 	dump_printf_loc (MSG_NOTE, vect_location,
11628 			 "vect_is_simple_use: vectype %T\n", *vectype);
11629     }
11630   else if (*dt == vect_uninitialized_def
11631 	   || *dt == vect_constant_def
11632 	   || *dt == vect_external_def)
11633     *vectype = NULL_TREE;
11634   else
11635     gcc_unreachable ();
11636 
11637   return true;
11638 }
11639 
11640 
11641 /* Function supportable_widening_operation
11642 
11643    Check whether an operation represented by the code CODE is a
11644    widening operation that is supported by the target platform in
11645    vector form (i.e., when operating on arguments of type VECTYPE_IN
11646    producing a result of type VECTYPE_OUT).
11647 
11648    Widening operations we currently support are NOP (CONVERT), FLOAT,
11649    FIX_TRUNC and WIDEN_MULT.  This function checks if these operations
11650    are supported by the target platform either directly (via vector
11651    tree-codes), or via target builtins.
11652 
11653    Output:
11654    - CODE1 and CODE2 are codes of vector operations to be used when
11655    vectorizing the operation, if available.
11656    - MULTI_STEP_CVT determines the number of required intermediate steps in
11657    case of multi-step conversion (like char->short->int - in that case
11658    MULTI_STEP_CVT will be 1).
11659    - INTERM_TYPES contains the intermediate type required to perform the
11660    widening operation (short in the above example).  */
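/* An illustrative sketch (hedged; actual support is target-dependent):
   for a char -> int widening conversion with a 16-element char input
   vector and 4-element int output vectors, *CODE1/*CODE2 would be
   VEC_UNPACK_LO_EXPR/VEC_UNPACK_HI_EXPR (swapped on big-endian targets),
   INTERM_TYPES would record the intermediate short vector type and
   *MULTI_STEP_CVT would be 1 for the char -> short -> int sequence.  */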
11661 
11662 bool
11663 supportable_widening_operation (enum tree_code code, stmt_vec_info stmt_info,
11664 				tree vectype_out, tree vectype_in,
11665                                 enum tree_code *code1, enum tree_code *code2,
11666                                 int *multi_step_cvt,
11667                                 vec<tree> *interm_types)
11668 {
11669   loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
11670   class loop *vect_loop = NULL;
11671   machine_mode vec_mode;
11672   enum insn_code icode1, icode2;
11673   optab optab1, optab2;
11674   tree vectype = vectype_in;
11675   tree wide_vectype = vectype_out;
11676   enum tree_code c1, c2;
11677   int i;
11678   tree prev_type, intermediate_type;
11679   machine_mode intermediate_mode, prev_mode;
11680   optab optab3, optab4;
11681 
11682   *multi_step_cvt = 0;
11683   if (loop_info)
11684     vect_loop = LOOP_VINFO_LOOP (loop_info);
11685 
11686   switch (code)
11687     {
11688     case WIDEN_MULT_EXPR:
11689       /* The result of a vectorized widening operation usually requires
11690 	 two vectors (because the widened results do not fit into one vector).
11691 	 The generated vector results would normally be expected to be
11692 	 generated in the same order as in the original scalar computation,
11693 	 i.e. if 8 results are generated in each vector iteration, they are
11694 	 to be organized as follows:
11695 		vect1: [res1,res2,res3,res4],
11696 		vect2: [res5,res6,res7,res8].
11697 
11698 	 However, in the special case that the result of the widening
11699 	 operation is used in a reduction computation only, the order doesn't
11700 	 matter (because when vectorizing a reduction we change the order of
11701 	 the computation).  Some targets can take advantage of this and
11702 	 generate more efficient code.  For example, targets like Altivec,
11703 	 that support widen_mult using a sequence of {mult_even,mult_odd}
11704 	 generate the following vectors:
11705 		vect1: [res1,res3,res5,res7],
11706 		vect2: [res2,res4,res6,res8].
11707 
11708 	 When vectorizing outer-loops, we execute the inner-loop sequentially
11709 	 (each vectorized inner-loop iteration contributes to VF outer-loop
11710 	 iterations in parallel).  We therefore don't allow changing the
11711 	 order of the computation in the inner-loop during outer-loop
11712 	 vectorization.  */
11713       /* TODO: Another case in which order doesn't *really* matter is when we
11714 	 widen and then contract again, e.g. (short)((int)x * y >> 8).
11715 	 Normally, pack_trunc performs an even/odd permute, whereas the
11716 	 repack from an even/odd expansion would be an interleave, which
11717 	 would be significantly simpler for e.g. AVX2.  */
11718       /* In any case, in order to avoid duplicating the code below, recurse
11719 	 on VEC_WIDEN_MULT_EVEN_EXPR.  If it succeeds, all the return values
11720 	 are properly set up for the caller.  If we fail, we'll continue with
11721 	 a VEC_WIDEN_MULT_LO/HI_EXPR check.  */
11722       if (vect_loop
11723 	  && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
11724 	  && !nested_in_vect_loop_p (vect_loop, stmt_info)
11725 	  && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
11726 					     stmt_info, vectype_out,
11727 					     vectype_in, code1, code2,
11728 					     multi_step_cvt, interm_types))
11729         {
11730           /* Elements in a vector with vect_used_by_reduction property cannot
11731              be reordered if the use chain with this property does not have the
11732              same operation.  One such example is s += a * b, where elements
11733              in a and b cannot be reordered.  Here we check if the vector defined
11734              by STMT is only directly used in the reduction statement.  */
11735 	  tree lhs = gimple_assign_lhs (stmt_info->stmt);
11736 	  stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
11737 	  if (use_stmt_info
11738 	      && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
11739 	    return true;
11740         }
11741       c1 = VEC_WIDEN_MULT_LO_EXPR;
11742       c2 = VEC_WIDEN_MULT_HI_EXPR;
11743       break;
11744 
11745     case DOT_PROD_EXPR:
11746       c1 = DOT_PROD_EXPR;
11747       c2 = DOT_PROD_EXPR;
11748       break;
11749 
11750     case SAD_EXPR:
11751       c1 = SAD_EXPR;
11752       c2 = SAD_EXPR;
11753       break;
11754 
11755     case VEC_WIDEN_MULT_EVEN_EXPR:
11756       /* Support the recursion induced just above.  */
11757       c1 = VEC_WIDEN_MULT_EVEN_EXPR;
11758       c2 = VEC_WIDEN_MULT_ODD_EXPR;
11759       break;
11760 
11761     case WIDEN_LSHIFT_EXPR:
11762       c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
11763       c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
11764       break;
11765 
11766     CASE_CONVERT:
11767       c1 = VEC_UNPACK_LO_EXPR;
11768       c2 = VEC_UNPACK_HI_EXPR;
11769       break;
11770 
11771     case FLOAT_EXPR:
11772       c1 = VEC_UNPACK_FLOAT_LO_EXPR;
11773       c2 = VEC_UNPACK_FLOAT_HI_EXPR;
11774       break;
11775 
11776     case FIX_TRUNC_EXPR:
11777       c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
11778       c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
11779       break;
11780 
11781     default:
11782       gcc_unreachable ();
11783     }
11784 
11785   if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
11786     std::swap (c1, c2);
11787 
11788   if (code == FIX_TRUNC_EXPR)
11789     {
11790       /* The signedness is determined from output operand.  */
11791       optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
11792       optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
11793     }
11794   else if (CONVERT_EXPR_CODE_P (code)
11795 	   && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
11796 	   && VECTOR_BOOLEAN_TYPE_P (vectype)
11797 	   && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
11798 	   && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
11799     {
11800       /* If the input and result modes are the same, a different optab
11801 	 is needed where we pass in the number of units in vectype.  */
11802       optab1 = vec_unpacks_sbool_lo_optab;
11803       optab2 = vec_unpacks_sbool_hi_optab;
11804     }
11805   else
11806     {
11807       optab1 = optab_for_tree_code (c1, vectype, optab_default);
11808       optab2 = optab_for_tree_code (c2, vectype, optab_default);
11809     }
11810 
11811   if (!optab1 || !optab2)
11812     return false;
11813 
11814   vec_mode = TYPE_MODE (vectype);
11815   if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
11816        || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
11817     return false;
11818 
11819   *code1 = c1;
11820   *code2 = c2;
11821 
11822   if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
11823       && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
11824     {
11825       if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11826 	return true;
11827       /* For scalar masks we may have different boolean
11828 	 vector types having the same QImode.  Thus we
11829 	 add an additional check on the number of elements.  */
11830       if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
11831 		    TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
11832 	return true;
11833     }
11834 
11835   /* Check if it's a multi-step conversion that can be done using intermediate
11836      types.  */
11837 
11838   prev_type = vectype;
11839   prev_mode = vec_mode;
11840 
11841   if (!CONVERT_EXPR_CODE_P (code))
11842     return false;
11843 
11844   /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
11845      intermediate steps in the promotion sequence.  We try
11846      MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
11847      not.  */
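
  /* As a worked sketch (assuming 128-bit vector modes; no particular
     target is implied): promoting V16QI operands to V4SI results takes
     two unpack steps, QI -> HI and HI -> SI, so on success the loop
     below ends with

       interm_types == { V8HI }   and   *multi_step_cvt == 1,

     i.e. the LO/HI codes chosen above are first applied to the V16QI
     input and then again to the V8HI halves to reach V4SI.  */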
11848   interm_types->create (MAX_INTERM_CVT_STEPS);
11849   for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
11850     {
11851       intermediate_mode = insn_data[icode1].operand[0].mode;
11852       if (VECTOR_BOOLEAN_TYPE_P (prev_type))
11853 	intermediate_type
11854 	  = vect_halve_mask_nunits (prev_type, intermediate_mode);
11855       else
11856 	intermediate_type
11857 	  = lang_hooks.types.type_for_mode (intermediate_mode,
11858 					    TYPE_UNSIGNED (prev_type));
11859 
11860       if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
11861 	  && VECTOR_BOOLEAN_TYPE_P (prev_type)
11862 	  && intermediate_mode == prev_mode
11863 	  && SCALAR_INT_MODE_P (prev_mode))
11864 	{
11865 	  /* If the input and result modes are the same, a different optab
11866 	     is needed where we pass in the number of units in vectype.  */
11867 	  optab3 = vec_unpacks_sbool_lo_optab;
11868 	  optab4 = vec_unpacks_sbool_hi_optab;
11869 	}
11870       else
11871 	{
11872 	  optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
11873 	  optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
11874 	}
11875 
11876       if (!optab3 || !optab4
11877           || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
11878 	  || insn_data[icode1].operand[0].mode != intermediate_mode
11879 	  || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
11880 	  || insn_data[icode2].operand[0].mode != intermediate_mode
11881 	  || ((icode1 = optab_handler (optab3, intermediate_mode))
11882 	      == CODE_FOR_nothing)
11883 	  || ((icode2 = optab_handler (optab4, intermediate_mode))
11884 	      == CODE_FOR_nothing))
11885 	break;
11886 
11887       interm_types->quick_push (intermediate_type);
11888       (*multi_step_cvt)++;
11889 
11890       if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
11891 	  && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
11892 	{
11893 	  if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11894 	    return true;
11895 	  if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
11896 			TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
11897 	    return true;
11898 	}
11899 
11900       prev_type = intermediate_type;
11901       prev_mode = intermediate_mode;
11902     }
11903 
11904   interm_types->release ();
11905   return false;
11906 }
11907 
11908 
11909 /* Function supportable_narrowing_operation
11910 
11911    Check whether an operation represented by the code CODE is a
11912    narrowing operation that is supported by the target platform in
11913    vector form (i.e., when operating on arguments of type VECTYPE_IN
11914    and producing a result of type VECTYPE_OUT).
11915 
11916    Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
11917    and FLOAT.  This function checks if these operations are supported by
11918    the target platform directly via vector tree-codes.
11919 
11920    Output:
11921    - CODE1 is the code of a vector operation to be used when
11922    vectorizing the operation, if available.
11923    - MULTI_STEP_CVT determines the number of required intermediate steps in
11924    case of multi-step conversion (like int->short->char - in that case
11925    MULTI_STEP_CVT will be 1).
11926    - INTERM_TYPES contains the intermediate type required to perform the
11927    narrowing operation (short in the above example).   */
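
/* For the int -> short -> char example above, a caller might use this
   function roughly as follows (a sketch only, assuming 128-bit vector
   types "v16qi_type" and "v4si_type"; these variable names are
   illustrative and do not appear in the vectorizer):

     enum tree_code code1;
     int multi_step_cvt = 0;
     vec<tree> interm_types = vNULL;
     supportable_narrowing_operation (NOP_EXPR, v16qi_type, v4si_type,
				      &code1, &multi_step_cvt,
				      &interm_types);
     interm_types.release ();

   On success this sets code1 == VEC_PACK_TRUNC_EXPR and
   multi_step_cvt == 1, and makes interm_types hold the intermediate
   vector(8) short type.  */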
11928 
11929 bool
11930 supportable_narrowing_operation (enum tree_code code,
11931 				 tree vectype_out, tree vectype_in,
11932 				 enum tree_code *code1, int *multi_step_cvt,
11933                                  vec<tree> *interm_types)
11934 {
11935   machine_mode vec_mode;
11936   enum insn_code icode1;
11937   optab optab1, interm_optab;
11938   tree vectype = vectype_in;
11939   tree narrow_vectype = vectype_out;
11940   enum tree_code c1;
11941   tree intermediate_type, prev_type;
11942   machine_mode intermediate_mode, prev_mode;
11943   int i;
11944   bool uns;
11945 
11946   *multi_step_cvt = 0;
11947   switch (code)
11948     {
11949     CASE_CONVERT:
11950       c1 = VEC_PACK_TRUNC_EXPR;
11951       if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
11952 	  && VECTOR_BOOLEAN_TYPE_P (vectype)
11953 	  && TYPE_MODE (narrow_vectype) == TYPE_MODE (vectype)
11954 	  && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
11955 	optab1 = vec_pack_sbool_trunc_optab;
11956       else
11957 	optab1 = optab_for_tree_code (c1, vectype, optab_default);
11958       break;
11959 
11960     case FIX_TRUNC_EXPR:
11961       c1 = VEC_PACK_FIX_TRUNC_EXPR;
11962       /* The signedness is determined from the output operand.  */
11963       optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
11964       break;
11965 
11966     case FLOAT_EXPR:
11967       c1 = VEC_PACK_FLOAT_EXPR;
11968       optab1 = optab_for_tree_code (c1, vectype, optab_default);
11969       break;
11970 
11971     default:
11972       gcc_unreachable ();
11973     }
11974 
11975   if (!optab1)
11976     return false;
11977 
11978   vec_mode = TYPE_MODE (vectype);
11979   if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
11980     return false;
11981 
11982   *code1 = c1;
11983 
11984   if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
11985     {
11986       if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11987 	return true;
11988       /* For scalar masks we may have different boolean
11989 	 vector types having the same QImode.  Thus we
11990 	 add an additional check on the number of elements.  */
11991       if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
11992 		    TYPE_VECTOR_SUBPARTS (narrow_vectype)))
11993 	return true;
11994     }
11995 
11996   if (code == FLOAT_EXPR)
11997     return false;
11998 
11999   /* Check if it's a multi-step conversion that can be done using intermediate
12000      types.  */
12001   prev_mode = vec_mode;
12002   prev_type = vectype;
12003   if (code == FIX_TRUNC_EXPR)
12004     uns = TYPE_UNSIGNED (vectype_out);
12005   else
12006     uns = TYPE_UNSIGNED (vectype);
12007 
12008   /* For multi-step FIX_TRUNC_EXPR prefer a signed floating-point to
12009      integer conversion over an unsigned one, as an unsigned
12010      FIX_TRUNC_EXPR is often more costly than a signed one.  */
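
  /* For example, a multi-step double -> unsigned short conversion first
     converts to 32-bit integers and then packs down to 16 bits.  Every
     value that is valid for the final unsigned short also fits in a
     signed 32-bit integer, so the first step can use the signed
     conversion, which many targets implement more cheaply; the later
     VEC_PACK_TRUNC steps just truncate bits.  (Illustrative sketch; the
     exact modes depend on the target.)  */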
12011   if (code == FIX_TRUNC_EXPR && uns)
12012     {
12013       enum insn_code icode2;
12014 
12015       intermediate_type
12016 	= lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
12017       interm_optab
12018 	= optab_for_tree_code (c1, intermediate_type, optab_default);
12019       if (interm_optab != unknown_optab
12020 	  && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
12021 	  && insn_data[icode1].operand[0].mode
12022 	     == insn_data[icode2].operand[0].mode)
12023 	{
12024 	  uns = false;
12025 	  optab1 = interm_optab;
12026 	  icode1 = icode2;
12027 	}
12028     }
12029 
12030   /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
12031      intermediate steps in the narrowing sequence.  We try
12032      MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
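
  /* A worked sketch (assuming 128-bit vector modes and a target that
     provides each VEC_PACK_TRUNC step; no particular target is implied):
     narrowing V2DI inputs to V16QI results takes three pack steps,
     DI -> SI -> HI -> QI, so on success the loop below ends with

       interm_types == { V4SI, V8HI }   and   *multi_step_cvt == 2.  */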
12033   interm_types->create (MAX_INTERM_CVT_STEPS);
12034   for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
12035     {
12036       intermediate_mode = insn_data[icode1].operand[0].mode;
12037       if (VECTOR_BOOLEAN_TYPE_P (prev_type))
12038 	intermediate_type
12039 	  = vect_double_mask_nunits (prev_type, intermediate_mode);
12040       else
12041 	intermediate_type
12042 	  = lang_hooks.types.type_for_mode (intermediate_mode, uns);
12043       if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
12044 	  && VECTOR_BOOLEAN_TYPE_P (prev_type)
12045 	  && intermediate_mode == prev_mode
12046 	  && SCALAR_INT_MODE_P (prev_mode))
12047 	interm_optab = vec_pack_sbool_trunc_optab;
12048       else
12049 	interm_optab
12050 	  = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
12051 				 optab_default);
12052       if (!interm_optab
12053 	  || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
12054 	  || insn_data[icode1].operand[0].mode != intermediate_mode
12055 	  || ((icode1 = optab_handler (interm_optab, intermediate_mode))
12056 	      == CODE_FOR_nothing))
12057 	break;
12058 
12059       interm_types->quick_push (intermediate_type);
12060       (*multi_step_cvt)++;
12061 
12062       if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
12063 	{
12064 	  if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12065 	    return true;
12066 	  if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
12067 			TYPE_VECTOR_SUBPARTS (narrow_vectype)))
12068 	    return true;
12069 	}
12070 
12071       prev_mode = intermediate_mode;
12072       prev_type = intermediate_type;
12073       optab1 = interm_optab;
12074     }
12075 
12076   interm_types->release ();
12077   return false;
12078 }
12079 
12080 /* Generate and return a statement that sets vector mask MASK such that
12081    MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I.  */
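
/* For instance (an illustrative sketch, independent of any target), with
   an 8-element MASK, START_INDEX 7 and END_INDEX 10 the call built below
   would appear in a GIMPLE dump roughly as

     mask = .WHILE_ULT (7, 10, { 0, ... });

   and set MASK to { true, true, true, false, false, false, false, false },
   since 7, 8 and 9 are below 10 while 10 and above are not.  */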
12082 
12083 gcall *
12084 vect_gen_while (tree mask, tree start_index, tree end_index)
12085 {
12086   tree cmp_type = TREE_TYPE (start_index);
12087   tree mask_type = TREE_TYPE (mask);
12088   gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
12089 						       cmp_type, mask_type,
12090 						       OPTIMIZE_FOR_SPEED));
12091   gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
12092 					    start_index, end_index,
12093 					    build_zero_cst (mask_type));
12094   gimple_call_set_lhs (call, mask);
12095   return call;
12096 }
12097 
12098 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
12099    J + START_INDEX < END_INDEX for all J <= I.  Add the statements to SEQ.  */
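
/* In effect this emits into SEQ something like

     tmp = .WHILE_ULT (START_INDEX, END_INDEX, { 0, ... });
     res = ~tmp;

   and returns RES (a sketch of the generated GIMPLE; the temporary names
   are illustrative).  */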
12100 
12101 tree
12102 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
12103 		    tree end_index)
12104 {
12105   tree tmp = make_ssa_name (mask_type);
12106   gcall *call = vect_gen_while (tmp, start_index, end_index);
12107   gimple_seq_add_stmt (seq, call);
12108   return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
12109 }
12110 
12111 /* Try to compute the vector types required to vectorize STMT_INFO,
12112    returning true on success and false if vectorization isn't possible.
12113    If GROUP_SIZE is nonzero and we're performing BB vectorization,
12114    make sure that the number of elements in the vectors is no bigger
12115    than GROUP_SIZE.
12116 
12117    On success:
12118 
12119    - Set *STMT_VECTYPE_OUT to:
12120      - NULL_TREE if the statement doesn't need to be vectorized;
12121      - the equivalent of STMT_VINFO_VECTYPE otherwise.
12122 
12123    - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
12124      number of units needed to vectorize STMT_INFO, or NULL_TREE if the
12125      statement does not help to determine the overall number of units.  */
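
/* For instance (a sketch assuming 128-bit vectors and loop vectorization;
   the statement below is hypothetical), for

     int_dest = (int) short_src;

   *STMT_VECTYPE_OUT would be vector(4) int, derived from the type of the
   lhs, while *NUNITS_VECTYPE_OUT would be vector(8) short int, derived
   from the smallest scalar type in the statement, so that the chosen
   vectorization factor also covers the narrower shorts.  */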
12126 
12127 opt_result
12128 vect_get_vector_types_for_stmt (stmt_vec_info stmt_info,
12129 				tree *stmt_vectype_out,
12130 				tree *nunits_vectype_out,
12131 				unsigned int group_size)
12132 {
12133   vec_info *vinfo = stmt_info->vinfo;
12134   gimple *stmt = stmt_info->stmt;
12135 
12136   /* For BB vectorization, we should always have a group size once we've
12137      constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
12138      are tentative requests during things like early data reference
12139      analysis and pattern recognition.  */
12140   if (is_a <bb_vec_info> (vinfo))
12141     gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
12142   else
12143     group_size = 0;
12144 
12145   *stmt_vectype_out = NULL_TREE;
12146   *nunits_vectype_out = NULL_TREE;
12147 
12148   if (gimple_get_lhs (stmt) == NULL_TREE
12149       /* MASK_STORE has no lhs, but is ok.  */
12150       && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
12151     {
12152       if (is_a <gcall *> (stmt))
12153 	{
12154 	  /* Ignore calls with no lhs.  These must be calls to
12155 	     #pragma omp simd functions, and the vectorization factor
12156 	     they really need can't be determined until
12157 	     vectorizable_simd_clone_call.  */
12158 	  if (dump_enabled_p ())
12159 	    dump_printf_loc (MSG_NOTE, vect_location,
12160 			     "defer to SIMD clone analysis.\n");
12161 	  return opt_result::success ();
12162 	}
12163 
12164       return opt_result::failure_at (stmt,
12165 				     "not vectorized: irregular stmt.%G", stmt);
12166     }
12167 
12168   if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
12169     return opt_result::failure_at (stmt,
12170 				   "not vectorized: vector stmt in loop:%G",
12171 				   stmt);
12172 
12173   tree vectype;
12174   tree scalar_type = NULL_TREE;
12175   if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
12176     {
12177       vectype = STMT_VINFO_VECTYPE (stmt_info);
12178       if (dump_enabled_p ())
12179 	dump_printf_loc (MSG_NOTE, vect_location,
12180 			 "precomputed vectype: %T\n", vectype);
12181     }
12182   else if (vect_use_mask_type_p (stmt_info))
12183     {
12184       unsigned int precision = stmt_info->mask_precision;
12185       scalar_type = build_nonstandard_integer_type (precision, 1);
12186       vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
12187       if (!vectype)
12188 	return opt_result::failure_at (stmt, "not vectorized: unsupported"
12189 				       " data-type %T\n", scalar_type);
12190       if (dump_enabled_p ())
12191 	dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
12192     }
12193   else
12194     {
12195       if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
12196 	scalar_type = TREE_TYPE (DR_REF (dr));
12197       else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
12198 	scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
12199       else
12200 	scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
12201 
12202       if (dump_enabled_p ())
12203 	{
12204 	  if (group_size)
12205 	    dump_printf_loc (MSG_NOTE, vect_location,
12206 			     "get vectype for scalar type (group size %d):"
12207 			     " %T\n", group_size, scalar_type);
12208 	  else
12209 	    dump_printf_loc (MSG_NOTE, vect_location,
12210 			     "get vectype for scalar type: %T\n", scalar_type);
12211 	}
12212       vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
12213       if (!vectype)
12214 	return opt_result::failure_at (stmt,
12215 				       "not vectorized:"
12216 				       " unsupported data-type %T\n",
12217 				       scalar_type);
12218 
12219       if (dump_enabled_p ())
12220 	dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
12221     }
12222   *stmt_vectype_out = vectype;
12223 
12224   /* Don't try to compute scalar types if the stmt produces a boolean
12225      vector; use the existing vector type instead.  */
12226   tree nunits_vectype = vectype;
12227   if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12228     {
12229       /* The number of units is set according to the smallest scalar
12230 	 type (or the largest vector size, but we only support one
12231 	 vector size per vectorization).  */
12232       HOST_WIDE_INT dummy;
12233       scalar_type = vect_get_smallest_scalar_type (stmt_info, &dummy, &dummy);
12234       if (scalar_type != TREE_TYPE (vectype))
12235 	{
12236 	  if (dump_enabled_p ())
12237 	    dump_printf_loc (MSG_NOTE, vect_location,
12238 			     "get vectype for smallest scalar type: %T\n",
12239 			     scalar_type);
12240 	  nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
12241 							group_size);
12242 	  if (!nunits_vectype)
12243 	    return opt_result::failure_at
12244 	      (stmt, "not vectorized: unsupported data-type %T\n",
12245 	       scalar_type);
12246 	  if (dump_enabled_p ())
12247 	    dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
12248 			     nunits_vectype);
12249 	}
12250     }
12251 
12252   if (!multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
12253 		   TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)))
12254     return opt_result::failure_at (stmt,
12255 				   "Not vectorized: Incompatible number "
12256 				   "of vector subparts between %T and %T\n",
12257 				   nunits_vectype, *stmt_vectype_out);
12258 
12259   if (dump_enabled_p ())
12260     {
12261       dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
12262       dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
12263       dump_printf (MSG_NOTE, "\n");
12264     }
12265 
12266   *nunits_vectype_out = nunits_vectype;
12267   return opt_result::success ();
12268 }
12269