1 /* Statement Analysis and Transformation for Vectorization
2    Copyright (C) 2003-2020 Free Software Foundation, Inc.
3    Contributed by Dorit Naishlos <dorit@il.ibm.com>
4    and Ira Rosen <irar@il.ibm.com>
5 
6 This file is part of GCC.
7 
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12 
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
16 for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3.  If not see
20 <http://www.gnu.org/licenses/>.  */
21 
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h"		/* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "explow.h"
47 #include "tree-ssa-loop.h"
48 #include "tree-scalar-evolution.h"
49 #include "tree-vectorizer.h"
50 #include "builtins.h"
51 #include "internal-fn.h"
52 #include "tree-vector-builder.h"
53 #include "vec-perm-indices.h"
54 #include "tree-ssa-loop-niter.h"
55 #include "gimple-fold.h"
56 #include "regs.h"
57 #include "attribs.h"
58 
59 /* For lang_hooks.types.type_for_mode.  */
60 #include "langhooks.h"
61 
62 /* Return the vectorized type for the given statement.  */
63 
64 tree
65 stmt_vectype (class _stmt_vec_info *stmt_info)
66 {
67   return STMT_VINFO_VECTYPE (stmt_info);
68 }
69 
70 /* Return TRUE iff the given statement is in an inner loop relative to
71    the loop being vectorized.  */
72 bool
73 stmt_in_inner_loop_p (class _stmt_vec_info *stmt_info)
74 {
75   gimple *stmt = STMT_VINFO_STMT (stmt_info);
76   basic_block bb = gimple_bb (stmt);
77   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
78   class loop* loop;
79 
80   if (!loop_vinfo)
81     return false;
82 
83   loop = LOOP_VINFO_LOOP (loop_vinfo);
84 
85   return (bb->loop_father == loop->inner);
86 }
87 
88 /* Record the cost of a statement, either by directly informing the
89    target model or by saving it in a vector for later processing.
90    Return a preliminary estimate of the statement's cost.  */
91 
92 unsigned
93 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
94 		  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
95 		  int misalign, enum vect_cost_model_location where)
96 {
97   if ((kind == vector_load || kind == unaligned_load)
98       && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
99     kind = vector_gather_load;
100   if ((kind == vector_store || kind == unaligned_store)
101       && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
102     kind = vector_scatter_store;
103 
104   stmt_info_for_cost si = { count, kind, where, stmt_info, misalign };
105   body_cost_vec->safe_push (si);
106 
107   tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
108   return (unsigned)
109       (builtin_vectorization_cost (kind, vectype, misalign) * count);
110 }
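
/* Illustrative usage sketch (mirroring calls further below in this file):
   a caller that wants to account for broadcasting one invariant operand
   in the prologue would do something like

     prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
					stmt_info, 0, vect_prologue);

   which both pushes a cost entry onto COST_VEC and returns the target's
   preliminary estimate.  */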
111 
112 /* Return a variable of type ELEM_TYPE[NELEMS].  */
113 
114 static tree
115 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
116 {
117   return create_tmp_var (build_array_type_nelts (elem_type, nelems),
118 			 "vect_array");
119 }
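
/* For example (purely illustrative): with ELEM_TYPE a 4-element float
   vector type and NELEMS 2, this returns a temporary "vect_array" whose
   type is a two-element array of that vector type; the individual
   vectors are then accessed via read_vector_array and write_vector_array
   below.  */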
120 
121 /* ARRAY is an array of vectors created by create_vector_array.
122    Return an SSA_NAME for the vector in index N.  The reference
123    is part of the vectorization of STMT_INFO and the vector is associated
124    with scalar destination SCALAR_DEST.  */
125 
126 static tree
127 read_vector_array (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
128 		   tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
129 {
130   tree vect_type, vect, vect_name, array_ref;
131   gimple *new_stmt;
132 
133   gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
134   vect_type = TREE_TYPE (TREE_TYPE (array));
135   vect = vect_create_destination_var (scalar_dest, vect_type);
136   array_ref = build4 (ARRAY_REF, vect_type, array,
137 		      build_int_cst (size_type_node, n),
138 		      NULL_TREE, NULL_TREE);
139 
140   new_stmt = gimple_build_assign (vect, array_ref);
141   vect_name = make_ssa_name (vect, new_stmt);
142   gimple_assign_set_lhs (new_stmt, vect_name);
143   vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
144 
145   return vect_name;
146 }
147 
148 /* ARRAY is an array of vectors created by create_vector_array.
149    Emit code to store SSA_NAME VECT in index N of the array.
150    The store is part of the vectorization of STMT_INFO.  */
151 
152 static void
153 write_vector_array (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
154 		    tree vect, tree array, unsigned HOST_WIDE_INT n)
155 {
156   tree array_ref;
157   gimple *new_stmt;
158 
159   array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
160 		      build_int_cst (size_type_node, n),
161 		      NULL_TREE, NULL_TREE);
162 
163   new_stmt = gimple_build_assign (array_ref, vect);
164   vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
165 }
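
/* Illustrative sketch of the GIMPLE emitted by the two helpers above
   (the SSA names are made up):

     vect_x.3_5 = vect_array[0];	<- read_vector_array
     vect_array[1] = vect_y_7;		<- write_vector_array

   Both statements are inserted at *GSI via vect_finish_stmt_generation.  */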
166 
167 /* PTR is a pointer to an array of type TYPE.  Return a representation
168    of *PTR.  The memory reference replaces those in FIRST_DR
169    (and its group).  */
170 
171 static tree
172 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
173 {
174   tree mem_ref;
175 
176   mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
177   /* Arrays have the same alignment as their type.  */
178   set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
179   return mem_ref;
180 }
181 
182 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
183    Emit the clobber before *GSI.  */
184 
185 static void
186 vect_clobber_variable (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
187 		       tree var)
188 {
189   tree clobber = build_clobber (TREE_TYPE (var));
190   gimple *new_stmt = gimple_build_assign (var, clobber);
191   vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
192 }
193 
194 /* Utility functions used by vect_mark_stmts_to_be_vectorized.  */
195 
196 /* Function vect_mark_relevant.
197 
198    Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST.  */
199 
200 static void
201 vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
202 		    enum vect_relevant relevant, bool live_p)
203 {
204   enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
205   bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
206 
207   if (dump_enabled_p ())
208     dump_printf_loc (MSG_NOTE, vect_location,
209 		     "mark relevant %d, live %d: %G", relevant, live_p,
210 		     stmt_info->stmt);
211 
212   /* If this stmt is an original stmt in a pattern, we might need to mark its
213      related pattern stmt instead of the original stmt.  However, such stmts
214      may have their own uses that are not in any pattern; in such cases the
215      stmt itself should be marked.  */
216   if (STMT_VINFO_IN_PATTERN_P (stmt_info))
217     {
218       /* This is the last stmt in a sequence that was detected as a
219 	 pattern that can potentially be vectorized.  Don't mark the stmt
220 	 as relevant/live because it's not going to be vectorized.
221 	 Instead mark the pattern-stmt that replaces it.  */
222 
223       if (dump_enabled_p ())
224 	dump_printf_loc (MSG_NOTE, vect_location,
225 			 "last stmt in pattern. don't mark"
226 			 " relevant/live.\n");
227       stmt_vec_info old_stmt_info = stmt_info;
228       stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
229       gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
230       save_relevant = STMT_VINFO_RELEVANT (stmt_info);
231       save_live_p = STMT_VINFO_LIVE_P (stmt_info);
232     }
233 
234   STMT_VINFO_LIVE_P (stmt_info) |= live_p;
235   if (relevant > STMT_VINFO_RELEVANT (stmt_info))
236     STMT_VINFO_RELEVANT (stmt_info) = relevant;
237 
238   if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
239       && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
240     {
241       if (dump_enabled_p ())
242         dump_printf_loc (MSG_NOTE, vect_location,
243                          "already marked relevant/live.\n");
244       return;
245     }
246 
247   worklist->safe_push (stmt_info);
248 }
249 
250 
251 /* Function is_simple_and_all_uses_invariant
252 
253    Return true if STMT_INFO is simple and all uses of it are invariant.  */
254 
255 bool
256 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
257 				  loop_vec_info loop_vinfo)
258 {
259   tree op;
260   ssa_op_iter iter;
261 
262   gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
263   if (!stmt)
264     return false;
265 
266   FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
267     {
268       enum vect_def_type dt = vect_uninitialized_def;
269 
270       if (!vect_is_simple_use (op, loop_vinfo, &dt))
271 	{
272 	  if (dump_enabled_p ())
273 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
274 			     "use not simple.\n");
275 	  return false;
276 	}
277 
278       if (dt != vect_external_def && dt != vect_constant_def)
279 	return false;
280     }
281   return true;
282 }
283 
284 /* Function vect_stmt_relevant_p.
285 
286    Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
287    is "relevant for vectorization".
288 
289    A stmt is considered "relevant for vectorization" if:
290    - it has uses outside the loop.
291    - it has vdefs (it alters memory).
292    - it is a control stmt in the loop (except for the exit condition).
293 
294    CHECKME: what other side effects would the vectorizer allow?  */
295 
296 static bool
297 vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
298 		      enum vect_relevant *relevant, bool *live_p)
299 {
300   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
301   ssa_op_iter op_iter;
302   imm_use_iterator imm_iter;
303   use_operand_p use_p;
304   def_operand_p def_p;
305 
306   *relevant = vect_unused_in_scope;
307   *live_p = false;
308 
309   /* cond stmt other than loop exit cond.  */
310   if (is_ctrl_stmt (stmt_info->stmt)
311       && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
312     *relevant = vect_used_in_scope;
313 
314   /* changing memory.  */
315   if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
316     if (gimple_vdef (stmt_info->stmt)
317 	&& !gimple_clobber_p (stmt_info->stmt))
318       {
319 	if (dump_enabled_p ())
320 	  dump_printf_loc (MSG_NOTE, vect_location,
321                            "vec_stmt_relevant_p: stmt has vdefs.\n");
322 	*relevant = vect_used_in_scope;
323       }
324 
325   /* uses outside the loop.  */
326   FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
327     {
328       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
329 	{
330 	  basic_block bb = gimple_bb (USE_STMT (use_p));
331 	  if (!flow_bb_inside_loop_p (loop, bb))
332 	    {
333 	      if (is_gimple_debug (USE_STMT (use_p)))
334 		continue;
335 
336 	      if (dump_enabled_p ())
337 		dump_printf_loc (MSG_NOTE, vect_location,
338                                  "vec_stmt_relevant_p: used out of loop.\n");
339 
340 	      /* We expect all such uses to be in the loop exit phis
341 		 (because of loop-closed SSA form).  */
342 	      gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
343 	      gcc_assert (bb == single_exit (loop)->dest);
344 
345               *live_p = true;
346 	    }
347 	}
348     }
349 
350   if (*live_p && *relevant == vect_unused_in_scope
351       && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
352     {
353       if (dump_enabled_p ())
354 	dump_printf_loc (MSG_NOTE, vect_location,
355 			 "vec_stmt_relevant_p: stmt live but not relevant.\n");
356       *relevant = vect_used_only_live;
357     }
358 
359   return (*live_p || *relevant);
360 }
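
/* Illustrative example (not from the original sources) for
   vect_stmt_relevant_p: in

     for (i = 0; i < n; i++)
       {
	 sum = sum + a[i];   <- result used after the loop: live
	 b[i] = a[i];        <- stores to memory (has a vdef): relevant
       }

   the store is marked vect_used_in_scope because it alters memory, while
   the summation is marked live because its value is used outside the
   loop through the loop-closed exit phi.  */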
361 
362 
363 /* Function exist_non_indexing_operands_for_use_p
364 
365    USE is one of the uses attached to STMT_INFO.  Check if USE is
366    used in STMT_INFO for anything other than indexing an array.  */
367 
368 static bool
369 exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
370 {
371   tree operand;
372 
373   /* USE corresponds to some operand in STMT.  If there is no data
374      reference in STMT, then any operand that corresponds to USE
375      is not indexing an array.  */
376   if (!STMT_VINFO_DATA_REF (stmt_info))
377     return true;
378 
379   /* STMT has a data_ref.  FORNOW this means that it's of one of
380      the following forms:
381      -1- ARRAY_REF = var
382      -2- var = ARRAY_REF
383      (This should have been verified in analyze_data_refs).
384 
385      'var' in the second case corresponds to a def, not a use,
386      so USE cannot correspond to any operands that are not used
387      for array indexing.
388 
389      Therefore, all we need to check is if STMT falls into the
390      first case, and whether var corresponds to USE.  */
391 
392   gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
393   if (!assign || !gimple_assign_copy_p (assign))
394     {
395       gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
396       if (call && gimple_call_internal_p (call))
397 	{
398 	  internal_fn ifn = gimple_call_internal_fn (call);
399 	  int mask_index = internal_fn_mask_index (ifn);
400 	  if (mask_index >= 0
401 	      && use == gimple_call_arg (call, mask_index))
402 	    return true;
403 	  int stored_value_index = internal_fn_stored_value_index (ifn);
404 	  if (stored_value_index >= 0
405 	      && use == gimple_call_arg (call, stored_value_index))
406 	    return true;
407 	  if (internal_gather_scatter_fn_p (ifn)
408 	      && use == gimple_call_arg (call, 1))
409 	    return true;
410 	}
411       return false;
412     }
413 
414   if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
415     return false;
416   operand = gimple_assign_rhs1 (assign);
417   if (TREE_CODE (operand) != SSA_NAME)
418     return false;
419 
420   if (operand == use)
421     return true;
422 
423   return false;
424 }
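
/* Illustrative example: for a store of the form

     a[i_3] = x_4;

   x_4 is a non-indexing operand (the function returns true for it),
   whereas i_3 only feeds the address computation (the function returns
   false for it), so process_use below can leave the relevance of i_3's
   defining statement unchanged.  */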
425 
426 
427 /*
428    Function process_use.
429 
430    Inputs:
431    - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
432    - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
433      that defined USE.  This is done by calling mark_relevant and passing it
434      the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
435    - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
436      be performed.
437 
438    Outputs:
439    Generally, LIVE_P and RELEVANT are used to define the liveness and
440    relevance info of the DEF_STMT of this USE:
441        STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
442        STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
443    Exceptions:
444    - case 1: If USE is used only for address computations (e.g. array indexing),
445    which does not need to be directly vectorized, then the liveness/relevance
446    of the respective DEF_STMT is left unchanged.
447    - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
448    we skip DEF_STMT because it has already been processed.
449    - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
450    "relevant" will be modified accordingly.
451 
452    Return true if everything is as expected. Return false otherwise.  */
453 
454 static opt_result
455 process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
456 	     enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
457 	     bool force)
458 {
459   stmt_vec_info dstmt_vinfo;
460   enum vect_def_type dt;
461 
462   /* case 1: we are only interested in uses that need to be vectorized.  Uses
463      that are used for address computation are not considered relevant.  */
464   if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
465     return opt_result::success ();
466 
467   if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
468     return opt_result::failure_at (stmt_vinfo->stmt,
469 				   "not vectorized:"
470 				   " unsupported use in stmt.\n");
471 
472   if (!dstmt_vinfo)
473     return opt_result::success ();
474 
475   basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
476   basic_block bb = gimple_bb (stmt_vinfo->stmt);
477 
478   /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
479      We have to force the stmt live since the epilogue loop needs it to
480      continue computing the reduction.  */
481   if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
482       && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
483       && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
484       && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
485       && bb->loop_father == def_bb->loop_father)
486     {
487       if (dump_enabled_p ())
488 	dump_printf_loc (MSG_NOTE, vect_location,
489 			 "reduc-stmt defining reduc-phi in the same nest.\n");
490       vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
491       return opt_result::success ();
492     }
493 
494   /* case 3a: outer-loop stmt defining an inner-loop stmt:
495 	outer-loop-header-bb:
496 		d = dstmt_vinfo
497 	inner-loop:
498 		stmt # use (d)
499 	outer-loop-tail-bb:
500 		...		  */
501   if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
502     {
503       if (dump_enabled_p ())
504 	dump_printf_loc (MSG_NOTE, vect_location,
505                          "outer-loop def-stmt defining inner-loop stmt.\n");
506 
507       switch (relevant)
508 	{
509 	case vect_unused_in_scope:
510 	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
511 		      vect_used_in_scope : vect_unused_in_scope;
512 	  break;
513 
514 	case vect_used_in_outer_by_reduction:
515           gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
516 	  relevant = vect_used_by_reduction;
517 	  break;
518 
519 	case vect_used_in_outer:
520           gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
521 	  relevant = vect_used_in_scope;
522 	  break;
523 
524 	case vect_used_in_scope:
525 	  break;
526 
527 	default:
528 	  gcc_unreachable ();
529 	}
530     }
531 
532   /* case 3b: inner-loop stmt defining an outer-loop stmt:
533 	outer-loop-header-bb:
534 		...
535 	inner-loop:
536 		d = dstmt_vinfo
537 	outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
538 		stmt # use (d)		*/
539   else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
540     {
541       if (dump_enabled_p ())
542 	dump_printf_loc (MSG_NOTE, vect_location,
543                          "inner-loop def-stmt defining outer-loop stmt.\n");
544 
545       switch (relevant)
546         {
547         case vect_unused_in_scope:
548           relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
549             || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
550                       vect_used_in_outer_by_reduction : vect_unused_in_scope;
551           break;
552 
553         case vect_used_by_reduction:
554 	case vect_used_only_live:
555           relevant = vect_used_in_outer_by_reduction;
556           break;
557 
558         case vect_used_in_scope:
559           relevant = vect_used_in_outer;
560           break;
561 
562         default:
563           gcc_unreachable ();
564         }
565     }
566   /* We are also not interested in uses on loop PHI backedges that are
567      inductions.  Otherwise we'll needlessly vectorize the IV increment
568      and cause hybrid SLP for SLP inductions.  Unless the PHI is live
569      of course.  */
570   else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
571 	   && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
572 	   && ! STMT_VINFO_LIVE_P (stmt_vinfo)
573 	   && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
574 				      loop_latch_edge (bb->loop_father))
575 	       == use))
576     {
577       if (dump_enabled_p ())
578 	dump_printf_loc (MSG_NOTE, vect_location,
579                          "induction value on backedge.\n");
580       return opt_result::success ();
581     }
582 
583 
584   vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
585   return opt_result::success ();
586 }
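
/* Illustrative summary of cases 3a/3b above (a sketch, not an exhaustive
   table): when an outer-loop definition feeds an inner-loop statement
   whose relevance is vect_used_in_outer, the definition is marked
   vect_used_in_scope; conversely, when an inner-loop definition feeds an
   outer-loop statement whose relevance is vect_used_in_scope, the
   definition is marked vect_used_in_outer.  */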
587 
588 
589 /* Function vect_mark_stmts_to_be_vectorized.
590 
591    Not all stmts in the loop need to be vectorized. For example:
592 
593      for i...
594        for j...
595    1.    T0 = i + j
596    2.	 T1 = a[T0]
597 
598    3.    j = j + 1
599 
600    Stmts 1 and 3 do not need to be vectorized, because loop control and
601    addressing of vectorized data-refs are handled differently.
602 
603    This pass detects such stmts.  */
604 
605 opt_result
606 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
607 {
608   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
609   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
610   unsigned int nbbs = loop->num_nodes;
611   gimple_stmt_iterator si;
612   unsigned int i;
613   basic_block bb;
614   bool live_p;
615   enum vect_relevant relevant;
616 
617   DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
618 
619   auto_vec<stmt_vec_info, 64> worklist;
620 
621   /* 1. Init worklist.  */
622   for (i = 0; i < nbbs; i++)
623     {
624       bb = bbs[i];
625       for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
626 	{
627 	  stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
628 	  if (dump_enabled_p ())
629 	    dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
630 			     phi_info->stmt);
631 
632 	  if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
633 	    vect_mark_relevant (&worklist, phi_info, relevant, live_p);
634 	}
635       for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
636 	{
637 	  stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
638 	  if (dump_enabled_p ())
639 	      dump_printf_loc (MSG_NOTE, vect_location,
640 			       "init: stmt relevant? %G", stmt_info->stmt);
641 
642 	  if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
643 	    vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
644 	}
645     }
646 
647   /* 2. Process_worklist */
648   while (worklist.length () > 0)
649     {
650       use_operand_p use_p;
651       ssa_op_iter iter;
652 
653       stmt_vec_info stmt_vinfo = worklist.pop ();
654       if (dump_enabled_p ())
655 	dump_printf_loc (MSG_NOTE, vect_location,
656 			 "worklist: examine stmt: %G", stmt_vinfo->stmt);
657 
658       /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
659 	 (DEF_STMT) as relevant/irrelevant according to the relevance property
660 	 of STMT.  */
661       relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
662 
663       /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
664 	 propagated as is to the DEF_STMTs of its USEs.
665 
666 	 One exception is when STMT has been identified as defining a reduction
667 	 variable; in this case we set the relevance to vect_used_by_reduction.
668 	 This is because we distinguish between two kinds of relevant stmts -
669 	 those that are used by a reduction computation, and those that are
670 	 (also) used by a regular computation.  This allows us later on to
671 	 identify stmts that are used solely by a reduction, and therefore the
672 	 order of the results that they produce does not have to be kept.  */
673 
674       switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
675         {
676           case vect_reduction_def:
677 	    gcc_assert (relevant != vect_unused_in_scope);
678 	    if (relevant != vect_unused_in_scope
679 		&& relevant != vect_used_in_scope
680 		&& relevant != vect_used_by_reduction
681 		&& relevant != vect_used_only_live)
682 	      return opt_result::failure_at
683 		(stmt_vinfo->stmt, "unsupported use of reduction.\n");
684 	    break;
685 
686           case vect_nested_cycle:
687 	    if (relevant != vect_unused_in_scope
688 		&& relevant != vect_used_in_outer_by_reduction
689 		&& relevant != vect_used_in_outer)
690 	      return opt_result::failure_at
691 		(stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
692             break;
693 
694           case vect_double_reduction_def:
695 	    if (relevant != vect_unused_in_scope
696 		&& relevant != vect_used_by_reduction
697 		&& relevant != vect_used_only_live)
698 	      return opt_result::failure_at
699 		(stmt_vinfo->stmt, "unsupported use of double reduction.\n");
700             break;
701 
702           default:
703             break;
704         }
705 
706       if (is_pattern_stmt_p (stmt_vinfo))
707         {
708           /* Pattern statements are not inserted into the code, so
709              FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
710              have to scan the RHS or function arguments instead.  */
711 	  if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
712 	    {
713 	      enum tree_code rhs_code = gimple_assign_rhs_code (assign);
714 	      tree op = gimple_assign_rhs1 (assign);
715 
716 	      i = 1;
717 	      if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
718 		{
719 		  opt_result res
720 		    = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
721 				   loop_vinfo, relevant, &worklist, false);
722 		  if (!res)
723 		    return res;
724 		  res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
725 				     loop_vinfo, relevant, &worklist, false);
726 		  if (!res)
727 		    return res;
728 		  i = 2;
729 		}
730 	      for (; i < gimple_num_ops (assign); i++)
731 		{
732 		  op = gimple_op (assign, i);
733                   if (TREE_CODE (op) == SSA_NAME)
734 		    {
735 		      opt_result res
736 			= process_use (stmt_vinfo, op, loop_vinfo, relevant,
737 				       &worklist, false);
738 		      if (!res)
739 			return res;
740 		    }
741                  }
742             }
743 	  else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
744 	    {
745 	      for (i = 0; i < gimple_call_num_args (call); i++)
746 		{
747 		  tree arg = gimple_call_arg (call, i);
748 		  opt_result res
749 		    = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
750 				   &worklist, false);
751 		  if (!res)
752 		    return res;
753 		}
754 	    }
755         }
756       else
757 	FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
758           {
759             tree op = USE_FROM_PTR (use_p);
760 	    opt_result res
761 	      = process_use (stmt_vinfo, op, loop_vinfo, relevant,
762 			     &worklist, false);
763 	    if (!res)
764 	      return res;
765           }
766 
767       if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
768 	{
769 	  gather_scatter_info gs_info;
770 	  if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
771 	    gcc_unreachable ();
772 	  opt_result res
773 	    = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
774 			   &worklist, true);
775 	  if (!res)
776 	    {
777 	      if (fatal)
778 		*fatal = false;
779 	      return res;
780 	    }
781 	}
782     } /* while worklist */
783 
784   return opt_result::success ();
785 }
786 
787 /* Compute the prologue cost for invariant or constant operands.  */
788 
789 static unsigned
790 vect_prologue_cost_for_slp_op (slp_tree node, stmt_vec_info stmt_info,
791 			       unsigned opno, enum vect_def_type dt,
792 			       stmt_vector_for_cost *cost_vec)
793 {
794   vec_info *vinfo = stmt_info->vinfo;
795   gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
796   tree op = gimple_op (stmt, opno);
797   unsigned prologue_cost = 0;
798 
799   /* Without looking at the actual initializer, a vector of
800      constants can be implemented as a load from the constant pool.
801      When all elements are the same we can use a splat.  */
802   tree vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), node);
803   unsigned group_size = SLP_TREE_SCALAR_STMTS (node).length ();
804   unsigned num_vects_to_check;
805   unsigned HOST_WIDE_INT const_nunits;
806   unsigned nelt_limit;
807   if (TYPE_VECTOR_SUBPARTS (vectype).is_constant (&const_nunits)
808       && ! multiple_p (const_nunits, group_size))
809     {
810       num_vects_to_check = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
811       nelt_limit = const_nunits;
812     }
813   else
814     {
815       /* If either the vector has variable length or the vectors
816 	 are composed of repeated whole groups we only need to
817 	 cost construction once.  All vectors will be the same.  */
818       num_vects_to_check = 1;
819       nelt_limit = group_size;
820     }
821   tree elt = NULL_TREE;
822   unsigned nelt = 0;
823   for (unsigned j = 0; j < num_vects_to_check * nelt_limit; ++j)
824     {
825       unsigned si = j % group_size;
826       if (nelt == 0)
827 	elt = gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt, opno);
828       /* ???  We're just tracking whether all operands of a single
829 	 vector initializer are the same, ideally we'd check if
830 	 we emitted the same one already.  */
831       else if (elt != gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt,
832 				 opno))
833 	elt = NULL_TREE;
834       nelt++;
835       if (nelt == nelt_limit)
836 	{
837 	  /* ???  We need to pass down stmt_info for a vector type
838 	     even if it points to the wrong stmt.  */
839 	  prologue_cost += record_stmt_cost
840 	      (cost_vec, 1,
841 	       dt == vect_external_def
842 	       ? (elt ? scalar_to_vec : vec_construct)
843 	       : vector_load,
844 	       stmt_info, 0, vect_prologue);
845 	  nelt = 0;
846 	}
847     }
848 
849   return prologue_cost;
850 }
851 
852 /* Function vect_model_simple_cost.
853 
854    Models cost for simple operations, i.e. those that only emit ncopies of a
855    single op.  Right now, this does not account for multiple insns that could
856    be generated for the single vector op.  We will handle that shortly.  */
857 
858 static void
859 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
860 			enum vect_def_type *dt,
861 			int ndts,
862 			slp_tree node,
863 			stmt_vector_for_cost *cost_vec,
864 			vect_cost_for_stmt kind = vector_stmt)
865 {
866   int inside_cost = 0, prologue_cost = 0;
867 
868   gcc_assert (cost_vec != NULL);
869 
870   /* ???  Somehow we need to fix this at the callers.  */
871   if (node)
872     ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
873 
874   if (node)
875     {
876       /* Scan operands and account for prologue cost of constants/externals.
877 	 ???  This over-estimates cost for multiple uses and should be
878 	 re-engineered.  */
879       gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
880       tree lhs = gimple_get_lhs (stmt);
881       for (unsigned i = 0; i < gimple_num_ops (stmt); ++i)
882 	{
883 	  tree op = gimple_op (stmt, i);
884 	  enum vect_def_type dt;
885 	  if (!op || op == lhs)
886 	    continue;
887 	  if (vect_is_simple_use (op, stmt_info->vinfo, &dt)
888 	      && (dt == vect_constant_def || dt == vect_external_def))
889 	    prologue_cost += vect_prologue_cost_for_slp_op (node, stmt_info,
890 							    i, dt, cost_vec);
891 	}
892     }
893   else
894     /* Cost the "broadcast" of a scalar operand into a vector operand.
895        Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
896        cost model.  */
897     for (int i = 0; i < ndts; i++)
898       if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
899 	prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
900 					   stmt_info, 0, vect_prologue);
901 
902   /* Adjust for two-operator SLP nodes.  */
903   if (node && SLP_TREE_TWO_OPERATORS (node))
904     {
905       ncopies *= 2;
906       inside_cost += record_stmt_cost (cost_vec, ncopies, vec_perm,
907 				       stmt_info, 0, vect_body);
908     }
909 
910   /* Pass the inside-of-loop statements to the target-specific cost model.  */
911   inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
912 				   stmt_info, 0, vect_body);
913 
914   if (dump_enabled_p ())
915     dump_printf_loc (MSG_NOTE, vect_location,
916                      "vect_model_simple_cost: inside_cost = %d, "
917                      "prologue_cost = %d .\n", inside_cost, prologue_cost);
918 }
919 
920 
921 /* Model cost for type demotion and promotion operations.  PWR is
922    normally zero for single-step promotions and demotions.  It will be
923    one if two-step promotion/demotion is required, and so on.  NCOPIES
924    is the number of vector results (and thus number of instructions)
925    for the narrowest end of the operation chain.  Each additional
926    step doubles the number of instructions required.  */
927 
928 static void
929 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
930 				    enum vect_def_type *dt,
931 				    unsigned int ncopies, int pwr,
932 				    stmt_vector_for_cost *cost_vec)
933 {
934   int i;
935   int inside_cost = 0, prologue_cost = 0;
936 
937   for (i = 0; i < pwr + 1; i++)
938     {
939       inside_cost += record_stmt_cost (cost_vec, ncopies, vec_promote_demote,
940 				       stmt_info, 0, vect_body);
941       ncopies *= 2;
942     }
943 
944   /* FORNOW: Assuming maximum 2 args per stmts.  */
945   for (i = 0; i < 2; i++)
946     if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
947       prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
948 					 stmt_info, 0, vect_prologue);
949 
950   if (dump_enabled_p ())
951     dump_printf_loc (MSG_NOTE, vect_location,
952                      "vect_model_promotion_demotion_cost: inside_cost = %d, "
953                      "prologue_cost = %d .\n", inside_cost, prologue_cost);
954 }
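
/* Worked example (illustrative): for a two-step promotion (PWR = 1) with
   NCOPIES = 2, the loop above records 2 + 4 = 6 vec_promote_demote
   operations in the loop body, plus one prologue vector_stmt for each
   constant or external operand.  */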
955 
956 /* Returns true if the current function returns DECL.  */
957 
958 static bool
959 cfun_returns (tree decl)
960 {
961   edge_iterator ei;
962   edge e;
963   FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
964     {
965       greturn *ret = safe_dyn_cast <greturn *> (last_stmt (e->src));
966       if (!ret)
967 	continue;
968       if (gimple_return_retval (ret) == decl)
969 	return true;
970       /* We often end up with an aggregate copy to the result decl,
971          handle that case as well.  First skip intermediate clobbers
972 	 though.  */
973       gimple *def = ret;
974       do
975 	{
976 	  def = SSA_NAME_DEF_STMT (gimple_vuse (def));
977 	}
978       while (gimple_clobber_p (def));
979       if (is_a <gassign *> (def)
980 	  && gimple_assign_lhs (def) == gimple_return_retval (ret)
981 	  && gimple_assign_rhs1 (def) == decl)
982 	return true;
983     }
984   return false;
985 }
986 
987 /* Function vect_model_store_cost
988 
989    Models cost for stores.  In the case of grouped accesses, one access
990    has the overhead of the grouped access attributed to it.  */
991 
992 static void
993 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
994 		       enum vect_def_type dt,
995 		       vect_memory_access_type memory_access_type,
996 		       vec_load_store_type vls_type, slp_tree slp_node,
997 		       stmt_vector_for_cost *cost_vec)
998 {
999   unsigned int inside_cost = 0, prologue_cost = 0;
1000   stmt_vec_info first_stmt_info = stmt_info;
1001   bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1002 
1003   /* ???  Somehow we need to fix this at the callers.  */
1004   if (slp_node)
1005     ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1006 
1007   if (vls_type == VLS_STORE_INVARIANT)
1008     {
1009       if (slp_node)
1010 	prologue_cost += vect_prologue_cost_for_slp_op (slp_node, stmt_info,
1011 							1, dt, cost_vec);
1012       else
1013 	prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
1014 					   stmt_info, 0, vect_prologue);
1015     }
1016 
1017   /* Grouped stores update all elements in the group at once,
1018      so we want the DR for the first statement.  */
1019   if (!slp_node && grouped_access_p)
1020     first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1021 
1022   /* True if we should include any once-per-group costs as well as
1023      the cost of the statement itself.  For SLP we only get called
1024      once per group anyhow.  */
1025   bool first_stmt_p = (first_stmt_info == stmt_info);
1026 
1027   /* We assume that the cost of a single store-lanes instruction is
1028      equivalent to the cost of DR_GROUP_SIZE separate stores.  If a grouped
1029      access is instead being provided by a permute-and-store operation,
1030      include the cost of the permutes.  */
1031   if (first_stmt_p
1032       && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1033     {
1034       /* Uses high and low interleave or shuffle operations for each
1035 	 needed permute.  */
1036       int group_size = DR_GROUP_SIZE (first_stmt_info);
1037       int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1038       inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
1039 				      stmt_info, 0, vect_body);
1040 
1041       if (dump_enabled_p ())
1042         dump_printf_loc (MSG_NOTE, vect_location,
1043                          "vect_model_store_cost: strided group_size = %d .\n",
1044                          group_size);
1045     }
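
  /* Worked example (illustrative): for group_size = 4 and ncopies = 1
     the interleaving scheme above costs 1 * ceil_log2 (4) * 4 = 8
     vec_perm operations.  */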
1046 
1047   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1048   /* Costs of the stores.  */
1049   if (memory_access_type == VMAT_ELEMENTWISE
1050       || memory_access_type == VMAT_GATHER_SCATTER)
1051     {
1052       /* N scalar stores plus extracting the elements.  */
1053       unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1054       inside_cost += record_stmt_cost (cost_vec,
1055 				       ncopies * assumed_nunits,
1056 				       scalar_store, stmt_info, 0, vect_body);
1057     }
1058   else
1059     vect_get_store_cost (stmt_info, ncopies, &inside_cost, cost_vec);
1060 
1061   if (memory_access_type == VMAT_ELEMENTWISE
1062       || memory_access_type == VMAT_STRIDED_SLP)
1063     {
1064       /* N scalar stores plus extracting the elements.  */
1065       unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1066       inside_cost += record_stmt_cost (cost_vec,
1067 				       ncopies * assumed_nunits,
1068 				       vec_to_scalar, stmt_info, 0, vect_body);
1069     }
1070 
1071   /* When vectorizing a store into the function result assign
1072      a penalty if the function returns in a multi-register location.
1073      In this case we assume we'll end up with having to spill the
1074      vector result and do piecewise loads as a conservative estimate.  */
1075   tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
1076   if (base
1077       && (TREE_CODE (base) == RESULT_DECL
1078 	  || (DECL_P (base) && cfun_returns (base)))
1079       && !aggregate_value_p (base, cfun->decl))
1080     {
1081       rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
1082       /* ???  Handle PARALLEL in some way.  */
1083       if (REG_P (reg))
1084 	{
1085 	  int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
1086 	  /* Assume that a single reg-reg move is possible and cheap,
1087 	     do not account for vector to gp register move cost.  */
1088 	  if (nregs > 1)
1089 	    {
1090 	      /* Spill.  */
1091 	      prologue_cost += record_stmt_cost (cost_vec, ncopies,
1092 						 vector_store,
1093 						 stmt_info, 0, vect_epilogue);
1094 	      /* Loads.  */
1095 	      prologue_cost += record_stmt_cost (cost_vec, ncopies * nregs,
1096 						 scalar_load,
1097 						 stmt_info, 0, vect_epilogue);
1098 	    }
1099 	}
1100     }
1101 
1102   if (dump_enabled_p ())
1103     dump_printf_loc (MSG_NOTE, vect_location,
1104                      "vect_model_store_cost: inside_cost = %d, "
1105                      "prologue_cost = %d .\n", inside_cost, prologue_cost);
1106 }
1107 
1108 
1109 /* Calculate cost of DR's memory access.  */
1110 void
1111 vect_get_store_cost (stmt_vec_info stmt_info, int ncopies,
1112 		     unsigned int *inside_cost,
1113 		     stmt_vector_for_cost *body_cost_vec)
1114 {
1115   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1116   int alignment_support_scheme
1117     = vect_supportable_dr_alignment (dr_info, false);
1118 
1119   switch (alignment_support_scheme)
1120     {
1121     case dr_aligned:
1122       {
1123 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1124 					  vector_store, stmt_info, 0,
1125 					  vect_body);
1126 
1127         if (dump_enabled_p ())
1128           dump_printf_loc (MSG_NOTE, vect_location,
1129                            "vect_model_store_cost: aligned.\n");
1130         break;
1131       }
1132 
1133     case dr_unaligned_supported:
1134       {
1135         /* Here, we assign an additional cost for the unaligned store.  */
1136 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1137 					  unaligned_store, stmt_info,
1138 					  DR_MISALIGNMENT (dr_info),
1139 					  vect_body);
1140         if (dump_enabled_p ())
1141           dump_printf_loc (MSG_NOTE, vect_location,
1142                            "vect_model_store_cost: unaligned supported by "
1143                            "hardware.\n");
1144         break;
1145       }
1146 
1147     case dr_unaligned_unsupported:
1148       {
1149         *inside_cost = VECT_MAX_COST;
1150 
1151         if (dump_enabled_p ())
1152           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1153                            "vect_model_store_cost: unsupported access.\n");
1154         break;
1155       }
1156 
1157     default:
1158       gcc_unreachable ();
1159     }
1160 }
1161 
1162 
1163 /* Function vect_model_load_cost
1164 
1165    Models cost for loads.  In the case of grouped accesses, one access has
1166    the overhead of the grouped access attributed to it.  Since unaligned
1167    accesses are supported for loads, we also account for the costs of the
1168    access scheme chosen.  */
1169 
1170 static void
1171 vect_model_load_cost (stmt_vec_info stmt_info, unsigned ncopies,
1172 		      vect_memory_access_type memory_access_type,
1173 		      slp_instance instance,
1174 		      slp_tree slp_node,
1175 		      stmt_vector_for_cost *cost_vec)
1176 {
1177   unsigned int inside_cost = 0, prologue_cost = 0;
1178   bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1179 
1180   gcc_assert (cost_vec);
1181 
1182   /* ???  Somehow we need to fix this at the callers.  */
1183   if (slp_node)
1184     ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1185 
1186   if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
1187     {
1188       /* If the load is permuted then the alignment is determined by
1189 	 the first group element not by the first scalar stmt DR.  */
1190       stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1191       /* Record the cost for the permutation.  */
1192       unsigned n_perms;
1193       unsigned assumed_nunits
1194 	= vect_nunits_for_cost (STMT_VINFO_VECTYPE (first_stmt_info));
1195       unsigned slp_vf = (ncopies * assumed_nunits) / instance->group_size;
1196       vect_transform_slp_perm_load (slp_node, vNULL, NULL,
1197 				    slp_vf, instance, true,
1198 				    &n_perms);
1199       inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
1200 				       first_stmt_info, 0, vect_body);
1201       /* And adjust the number of loads performed.  This handles
1202 	 redundancies as well as loads that are later dead.  */
1203       auto_sbitmap perm (DR_GROUP_SIZE (first_stmt_info));
1204       bitmap_clear (perm);
1205       for (unsigned i = 0;
1206 	   i < SLP_TREE_LOAD_PERMUTATION (slp_node).length (); ++i)
1207 	bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (slp_node)[i]);
1208       ncopies = 0;
1209       bool load_seen = false;
1210       for (unsigned i = 0; i < DR_GROUP_SIZE (first_stmt_info); ++i)
1211 	{
1212 	  if (i % assumed_nunits == 0)
1213 	    {
1214 	      if (load_seen)
1215 		ncopies++;
1216 	      load_seen = false;
1217 	    }
1218 	  if (bitmap_bit_p (perm, i))
1219 	    load_seen = true;
1220 	}
1221       if (load_seen)
1222 	ncopies++;
1223       gcc_assert (ncopies
1224 		  <= (DR_GROUP_SIZE (first_stmt_info)
1225 		      - DR_GROUP_GAP (first_stmt_info)
1226 		      + assumed_nunits - 1) / assumed_nunits);
1227     }
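
  /* Worked example (illustrative): with DR_GROUP_SIZE = 4,
     assumed_nunits = 2 and a load permutation that only uses elements
     {0, 1}, the recomputation above reduces ncopies to 1, because the
     second vector of the group is never needed.  */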
1228 
1229   /* Grouped loads read all elements in the group at once,
1230      so we want the DR for the first statement.  */
1231   stmt_vec_info first_stmt_info = stmt_info;
1232   if (!slp_node && grouped_access_p)
1233     first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1234 
1235   /* True if we should include any once-per-group costs as well as
1236      the cost of the statement itself.  For SLP we only get called
1237      once per group anyhow.  */
1238   bool first_stmt_p = (first_stmt_info == stmt_info);
1239 
1240   /* We assume that the cost of a single load-lanes instruction is
1241      equivalent to the cost of DR_GROUP_SIZE separate loads.  If a grouped
1242      access is instead being provided by a load-and-permute operation,
1243      include the cost of the permutes.  */
1244   if (first_stmt_p
1245       && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1246     {
1247       /* Uses even and odd extract operations or shuffle operations
1248 	 for each needed permute.  */
1249       int group_size = DR_GROUP_SIZE (first_stmt_info);
1250       int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1251       inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
1252 				       stmt_info, 0, vect_body);
1253 
1254       if (dump_enabled_p ())
1255         dump_printf_loc (MSG_NOTE, vect_location,
1256                          "vect_model_load_cost: strided group_size = %d .\n",
1257                          group_size);
1258     }
1259 
1260   /* The loads themselves.  */
1261   if (memory_access_type == VMAT_ELEMENTWISE
1262       || memory_access_type == VMAT_GATHER_SCATTER)
1263     {
1264       /* N scalar loads plus gathering them into a vector.  */
1265       tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1266       unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1267       inside_cost += record_stmt_cost (cost_vec,
1268 				       ncopies * assumed_nunits,
1269 				       scalar_load, stmt_info, 0, vect_body);
1270     }
1271   else
1272     vect_get_load_cost (stmt_info, ncopies, first_stmt_p,
1273 			&inside_cost, &prologue_cost,
1274 			cost_vec, cost_vec, true);
1275   if (memory_access_type == VMAT_ELEMENTWISE
1276       || memory_access_type == VMAT_STRIDED_SLP)
1277     inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
1278 				     stmt_info, 0, vect_body);
1279 
1280   if (dump_enabled_p ())
1281     dump_printf_loc (MSG_NOTE, vect_location,
1282                      "vect_model_load_cost: inside_cost = %d, "
1283                      "prologue_cost = %d .\n", inside_cost, prologue_cost);
1284 }
1285 
1286 
1287 /* Calculate cost of DR's memory access.  */
1288 void
1289 vect_get_load_cost (stmt_vec_info stmt_info, int ncopies,
1290 		    bool add_realign_cost, unsigned int *inside_cost,
1291 		    unsigned int *prologue_cost,
1292 		    stmt_vector_for_cost *prologue_cost_vec,
1293 		    stmt_vector_for_cost *body_cost_vec,
1294 		    bool record_prologue_costs)
1295 {
1296   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1297   int alignment_support_scheme
1298     = vect_supportable_dr_alignment (dr_info, false);
1299 
1300   switch (alignment_support_scheme)
1301     {
1302     case dr_aligned:
1303       {
1304 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1305 					  stmt_info, 0, vect_body);
1306 
1307         if (dump_enabled_p ())
1308           dump_printf_loc (MSG_NOTE, vect_location,
1309                            "vect_model_load_cost: aligned.\n");
1310 
1311         break;
1312       }
1313     case dr_unaligned_supported:
1314       {
1315         /* Here, we assign an additional cost for the unaligned load.  */
1316 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1317 					  unaligned_load, stmt_info,
1318 					  DR_MISALIGNMENT (dr_info),
1319 					  vect_body);
1320 
1321         if (dump_enabled_p ())
1322           dump_printf_loc (MSG_NOTE, vect_location,
1323                            "vect_model_load_cost: unaligned supported by "
1324                            "hardware.\n");
1325 
1326         break;
1327       }
1328     case dr_explicit_realign:
1329       {
1330 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1331 					  vector_load, stmt_info, 0, vect_body);
1332 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1333 					  vec_perm, stmt_info, 0, vect_body);
1334 
1335         /* FIXME: If the misalignment remains fixed across the iterations of
1336            the containing loop, the following cost should be added to the
1337            prologue costs.  */
1338         if (targetm.vectorize.builtin_mask_for_load)
1339 	  *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1340 					    stmt_info, 0, vect_body);
1341 
1342         if (dump_enabled_p ())
1343           dump_printf_loc (MSG_NOTE, vect_location,
1344                            "vect_model_load_cost: explicit realign\n");
1345 
1346         break;
1347       }
1348     case dr_explicit_realign_optimized:
1349       {
1350         if (dump_enabled_p ())
1351           dump_printf_loc (MSG_NOTE, vect_location,
1352                            "vect_model_load_cost: unaligned software "
1353                            "pipelined.\n");
1354 
1355         /* Unaligned software pipeline has a load of an address, an initial
1356            load, and possibly a mask operation to "prime" the loop.  However,
1357            if this is an access in a group of loads, which provide grouped
1358            access, then the above cost should only be considered for one
1359            access in the group.  Inside the loop, there is a load op
1360            and a realignment op.  */
1361 
1362         if (add_realign_cost && record_prologue_costs)
1363           {
1364 	    *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1365 						vector_stmt, stmt_info,
1366 						0, vect_prologue);
1367             if (targetm.vectorize.builtin_mask_for_load)
1368 	      *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1369 						  vector_stmt, stmt_info,
1370 						  0, vect_prologue);
1371           }
1372 
1373 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1374 					  stmt_info, 0, vect_body);
1375 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1376 					  stmt_info, 0, vect_body);
1377 
1378         if (dump_enabled_p ())
1379           dump_printf_loc (MSG_NOTE, vect_location,
1380                            "vect_model_load_cost: explicit realign optimized"
1381                            "\n");
1382 
1383         break;
1384       }
1385 
1386     case dr_unaligned_unsupported:
1387       {
1388         *inside_cost = VECT_MAX_COST;
1389 
1390         if (dump_enabled_p ())
1391           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1392                            "vect_model_load_cost: unsupported access.\n");
1393         break;
1394       }
1395 
1396     default:
1397       gcc_unreachable ();
1398     }
1399 }
1400 
1401 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1402    the loop preheader for the vectorized stmt STMT_VINFO.  */
1403 
1404 static void
1405 vect_init_vector_1 (stmt_vec_info stmt_vinfo, gimple *new_stmt,
1406 		    gimple_stmt_iterator *gsi)
1407 {
1408   if (gsi)
1409     vect_finish_stmt_generation (stmt_vinfo, new_stmt, gsi);
1410   else
1411     {
1412       loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1413 
1414       if (loop_vinfo)
1415         {
1416 	  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1417 	  basic_block new_bb;
1418 	  edge pe;
1419 
1420 	  if (nested_in_vect_loop_p (loop, stmt_vinfo))
1421 	    loop = loop->inner;
1422 
1423 	  pe = loop_preheader_edge (loop);
1424           new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1425           gcc_assert (!new_bb);
1426 	}
1427       else
1428        {
1429           bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1430           basic_block bb;
1431           gimple_stmt_iterator gsi_bb_start;
1432 
1433           gcc_assert (bb_vinfo);
1434           bb = BB_VINFO_BB (bb_vinfo);
1435           gsi_bb_start = gsi_after_labels (bb);
1436           gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1437        }
1438     }
1439 
1440   if (dump_enabled_p ())
1441     dump_printf_loc (MSG_NOTE, vect_location,
1442 		     "created new init_stmt: %G", new_stmt);
1443 }
1444 
1445 /* Function vect_init_vector.
1446 
1447    Insert a new stmt (INIT_STMT) that initializes a new variable of type
1448    TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
1449    vector type, a vector with all elements equal to VAL is created first.
1450    Place the initialization at GSI if it is not NULL.  Otherwise, place the
1451    initialization at the loop preheader.
1452    Return the DEF of INIT_STMT.
1453    It will be used in the vectorization of STMT_INFO.  */
1454 
1455 tree
1456 vect_init_vector (stmt_vec_info stmt_info, tree val, tree type,
1457 		  gimple_stmt_iterator *gsi)
1458 {
1459   gimple *init_stmt;
1460   tree new_temp;
1461 
1462   /* We abuse this function to push something to an SSA name with initial value 'val'.  */
1463   if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1464     {
1465       gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1466       if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1467 	{
1468 	  /* Scalar boolean value should be transformed into
1469 	     all zeros or all ones value before building a vector.  */
1470 	  if (VECTOR_BOOLEAN_TYPE_P (type))
1471 	    {
1472 	      tree true_val = build_all_ones_cst (TREE_TYPE (type));
1473 	      tree false_val = build_zero_cst (TREE_TYPE (type));
1474 
1475 	      if (CONSTANT_CLASS_P (val))
1476 		val = integer_zerop (val) ? false_val : true_val;
1477 	      else
1478 		{
1479 		  new_temp = make_ssa_name (TREE_TYPE (type));
1480 		  init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1481 						   val, true_val, false_val);
1482 		  vect_init_vector_1 (stmt_info, init_stmt, gsi);
1483 		  val = new_temp;
1484 		}
1485 	    }
1486 	  else
1487 	    {
1488 	      gimple_seq stmts = NULL;
1489 	      if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1490 		val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
1491 				    TREE_TYPE (type), val);
1492 	      else
1493 		/* ???  Condition vectorization expects us to do
1494 		   promotion of invariant/external defs.  */
1495 		val = gimple_convert (&stmts, TREE_TYPE (type), val);
1496 	      for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
1497 		   !gsi_end_p (gsi2); )
1498 		{
1499 		  init_stmt = gsi_stmt (gsi2);
1500 		  gsi_remove (&gsi2, false);
1501 		  vect_init_vector_1 (stmt_info, init_stmt, gsi);
1502 		}
1503 	    }
1504 	}
1505       val = build_vector_from_val (type, val);
1506     }
1507 
1508   new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1509   init_stmt = gimple_build_assign (new_temp, val);
1510   vect_init_vector_1 (stmt_info, init_stmt, gsi);
1511   return new_temp;
1512 }
1513 
1514 /* Function vect_get_vec_def_for_operand_1.
1515 
1516    For a defining stmt DEF_STMT_INFO of a scalar stmt, return a vector def
1517    with type DT that will be used in the vectorized stmt.  */
1518 
1519 tree
1520 vect_get_vec_def_for_operand_1 (stmt_vec_info def_stmt_info,
1521 				enum vect_def_type dt)
1522 {
1523   tree vec_oprnd;
1524   stmt_vec_info vec_stmt_info;
1525 
1526   switch (dt)
1527     {
1528     /* operand is a constant or a loop invariant.  */
1529     case vect_constant_def:
1530     case vect_external_def:
1531       /* Code should use vect_get_vec_def_for_operand.  */
1532       gcc_unreachable ();
1533 
1534     /* Operand is defined by a loop header phi.  In case of nested
1535        cycles we also may have uses of the backedge def.  */
1536     case vect_reduction_def:
1537     case vect_double_reduction_def:
1538     case vect_nested_cycle:
1539     case vect_induction_def:
1540       gcc_assert (gimple_code (def_stmt_info->stmt) == GIMPLE_PHI
1541 		  || dt == vect_nested_cycle);
1542       /* Fallthru.  */
1543 
1544     /* operand is defined inside the loop.  */
1545     case vect_internal_def:
1546       {
1547         /* Get the def from the vectorized stmt.  */
1548 	vec_stmt_info = STMT_VINFO_VEC_STMT (def_stmt_info);
1549 	/* Get vectorized pattern statement.  */
1550 	if (!vec_stmt_info
1551 	    && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1552 	    && !STMT_VINFO_RELEVANT (def_stmt_info))
1553 	  vec_stmt_info = (STMT_VINFO_VEC_STMT
1554 			   (STMT_VINFO_RELATED_STMT (def_stmt_info)));
1555 	gcc_assert (vec_stmt_info);
1556 	if (gphi *phi = dyn_cast <gphi *> (vec_stmt_info->stmt))
1557 	  vec_oprnd = PHI_RESULT (phi);
1558 	else
1559 	  vec_oprnd = gimple_get_lhs (vec_stmt_info->stmt);
1560 	return vec_oprnd;
1561       }
1562 
1563     default:
1564       gcc_unreachable ();
1565     }
1566 }
1567 
1568 
1569 /* Function vect_get_vec_def_for_operand.
1570 
1571    OP is an operand in STMT_VINFO.  This function returns a (vector) def
1572    that will be used in the vectorized stmt for STMT_VINFO.
1573 
1574    In the case that OP is an SSA_NAME which is defined in the loop, then
1575    STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1576 
1577    In case OP is an invariant or constant, a new stmt that creates a vector def
1578    needs to be introduced.  VECTYPE may be used to specify a required type for
1579    vector invariant.  */
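
/* For example (an illustrative sketch, names made up): when vectorizing

     S1: x_1 = ...                   ->  VS1: vx_1 = ...
     S2: y_1 = x_1 + 3               ->  VS2: vy_1 = vx_1 + { 3, 3, 3, 3 }

   a call with OP = x_1 returns vx_1, taken from STMT_VINFO_VEC_STMT of
   S1's stmt_info, whereas a call with OP = 3 builds the invariant vector
   { 3, 3, 3, 3 } through vect_init_vector and returns its definition.  */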
1580 
1581 tree
1582 vect_get_vec_def_for_operand (tree op, stmt_vec_info stmt_vinfo, tree vectype)
1583 {
1584   gimple *def_stmt;
1585   enum vect_def_type dt;
1586   bool is_simple_use;
1587   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1588 
1589   if (dump_enabled_p ())
1590     dump_printf_loc (MSG_NOTE, vect_location,
1591 		     "vect_get_vec_def_for_operand: %T\n", op);
1592 
1593   stmt_vec_info def_stmt_info;
1594   is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
1595 				      &def_stmt_info, &def_stmt);
1596   gcc_assert (is_simple_use);
1597   if (def_stmt && dump_enabled_p ())
1598     dump_printf_loc (MSG_NOTE, vect_location, "  def_stmt =  %G", def_stmt);
1599 
1600   if (dt == vect_constant_def || dt == vect_external_def)
1601     {
1602       tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1603       tree vector_type;
1604 
1605       if (vectype)
1606 	vector_type = vectype;
1607       else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1608 	       && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1609 	vector_type = truth_type_for (stmt_vectype);
1610       else
1611 	vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));
1612 
1613       gcc_assert (vector_type);
1614       return vect_init_vector (stmt_vinfo, op, vector_type, NULL);
1615     }
1616   else
1617     return vect_get_vec_def_for_operand_1 (def_stmt_info, dt);
1618 }
1619 
1620 
1621 /* Function vect_get_vec_def_for_stmt_copy
1622 
1623    Return a vector-def for an operand.  This function is used when the
1624    vectorized stmt to be created (by the caller to this function) is a "copy"
1625    created in case the vectorized result cannot fit in one vector, and several
1626    copies of the vector-stmt are required.  In this case the vector-def is
1627    retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1628    of the stmt that defines VEC_OPRND.  VINFO describes the vectorization.
1629 
1630    Context:
1631         In case the vectorization factor (VF) is bigger than the number
1632    of elements that can fit in a vectype (nunits), we have to generate
1633    more than one vector stmt to vectorize the scalar stmt.  This situation
1634    arises when there are multiple data-types operated upon in the loop; the
1635    smallest data-type determines the VF, and as a result, when vectorizing
1636    stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1637    vector stmt (each computing a vector of 'nunits' results, and together
1638    computing 'VF' results in each iteration).  This function is called when
1639    vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1640    which VF=16 and nunits=4, so the number of copies required is 4):
1641 
1642    scalar stmt:         vectorized into:        STMT_VINFO_RELATED_STMT
1643 
1644    S1: x = load         VS1.0:  vx.0 = memref0      VS1.1
1645                         VS1.1:  vx.1 = memref1      VS1.2
1646                         VS1.2:  vx.2 = memref2      VS1.3
1647                         VS1.3:  vx.3 = memref3
1648 
1649    S2: z = x + ...      VSnew.0:  vz0 = vx.0 + ...  VSnew.1
1650                         VSnew.1:  vz1 = vx.1 + ...  VSnew.2
1651                         VSnew.2:  vz2 = vx.2 + ...  VSnew.3
1652                         VSnew.3:  vz3 = vx.3 + ...
1653 
1654    The vectorization of S1 is explained in vectorizable_load.
1655    The vectorization of S2:
1656         To create the first vector-stmt out of the 4 copies - VSnew.0 -
1657    the function 'vect_get_vec_def_for_operand' is called to
1658    get the relevant vector-def for each operand of S2.  For operand x it
1659    returns  the vector-def 'vx.0'.
1660 
1661         To create the remaining copies of the vector-stmt (VSnew.j), this
1662    function is called to get the relevant vector-def for each operand.  It is
1663    obtained from the respective VS1.j stmt, which is recorded in the
1664    STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1665 
1666         For example, to obtain the vector-def 'vx.1' in order to create the
1667    vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1668    Given 'vx.0' we obtain the stmt that defines it ('VS1.0'); from the
1669    STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1670    and return its def ('vx.1').
1671    Overall, to create the above sequence this function will be called 3 times:
1672 	vx.1 = vect_get_vec_def_for_stmt_copy (vinfo, vx.0);
1673 	vx.2 = vect_get_vec_def_for_stmt_copy (vinfo, vx.1);
1674 	vx.3 = vect_get_vec_def_for_stmt_copy (vinfo, vx.2);  */
1675 
1676 tree
1677 vect_get_vec_def_for_stmt_copy (vec_info *vinfo, tree vec_oprnd)
1678 {
1679   stmt_vec_info def_stmt_info = vinfo->lookup_def (vec_oprnd);
1680   if (!def_stmt_info)
1681     /* Do nothing; can reuse same def.  */
1682     return vec_oprnd;
1683 
1684   def_stmt_info = STMT_VINFO_RELATED_STMT (def_stmt_info);
1685   gcc_assert (def_stmt_info);
1686   if (gphi *phi = dyn_cast <gphi *> (def_stmt_info->stmt))
1687     vec_oprnd = PHI_RESULT (phi);
1688   else
1689     vec_oprnd = gimple_get_lhs (def_stmt_info->stmt);
1690   return vec_oprnd;
1691 }
1692 
1693 
1694 /* Get vectorized definitions for the operands to create a copy of an original
1695    stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */
1696 
1697 void
1698 vect_get_vec_defs_for_stmt_copy (vec_info *vinfo,
1699 				 vec<tree> *vec_oprnds0,
1700 				 vec<tree> *vec_oprnds1)
1701 {
1702   tree vec_oprnd = vec_oprnds0->pop ();
1703 
1704   vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
1705   vec_oprnds0->quick_push (vec_oprnd);
1706 
1707   if (vec_oprnds1 && vec_oprnds1->length ())
1708     {
1709       vec_oprnd = vec_oprnds1->pop ();
1710       vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
1711       vec_oprnds1->quick_push (vec_oprnd);
1712     }
1713 }
1714 
1715 
1716 /* Get vectorized definitions for OP0 and OP1.  */
1717 
1718 void
1719 vect_get_vec_defs (tree op0, tree op1, stmt_vec_info stmt_info,
1720 		   vec<tree> *vec_oprnds0,
1721 		   vec<tree> *vec_oprnds1,
1722 		   slp_tree slp_node)
1723 {
1724   if (slp_node)
1725     {
1726       auto_vec<vec<tree> > vec_defs (SLP_TREE_CHILDREN (slp_node).length ());
1727       vect_get_slp_defs (slp_node, &vec_defs, op1 ? 2 : 1);
1728       *vec_oprnds0 = vec_defs[0];
1729       if (op1)
1730 	*vec_oprnds1 = vec_defs[1];
1731     }
1732   else
1733     {
1734       tree vec_oprnd;
1735 
1736       vec_oprnds0->create (1);
1737       vec_oprnd = vect_get_vec_def_for_operand (op0, stmt_info);
1738       vec_oprnds0->quick_push (vec_oprnd);
1739 
1740       if (op1)
1741 	{
1742 	  vec_oprnds1->create (1);
1743 	  vec_oprnd = vect_get_vec_def_for_operand (op1, stmt_info);
1744 	  vec_oprnds1->quick_push (vec_oprnd);
1745 	}
1746     }
1747 }
1748 
1749 /* Helper function called by vect_finish_replace_stmt and
1750    vect_finish_stmt_generation.  Set the location of the new
1751    statement and create and return a stmt_vec_info for it.  */
1752 
1753 static stmt_vec_info
1754 vect_finish_stmt_generation_1 (stmt_vec_info stmt_info, gimple *vec_stmt)
1755 {
1756   vec_info *vinfo = stmt_info->vinfo;
1757 
1758   stmt_vec_info vec_stmt_info = vinfo->add_stmt (vec_stmt);
1759 
1760   if (dump_enabled_p ())
1761     dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);
1762 
1763   gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));
1764 
1765   /* While EH edges will generally prevent vectorization, stmt might
1766      e.g. be in a must-not-throw region.  Ensure newly created stmts
1767      that could throw are part of the same region.  */
1768   int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
1769   if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
1770     add_stmt_to_eh_lp (vec_stmt, lp_nr);
1771 
1772   return vec_stmt_info;
1773 }
1774 
1775 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1776    which sets the same scalar result as STMT_INFO did.  Create and return a
1777    stmt_vec_info for VEC_STMT.  */
1778 
1779 stmt_vec_info
1780 vect_finish_replace_stmt (stmt_vec_info stmt_info, gimple *vec_stmt)
1781 {
1782   gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
1783   gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));
1784 
1785   gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
1786   gsi_replace (&gsi, vec_stmt, true);
1787 
1788   return vect_finish_stmt_generation_1 (stmt_info, vec_stmt);
1789 }
1790 
1791 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1792    before *GSI.  Create and return a stmt_vec_info for VEC_STMT.  */
1793 
1794 stmt_vec_info
1795 vect_finish_stmt_generation (stmt_vec_info stmt_info, gimple *vec_stmt,
1796 			     gimple_stmt_iterator *gsi)
1797 {
1798   gcc_assert (gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
1799 
1800   if (!gsi_end_p (*gsi)
1801       && gimple_has_mem_ops (vec_stmt))
1802     {
1803       gimple *at_stmt = gsi_stmt (*gsi);
1804       tree vuse = gimple_vuse (at_stmt);
1805       if (vuse && TREE_CODE (vuse) == SSA_NAME)
1806 	{
1807 	  tree vdef = gimple_vdef (at_stmt);
1808 	  gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1809 	  /* If we have an SSA vuse and insert a store, update virtual
1810 	     SSA form to avoid triggering the renamer.  Do so only
1811 	     if we can easily see all uses - which is what almost always
1812 	     happens with the way vectorized stmts are inserted.  */
1813 	  if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1814 	      && ((is_gimple_assign (vec_stmt)
1815 		   && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1816 		  || (is_gimple_call (vec_stmt)
1817 		      && !(gimple_call_flags (vec_stmt)
1818 			   & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1819 	    {
1820 	      tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1821 	      gimple_set_vdef (vec_stmt, new_vdef);
1822 	      SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1823 	    }
1824 	}
1825     }
1826   gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1827   return vect_finish_stmt_generation_1 (stmt_info, vec_stmt);
1828 }
1829 
1830 /* We want to vectorize a call to combined function CFN with function
1831    decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1832    as the types of all inputs.  Check whether this is possible using
1833    an internal function, returning its code if so or IFN_LAST if not.  */
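
/* One concrete case (given here as an illustration rather than an
   exhaustive rule): a call to the combined function CFN_FMA with
   VECTYPE_OUT == VECTYPE_IN, say a four-element float vector, maps to the
   direct internal function IFN_FMA and is returned only if
   direct_internal_fn_supported_p confirms the target implements it for
   that vector type; otherwise IFN_LAST is returned.  */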
1834 
1835 static internal_fn
1836 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1837 				tree vectype_out, tree vectype_in)
1838 {
1839   internal_fn ifn;
1840   if (internal_fn_p (cfn))
1841     ifn = as_internal_fn (cfn);
1842   else
1843     ifn = associated_internal_fn (fndecl);
1844   if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1845     {
1846       const direct_internal_fn_info &info = direct_internal_fn (ifn);
1847       if (info.vectorizable)
1848 	{
1849 	  tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1850 	  tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1851 	  if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1852 					      OPTIMIZE_FOR_SPEED))
1853 	    return ifn;
1854 	}
1855     }
1856   return IFN_LAST;
1857 }
1858 
1859 
1860 static tree permute_vec_elements (tree, tree, tree, stmt_vec_info,
1861 				  gimple_stmt_iterator *);
1862 
1863 /* Check whether a load or store statement in the loop described by
1864    LOOP_VINFO is possible in a fully-masked loop.  This is testing
1865    whether the vectorizer pass has the appropriate support, as well as
1866    whether the target does.
1867 
1868    VLS_TYPE says whether the statement is a load or store and VECTYPE
1869    is the type of the vector being loaded or stored.  MEMORY_ACCESS_TYPE
1870    says how the load or store is going to be implemented and GROUP_SIZE
1871    is the number of load or store statements in the containing group.
1872    If the access is a gather load or scatter store, GS_INFO describes
1873    its arguments.  If the load or store is conditional, SCALAR_MASK is the
1874    condition under which it occurs.
1875 
1876    Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
1877    supported, otherwise record the required mask types.  */
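
/* A worked example (a sketch that assumes the target checks below pass):
   for an unconditional contiguous load with GROUP_SIZE = 2, a
   four-element VECTYPE and a vectorization factor of 4, the final branch
   below records 2 * 4 / 4 = 2 loop masks of VECTYPE via
   vect_record_loop_mask; if the target lacks a masked load for that mode,
   LOOP_VINFO_CAN_FULLY_MASK_P is cleared instead.  */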
1878 
1879 static void
1880 check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
1881 			  vec_load_store_type vls_type, int group_size,
1882 			  vect_memory_access_type memory_access_type,
1883 			  gather_scatter_info *gs_info, tree scalar_mask)
1884 {
1885   /* Invariant loads need no special support.  */
1886   if (memory_access_type == VMAT_INVARIANT)
1887     return;
1888 
1889   vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1890   machine_mode vecmode = TYPE_MODE (vectype);
1891   bool is_load = (vls_type == VLS_LOAD);
1892   if (memory_access_type == VMAT_LOAD_STORE_LANES)
1893     {
1894       if (is_load
1895 	  ? !vect_load_lanes_supported (vectype, group_size, true)
1896 	  : !vect_store_lanes_supported (vectype, group_size, true))
1897 	{
1898 	  if (dump_enabled_p ())
1899 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1900 			     "can't use a fully-masked loop because the"
1901 			     " target doesn't have an appropriate masked"
1902 			     " load/store-lanes instruction.\n");
1903 	  LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1904 	  return;
1905 	}
1906       unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1907       vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
1908       return;
1909     }
1910 
1911   if (memory_access_type == VMAT_GATHER_SCATTER)
1912     {
1913       internal_fn ifn = (is_load
1914 			 ? IFN_MASK_GATHER_LOAD
1915 			 : IFN_MASK_SCATTER_STORE);
1916       if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
1917 						   gs_info->memory_type,
1918 						   gs_info->offset_vectype,
1919 						   gs_info->scale))
1920 	{
1921 	  if (dump_enabled_p ())
1922 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1923 			     "can't use a fully-masked loop because the"
1924 			     " target doesn't have an appropriate masked"
1925 			     " gather load or scatter store instruction.\n");
1926 	  LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1927 	  return;
1928 	}
1929       unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1930       vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
1931       return;
1932     }
1933 
1934   if (memory_access_type != VMAT_CONTIGUOUS
1935       && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1936     {
1937       /* Element X of the data must come from iteration i * VF + X of the
1938 	 scalar loop.  We need more work to support other mappings.  */
1939       if (dump_enabled_p ())
1940 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1941 			 "can't use a fully-masked loop because an access"
1942 			 " isn't contiguous.\n");
1943       LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1944       return;
1945     }
1946 
1947   machine_mode mask_mode;
1948   if (!VECTOR_MODE_P (vecmode)
1949       || !targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
1950       || !can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1951     {
1952       if (dump_enabled_p ())
1953 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1954 			 "can't use a fully-masked loop because the target"
1955 			 " doesn't have the appropriate masked load or"
1956 			 " store.\n");
1957       LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1958       return;
1959     }
1960   /* We might load more scalars than we need for permuting SLP loads.
1961      We checked in get_group_load_store_type that the extra elements
1962      don't leak into a new vector.  */
1963   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1964   poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1965   unsigned int nvectors;
1966   if (can_div_away_from_zero_p (group_size * vf, nunits, &nvectors))
1967     vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
1968   else
1969     gcc_unreachable ();
1970 }
1971 
1972 /* Return the mask input to a masked load or store.  VEC_MASK is the vectorized
1973    form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1974    that needs to be applied to all loads and stores in a vectorized loop.
1975    Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1976 
1977    MASK_TYPE is the type of both masks.  If new statements are needed,
1978    insert them before GSI.  */
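
/* In GIMPLE terms the combined mask is simply (SSA names illustrative):

     vec_mask_and_3 = vec_mask_2 & loop_mask_1;

   inserted before GSI; VEC_MASK is returned unchanged when there is no
   loop mask to apply.  */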
1979 
1980 static tree
1981 prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
1982 			 gimple_stmt_iterator *gsi)
1983 {
1984   gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1985   if (!loop_mask)
1986     return vec_mask;
1987 
1988   gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1989   tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1990   gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1991 					  vec_mask, loop_mask);
1992   gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1993   return and_res;
1994 }
1995 
1996 /* Determine whether we can use a gather load or scatter store to vectorize
1997    strided load or store STMT_INFO by truncating the current offset to a
1998    smaller width.  We need to be able to construct an offset vector:
1999 
2000      { 0, X, X*2, X*3, ... }
2001 
2002    without loss of precision, where X is STMT_INFO's DR_STEP.
2003 
2004    Return true if this is possible, describing the gather load or scatter
2005    store in GS_INFO.  MASKED_P is true if the load or store is conditional.  */
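
/* A sketch of the arithmetic below, with made-up numbers: if DR_STEP is 4
   (a contiguous int access), the loop is known to run at most 200 latch
   iterations and the target accepts a SCALE of 4, the scaled offsets
   0, 1, ..., 200 need only 8 bits and an 8-bit unsigned offset type is
   tried; with SCALE 1 the offsets reach 800, which needs 10 bits and is
   therefore rounded up to a 16-bit offset type.  */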
2006 
2007 static bool
2008 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
2009 				     loop_vec_info loop_vinfo, bool masked_p,
2010 				     gather_scatter_info *gs_info)
2011 {
2012   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2013   data_reference *dr = dr_info->dr;
2014   tree step = DR_STEP (dr);
2015   if (TREE_CODE (step) != INTEGER_CST)
2016     {
2017       /* ??? Perhaps we could use range information here?  */
2018       if (dump_enabled_p ())
2019 	dump_printf_loc (MSG_NOTE, vect_location,
2020 			 "cannot truncate variable step.\n");
2021       return false;
2022     }
2023 
2024   /* Get the number of bits in an element.  */
2025   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2026   scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
2027   unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
2028 
2029   /* Set COUNT to the upper limit on the number of elements - 1.
2030      Start with the maximum vectorization factor.  */
2031   unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
2032 
2033   /* Try lowering COUNT to the number of scalar latch iterations.  */
2034   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2035   widest_int max_iters;
2036   if (max_loop_iterations (loop, &max_iters)
2037       && max_iters < count)
2038     count = max_iters.to_shwi ();
2039 
2040   /* Try scales of 1 and the element size.  */
2041   int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
2042   wi::overflow_type overflow = wi::OVF_NONE;
2043   for (int i = 0; i < 2; ++i)
2044     {
2045       int scale = scales[i];
2046       widest_int factor;
2047       if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
2048 	continue;
2049 
2050       /* Determine the minimum precision of COUNT * STEP / SCALE.  */
2051       widest_int range = wi::mul (count, factor, SIGNED, &overflow);
2052       if (overflow)
2053 	continue;
2054       signop sign = range >= 0 ? UNSIGNED : SIGNED;
2055       unsigned int min_offset_bits = wi::min_precision (range, sign);
2056 
2057       /* Find the narrowest viable offset type.  */
2058       unsigned int offset_bits = 1U << ceil_log2 (min_offset_bits);
2059       tree offset_type = build_nonstandard_integer_type (offset_bits,
2060 							 sign == UNSIGNED);
2061 
2062       /* See whether the target supports the operation with an offset
2063 	 no narrower than OFFSET_TYPE.  */
2064       tree memory_type = TREE_TYPE (DR_REF (dr));
2065       if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
2066 				     vectype, memory_type, offset_type, scale,
2067 				     &gs_info->ifn, &gs_info->offset_vectype))
2068 	continue;
2069 
2070       gs_info->decl = NULL_TREE;
2071       /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
2072 	 but we don't need to store that here.  */
2073       gs_info->base = NULL_TREE;
2074       gs_info->element_type = TREE_TYPE (vectype);
2075       gs_info->offset = fold_convert (offset_type, step);
2076       gs_info->offset_dt = vect_constant_def;
2077       gs_info->scale = scale;
2078       gs_info->memory_type = memory_type;
2079       return true;
2080     }
2081 
2082   if (overflow && dump_enabled_p ())
2083     dump_printf_loc (MSG_NOTE, vect_location,
2084 		     "truncating gather/scatter offset to %d bits"
2085 		     " might change its value.\n", element_bits);
2086 
2087   return false;
2088 }
2089 
2090 /* Return true if we can use gather/scatter internal functions to
2091    vectorize STMT_INFO, which is a grouped or strided load or store.
2092    MASKED_P is true if load or store is conditional.  When returning
2093    true, fill in GS_INFO with the information required to perform the
2094    operation.  */
2095 
2096 static bool
2097 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
2098 				    loop_vec_info loop_vinfo, bool masked_p,
2099 				    gather_scatter_info *gs_info)
2100 {
2101   if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
2102       || gs_info->decl)
2103     return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
2104 						masked_p, gs_info);
2105 
2106   tree old_offset_type = TREE_TYPE (gs_info->offset);
2107   tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
2108 
2109   gcc_assert (TYPE_PRECISION (new_offset_type)
2110 	      >= TYPE_PRECISION (old_offset_type));
2111   gs_info->offset = fold_convert (new_offset_type, gs_info->offset);
2112 
2113   if (dump_enabled_p ())
2114     dump_printf_loc (MSG_NOTE, vect_location,
2115 		     "using gather/scatter for strided/grouped access,"
2116 		     " scale = %d\n", gs_info->scale);
2117 
2118   return true;
2119 }
2120 
2121 /* STMT_INFO is a non-strided load or store, meaning that it accesses
2122    elements with a known constant step.  Return -1 if that step
2123    is negative, 0 if it is zero, and 1 if it is greater than zero.  */
2124 
2125 static int
2126 compare_step_with_zero (stmt_vec_info stmt_info)
2127 {
2128   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2129   return tree_int_cst_compare (vect_dr_behavior (dr_info)->step,
2130 			       size_zero_node);
2131 }
2132 
2133 /* If the target supports a permute mask that reverses the elements in
2134    a vector of type VECTYPE, return that mask, otherwise return null.  */
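
/* For example, for a four-element vector the selected indices are
   { 3, 2, 1, 0 }, encoded below as the single stepped pattern 3, 2, 1;
   for variable-length vectors the same encoding describes
   { N-1, N-2, ..., 0 }.  */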
2135 
2136 static tree
2137 perm_mask_for_reverse (tree vectype)
2138 {
2139   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2140 
2141   /* The encoding has a single stepped pattern.  */
2142   vec_perm_builder sel (nunits, 1, 3);
2143   for (int i = 0; i < 3; ++i)
2144     sel.quick_push (nunits - 1 - i);
2145 
2146   vec_perm_indices indices (sel, 1, nunits);
2147   if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
2148     return NULL_TREE;
2149   return vect_gen_perm_mask_checked (vectype, indices);
2150 }
2151 
2152 /* A subroutine of get_load_store_type, with a subset of the same
2153    arguments.  Handle the case where STMT_INFO is a load or store that
2154    accesses consecutive elements with a negative step.  */
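
/* Roughly, as an illustrative summary of the cases below: a single-copy,
   sufficiently aligned load such as a[i] with i decreasing becomes
   VMAT_CONTIGUOUS_REVERSE if the target can reverse a VECTYPE vector, an
   invariant store becomes VMAT_CONTIGUOUS_DOWN, and every other case
   falls back to VMAT_ELEMENTWISE.  */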
2155 
2156 static vect_memory_access_type
2157 get_negative_load_store_type (stmt_vec_info stmt_info, tree vectype,
2158 			      vec_load_store_type vls_type,
2159 			      unsigned int ncopies)
2160 {
2161   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2162   dr_alignment_support alignment_support_scheme;
2163 
2164   if (ncopies > 1)
2165     {
2166       if (dump_enabled_p ())
2167 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2168 			 "multiple types with negative step.\n");
2169       return VMAT_ELEMENTWISE;
2170     }
2171 
2172   alignment_support_scheme = vect_supportable_dr_alignment (dr_info, false);
2173   if (alignment_support_scheme != dr_aligned
2174       && alignment_support_scheme != dr_unaligned_supported)
2175     {
2176       if (dump_enabled_p ())
2177 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2178 			 "negative step but alignment required.\n");
2179       return VMAT_ELEMENTWISE;
2180     }
2181 
2182   if (vls_type == VLS_STORE_INVARIANT)
2183     {
2184       if (dump_enabled_p ())
2185 	dump_printf_loc (MSG_NOTE, vect_location,
2186 			 "negative step with invariant source;"
2187 			 " no permute needed.\n");
2188       return VMAT_CONTIGUOUS_DOWN;
2189     }
2190 
2191   if (!perm_mask_for_reverse (vectype))
2192     {
2193       if (dump_enabled_p ())
2194 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2195 			 "negative step and reversing not supported.\n");
2196       return VMAT_ELEMENTWISE;
2197     }
2198 
2199   return VMAT_CONTIGUOUS_REVERSE;
2200 }
2201 
2202 /* STMT_INFO is either a masked or unconditional store.  Return the value
2203    being stored.  */
2204 
2205 tree
2206 vect_get_store_rhs (stmt_vec_info stmt_info)
2207 {
2208   if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
2209     {
2210       gcc_assert (gimple_assign_single_p (assign));
2211       return gimple_assign_rhs1 (assign);
2212     }
2213   if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2214     {
2215       internal_fn ifn = gimple_call_internal_fn (call);
2216       int index = internal_fn_stored_value_index (ifn);
2217       gcc_assert (index >= 0);
2218       return gimple_call_arg (call, index);
2219     }
2220   gcc_unreachable ();
2221 }
2222 
2223 /* Function VECTOR_VECTOR_COMPOSITION_TYPE
2224 
2225    This function returns a vector type which can be composed from NELTS pieces,
2226    whose type is recorded in PTYPE.  VTYPE should be a vector type and has the
2227    same vector size as the returned vector.  It first checks whether the target
2228    supports a vector mode of the piece size for the construction; if not, it
2229    then checks whether a scalar mode of the piece size can be used instead.
2230    It returns NULL_TREE if no suitable composition can be found.
2231 
2232    For example, for (vtype=V16QI, nelts=4), we can probably get:
2233      - V16QI with PTYPE V4QI.
2234      - V4SI with PTYPE SI.
2235      - NULL_TREE.  */
2236 
2237 static tree
2238 vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
2239 {
2240   gcc_assert (VECTOR_TYPE_P (vtype));
2241   gcc_assert (known_gt (nelts, 0U));
2242 
2243   machine_mode vmode = TYPE_MODE (vtype);
2244   if (!VECTOR_MODE_P (vmode))
2245     return NULL_TREE;
2246 
2247   poly_uint64 vbsize = GET_MODE_BITSIZE (vmode);
2248   unsigned int pbsize;
2249   if (constant_multiple_p (vbsize, nelts, &pbsize))
2250     {
2251       /* First check if vec_init optab supports construction from
2252 	 vector pieces directly.  */
2253       scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype));
2254       poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode);
2255       machine_mode rmode;
2256       if (related_vector_mode (vmode, elmode, inelts).exists (&rmode)
2257 	  && (convert_optab_handler (vec_init_optab, vmode, rmode)
2258 	      != CODE_FOR_nothing))
2259 	{
2260 	  *ptype = build_vector_type (TREE_TYPE (vtype), inelts);
2261 	  return vtype;
2262 	}
2263 
2264       /* Otherwise check if exists an integer type of the same piece size and
2265 	 if vec_init optab supports construction from it directly.  */
2266       if (int_mode_for_size (pbsize, 0).exists (&elmode)
2267 	  && related_vector_mode (vmode, elmode, nelts).exists (&rmode)
2268 	  && (convert_optab_handler (vec_init_optab, rmode, elmode)
2269 	      != CODE_FOR_nothing))
2270 	{
2271 	  *ptype = build_nonstandard_integer_type (pbsize, 1);
2272 	  return build_vector_type (*ptype, nelts);
2273 	}
2274     }
2275 
2276   return NULL_TREE;
2277 }
2278 
2279 /* A subroutine of get_load_store_type, with a subset of the same
2280    arguments.  Handle the case where STMT_INFO is part of a grouped load
2281    or store.
2282 
2283    For stores, the statements in the group are all consecutive
2284    and there is no gap at the end.  For loads, the statements in the
2285    group might not be consecutive; there can be gaps between statements
2286    as well as at the end.  */
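
/* As an illustration (a sketch with made-up numbers): a load group with
   DR_GROUP_SIZE 4 in which only the first two int elements are read has
   DR_GROUP_GAP 2, so a four-element vector load reads two elements past
   the last scalar access.  If the first access is known to be 16-byte
   aligned, the gap of 2 is below 16 / 4 = 4 elements and the overrun
   stays within the aligned block; otherwise it is only acceptable for
   loads in a loop and forces LOOP_VINFO_PEELING_FOR_GAPS below.  */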
2287 
2288 static bool
2289 get_group_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
2290 			   bool masked_p, vec_load_store_type vls_type,
2291 			   vect_memory_access_type *memory_access_type,
2292 			   gather_scatter_info *gs_info)
2293 {
2294   vec_info *vinfo = stmt_info->vinfo;
2295   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2296   class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2297   stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2298   dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
2299   unsigned int group_size = DR_GROUP_SIZE (first_stmt_info);
2300   bool single_element_p = (stmt_info == first_stmt_info
2301 			   && !DR_GROUP_NEXT_ELEMENT (stmt_info));
2302   unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
2303   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2304 
2305   /* True if the vectorized statements would access beyond the last
2306      statement in the group.  */
2307   bool overrun_p = false;
2308 
2309   /* True if we can cope with such overrun by peeling for gaps, so that
2310      there is at least one final scalar iteration after the vector loop.  */
2311   bool can_overrun_p = (!masked_p
2312 			&& vls_type == VLS_LOAD
2313 			&& loop_vinfo
2314 			&& !loop->inner);
2315 
2316   /* There can only be a gap at the end of the group if the stride is
2317      known at compile time.  */
2318   gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
2319 
2320   /* Stores can't yet have gaps.  */
2321   gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
2322 
2323   if (slp)
2324     {
2325       if (STMT_VINFO_STRIDED_P (first_stmt_info))
2326 	{
2327 	  /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2328 	     separated by the stride, until we have a complete vector.
2329 	     Fall back to scalar accesses if that isn't possible.  */
2330 	  if (multiple_p (nunits, group_size))
2331 	    *memory_access_type = VMAT_STRIDED_SLP;
2332 	  else
2333 	    *memory_access_type = VMAT_ELEMENTWISE;
2334 	}
2335       else
2336 	{
2337 	  overrun_p = loop_vinfo && gap != 0;
2338 	  if (overrun_p && vls_type != VLS_LOAD)
2339 	    {
2340 	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2341 			       "Grouped store with gaps requires"
2342 			       " non-consecutive accesses\n");
2343 	      return false;
2344 	    }
2345 	  /* An overrun is fine if the trailing elements are smaller
2346 	     than the alignment boundary B.  Every vector access will
2347 	     be a multiple of B and so we are guaranteed to access a
2348 	     non-gap element in the same B-sized block.  */
2349 	  if (overrun_p
2350 	      && gap < (vect_known_alignment_in_bytes (first_dr_info)
2351 			/ vect_get_scalar_dr_size (first_dr_info)))
2352 	    overrun_p = false;
2353 
2354 	  /* If the gap splits the vector in half and the target
2355 	     can do half-vector operations avoid the epilogue peeling
2356 	     by simply loading half of the vector only.  Usually
2357 	     the construction with an upper zero half will be elided.  */
2358 	  dr_alignment_support alignment_support_scheme;
2359 	  tree half_vtype;
2360 	  if (overrun_p
2361 	      && !masked_p
2362 	      && (((alignment_support_scheme
2363 		      = vect_supportable_dr_alignment (first_dr_info, false)))
2364 		   == dr_aligned
2365 		  || alignment_support_scheme == dr_unaligned_supported)
2366 	      && known_eq (nunits, (group_size - gap) * 2)
2367 	      && known_eq (nunits, group_size)
2368 	      && (vector_vector_composition_type (vectype, 2, &half_vtype)
2369 		  != NULL_TREE))
2370 	    overrun_p = false;
2371 
2372 	  if (overrun_p && !can_overrun_p)
2373 	    {
2374 	      if (dump_enabled_p ())
2375 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2376 				 "Peeling for outer loop is not supported\n");
2377 	      return false;
2378 	    }
2379 	  int cmp = compare_step_with_zero (stmt_info);
2380 	  if (cmp < 0)
2381 	    *memory_access_type = get_negative_load_store_type
2382 	      (stmt_info, vectype, vls_type, 1);
2383 	  else
2384 	    {
2385 	      gcc_assert (!loop_vinfo || cmp > 0);
2386 	      *memory_access_type = VMAT_CONTIGUOUS;
2387 	    }
2388 	}
2389     }
2390   else
2391     {
2392       /* We can always handle this case using elementwise accesses,
2393 	 but see if something more efficient is available.  */
2394       *memory_access_type = VMAT_ELEMENTWISE;
2395 
2396       /* If there is a gap at the end of the group then these optimizations
2397 	 would access excess elements in the last iteration.  */
2398       bool would_overrun_p = (gap != 0);
2399       /* An overrun is fine if the trailing elements are smaller than the
2400 	 alignment boundary B.  Every vector access will be a multiple of B
2401 	 and so we are guaranteed to access a non-gap element in the
2402 	 same B-sized block.  */
2403       if (would_overrun_p
2404 	  && !masked_p
2405 	  && gap < (vect_known_alignment_in_bytes (first_dr_info)
2406 		    / vect_get_scalar_dr_size (first_dr_info)))
2407 	would_overrun_p = false;
2408 
2409       if (!STMT_VINFO_STRIDED_P (first_stmt_info)
2410 	  && (can_overrun_p || !would_overrun_p)
2411 	  && compare_step_with_zero (stmt_info) > 0)
2412 	{
2413 	  /* First cope with the degenerate case of a single-element
2414 	     vector.  */
2415 	  if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2416 	    *memory_access_type = VMAT_CONTIGUOUS;
2417 
2418 	  /* Otherwise try using LOAD/STORE_LANES.  */
2419 	  if (*memory_access_type == VMAT_ELEMENTWISE
2420 	      && (vls_type == VLS_LOAD
2421 		  ? vect_load_lanes_supported (vectype, group_size, masked_p)
2422 		  : vect_store_lanes_supported (vectype, group_size,
2423 						masked_p)))
2424 	    {
2425 	      *memory_access_type = VMAT_LOAD_STORE_LANES;
2426 	      overrun_p = would_overrun_p;
2427 	    }
2428 
2429 	  /* If that fails, try using permuting loads.  */
2430 	  if (*memory_access_type == VMAT_ELEMENTWISE
2431 	      && (vls_type == VLS_LOAD
2432 		  ? vect_grouped_load_supported (vectype, single_element_p,
2433 						 group_size)
2434 		  : vect_grouped_store_supported (vectype, group_size)))
2435 	    {
2436 	      *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2437 	      overrun_p = would_overrun_p;
2438 	    }
2439 	}
2440 
2441       /* As a last resort, try using a gather load or scatter store.
2442 
2443 	 ??? Although the code can handle all group sizes correctly,
2444 	 it probably isn't a win to use separate strided accesses based
2445 	 on nearby locations.  Or, even if it's a win over scalar code,
2446 	 it might not be a win over vectorizing at a lower VF, if that
2447 	 allows us to use contiguous accesses.  */
2448       if (*memory_access_type == VMAT_ELEMENTWISE
2449 	  && single_element_p
2450 	  && loop_vinfo
2451 	  && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2452 						 masked_p, gs_info))
2453 	*memory_access_type = VMAT_GATHER_SCATTER;
2454     }
2455 
2456   if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2457     {
2458       /* STMT is the leader of the group. Check the operands of all the
2459 	 stmts of the group.  */
2460       stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2461       while (next_stmt_info)
2462 	{
2463 	  tree op = vect_get_store_rhs (next_stmt_info);
2464 	  enum vect_def_type dt;
2465 	  if (!vect_is_simple_use (op, vinfo, &dt))
2466 	    {
2467 	      if (dump_enabled_p ())
2468 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2469 				 "use not simple.\n");
2470 	      return false;
2471 	    }
2472 	  next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2473 	}
2474     }
2475 
2476   if (overrun_p)
2477     {
2478       gcc_assert (can_overrun_p);
2479       if (dump_enabled_p ())
2480 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2481 			 "Data access with gaps requires scalar "
2482 			 "epilogue loop\n");
2483       LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2484     }
2485 
2486   return true;
2487 }
2488 
2489 /* Analyze load or store statement STMT_INFO of type VLS_TYPE.  Return true
2490    if there is a memory access type that the vectorized form can use,
2491    storing it in *MEMORY_ACCESS_TYPE if so.  If we decide to use gathers
2492    or scatters, fill in GS_INFO accordingly.
2493 
2494    SLP says whether we're performing SLP rather than loop vectorization.
2495    MASKED_P is true if the statement is conditional on a vectorized mask.
2496    VECTYPE is the vector type that the vectorized statements will use.
2497    NCOPIES is the number of vector statements that will be needed.  */
2498 
2499 static bool
2500 get_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
2501 		     bool masked_p, vec_load_store_type vls_type,
2502 		     unsigned int ncopies,
2503 		     vect_memory_access_type *memory_access_type,
2504 		     gather_scatter_info *gs_info)
2505 {
2506   vec_info *vinfo = stmt_info->vinfo;
2507   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2508   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2509   if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2510     {
2511       *memory_access_type = VMAT_GATHER_SCATTER;
2512       if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
2513 	gcc_unreachable ();
2514       else if (!vect_is_simple_use (gs_info->offset, vinfo,
2515 				    &gs_info->offset_dt,
2516 				    &gs_info->offset_vectype))
2517 	{
2518 	  if (dump_enabled_p ())
2519 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2520 			     "%s index use not simple.\n",
2521 			     vls_type == VLS_LOAD ? "gather" : "scatter");
2522 	  return false;
2523 	}
2524     }
2525   else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2526     {
2527       if (!get_group_load_store_type (stmt_info, vectype, slp, masked_p,
2528 				      vls_type, memory_access_type, gs_info))
2529 	return false;
2530     }
2531   else if (STMT_VINFO_STRIDED_P (stmt_info))
2532     {
2533       gcc_assert (!slp);
2534       if (loop_vinfo
2535 	  && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2536 						 masked_p, gs_info))
2537 	*memory_access_type = VMAT_GATHER_SCATTER;
2538       else
2539 	*memory_access_type = VMAT_ELEMENTWISE;
2540     }
2541   else
2542     {
2543       int cmp = compare_step_with_zero (stmt_info);
2544       if (cmp < 0)
2545 	*memory_access_type = get_negative_load_store_type
2546 	  (stmt_info, vectype, vls_type, ncopies);
2547       else if (cmp == 0)
2548 	{
2549 	  gcc_assert (vls_type == VLS_LOAD);
2550 	  *memory_access_type = VMAT_INVARIANT;
2551 	}
2552       else
2553 	*memory_access_type = VMAT_CONTIGUOUS;
2554     }
2555 
2556   if ((*memory_access_type == VMAT_ELEMENTWISE
2557        || *memory_access_type == VMAT_STRIDED_SLP)
2558       && !nunits.is_constant ())
2559     {
2560       if (dump_enabled_p ())
2561 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2562 			 "Not using elementwise accesses due to variable "
2563 			 "vectorization factor.\n");
2564       return false;
2565     }
2566 
2567   /* FIXME: At the moment the cost model seems to underestimate the
2568      cost of using elementwise accesses.  This check preserves the
2569      traditional behavior until that can be fixed.  */
2570   stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2571   if (!first_stmt_info)
2572     first_stmt_info = stmt_info;
2573   if (*memory_access_type == VMAT_ELEMENTWISE
2574       && !STMT_VINFO_STRIDED_P (first_stmt_info)
2575       && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
2576 	   && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2577 	   && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2578     {
2579       if (dump_enabled_p ())
2580 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2581 			 "not falling back to elementwise accesses\n");
2582       return false;
2583     }
2584   return true;
2585 }
2586 
2587 /* Return true if boolean argument MASK is suitable for vectorizing
2588    conditional operation STMT_INFO.  When returning true, store the type
2589    of the definition in *MASK_DT_OUT and the type of the vectorized mask
2590    in *MASK_VECTYPE_OUT.  */
2591 
2592 static bool
2593 vect_check_scalar_mask (stmt_vec_info stmt_info, tree mask,
2594 			vect_def_type *mask_dt_out,
2595 			tree *mask_vectype_out)
2596 {
2597   vec_info *vinfo = stmt_info->vinfo;
2598   if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2599     {
2600       if (dump_enabled_p ())
2601 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2602 			 "mask argument is not a boolean.\n");
2603       return false;
2604     }
2605 
2606   if (TREE_CODE (mask) != SSA_NAME)
2607     {
2608       if (dump_enabled_p ())
2609 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2610 			 "mask argument is not an SSA name.\n");
2611       return false;
2612     }
2613 
2614   enum vect_def_type mask_dt;
2615   tree mask_vectype;
2616   if (!vect_is_simple_use (mask, stmt_info->vinfo, &mask_dt, &mask_vectype))
2617     {
2618       if (dump_enabled_p ())
2619 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2620 			 "mask use not simple.\n");
2621       return false;
2622     }
2623 
2624   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2625   if (!mask_vectype)
2626     mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype));
2627 
2628   if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2629     {
2630       if (dump_enabled_p ())
2631 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2632 			 "could not find an appropriate vector mask type.\n");
2633       return false;
2634     }
2635 
2636   if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2637 		TYPE_VECTOR_SUBPARTS (vectype)))
2638     {
2639       if (dump_enabled_p ())
2640 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2641 			 "vector mask type %T"
2642 			 " does not match vector data type %T.\n",
2643 			 mask_vectype, vectype);
2644 
2645       return false;
2646     }
2647 
2648   *mask_dt_out = mask_dt;
2649   *mask_vectype_out = mask_vectype;
2650   return true;
2651 }
2652 
2653 /* Return true if stored value RHS is suitable for vectorizing store
2654    statement STMT_INFO.  When returning true, store the type of the
2655    definition in *RHS_DT_OUT, the type of the vectorized store value in
2656    *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT.  */
2657 
2658 static bool
2659 vect_check_store_rhs (stmt_vec_info stmt_info, tree rhs,
2660 		      vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
2661 		      vec_load_store_type *vls_type_out)
2662 {
2663   /* If this is a store from a constant, make sure
2664      native_encode_expr can handle it.  */
2665   if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2666     {
2667       if (dump_enabled_p ())
2668 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2669 			 "cannot encode constant as a byte sequence.\n");
2670       return false;
2671     }
2672 
2673   enum vect_def_type rhs_dt;
2674   tree rhs_vectype;
2675   if (!vect_is_simple_use (rhs, stmt_info->vinfo, &rhs_dt, &rhs_vectype))
2676     {
2677       if (dump_enabled_p ())
2678 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2679 			 "use not simple.\n");
2680       return false;
2681     }
2682 
2683   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2684   if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2685     {
2686       if (dump_enabled_p ())
2687 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2688 			 "incompatible vector types.\n");
2689       return false;
2690     }
2691 
2692   *rhs_dt_out = rhs_dt;
2693   *rhs_vectype_out = rhs_vectype;
2694   if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2695     *vls_type_out = VLS_STORE_INVARIANT;
2696   else
2697     *vls_type_out = VLS_STORE;
2698   return true;
2699 }
2700 
2701 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2702    Note that we support masks with floating-point type, in which case the
2703    floats are interpreted as a bitmask.  */
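
/* For instance (illustrative only): for a two-element double MASKTYPE the
   mask is a vector of doubles whose bit patterns are all ones, built via
   real_from_target below; for an integer or integer-vector MASKTYPE it is
   simply -1 or a splat of -1.  */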
2704 
2705 static tree
2706 vect_build_all_ones_mask (stmt_vec_info stmt_info, tree masktype)
2707 {
2708   if (TREE_CODE (masktype) == INTEGER_TYPE)
2709     return build_int_cst (masktype, -1);
2710   else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2711     {
2712       tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2713       mask = build_vector_from_val (masktype, mask);
2714       return vect_init_vector (stmt_info, mask, masktype, NULL);
2715     }
2716   else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2717     {
2718       REAL_VALUE_TYPE r;
2719       long tmp[6];
2720       for (int j = 0; j < 6; ++j)
2721 	tmp[j] = -1;
2722       real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2723       tree mask = build_real (TREE_TYPE (masktype), r);
2724       mask = build_vector_from_val (masktype, mask);
2725       return vect_init_vector (stmt_info, mask, masktype, NULL);
2726     }
2727   gcc_unreachable ();
2728 }
2729 
2730 /* Build an all-zero merge value of type VECTYPE while vectorizing
2731    STMT_INFO as a gather load.  */
2732 
2733 static tree
2734 vect_build_zero_merge_argument (stmt_vec_info stmt_info, tree vectype)
2735 {
2736   tree merge;
2737   if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2738     merge = build_int_cst (TREE_TYPE (vectype), 0);
2739   else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2740     {
2741       REAL_VALUE_TYPE r;
2742       long tmp[6];
2743       for (int j = 0; j < 6; ++j)
2744 	tmp[j] = 0;
2745       real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2746       merge = build_real (TREE_TYPE (vectype), r);
2747     }
2748   else
2749     gcc_unreachable ();
2750   merge = build_vector_from_val (vectype, merge);
2751   return vect_init_vector (stmt_info, merge, vectype, NULL);
2752 }
2753 
2754 /* Build a gather load call while vectorizing STMT_INFO.  Insert new
2755    instructions before GSI and add them to VEC_STMT.  GS_INFO describes
2756    the gather load operation.  If the load is conditional, MASK is the
2757    unvectorized condition and MASK_DT is its definition type, otherwise
2758    MASK is null.  */
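
/* The WIDEN/NARROW handling below can be pictured roughly as follows, for
   fixed-length vectors only: with a four-element data vector and an
   eight-element offset vector the gather is "widening" (each call uses
   half of the offsets, the upper half being selected by PERM_MASK), while
   with an eight-element data vector and a four-element offset vector it
   is "narrowing" (NCOPIES is doubled and pairs of results are permuted
   together).  */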
2759 
2760 static void
2761 vect_build_gather_load_calls (stmt_vec_info stmt_info,
2762 			      gimple_stmt_iterator *gsi,
2763 			      stmt_vec_info *vec_stmt,
2764 			      gather_scatter_info *gs_info,
2765 			      tree mask)
2766 {
2767   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2768   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2769   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2770   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2771   int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2772   edge pe = loop_preheader_edge (loop);
2773   enum { NARROW, NONE, WIDEN } modifier;
2774   poly_uint64 gather_off_nunits
2775     = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2776 
2777   tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2778   tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2779   tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2780   tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2781   tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2782   tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2783   tree scaletype = TREE_VALUE (arglist);
2784   tree real_masktype = masktype;
2785   gcc_checking_assert (types_compatible_p (srctype, rettype)
2786 		       && (!mask
2787 			   || TREE_CODE (masktype) == INTEGER_TYPE
2788 			   || types_compatible_p (srctype, masktype)));
2789   if (mask && TREE_CODE (masktype) == INTEGER_TYPE)
2790     masktype = truth_type_for (srctype);
2791 
2792   tree mask_halftype = masktype;
2793   tree perm_mask = NULL_TREE;
2794   tree mask_perm_mask = NULL_TREE;
2795   if (known_eq (nunits, gather_off_nunits))
2796     modifier = NONE;
2797   else if (known_eq (nunits * 2, gather_off_nunits))
2798     {
2799       modifier = WIDEN;
2800 
2801       /* Currently widening gathers and scatters are only supported for
2802 	 fixed-length vectors.  */
2803       int count = gather_off_nunits.to_constant ();
2804       vec_perm_builder sel (count, count, 1);
2805       for (int i = 0; i < count; ++i)
2806 	sel.quick_push (i | (count / 2));
2807 
2808       vec_perm_indices indices (sel, 1, count);
2809       perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2810 					      indices);
2811     }
2812   else if (known_eq (nunits, gather_off_nunits * 2))
2813     {
2814       modifier = NARROW;
2815 
2816       /* Currently narrowing gathers and scatters are only supported for
2817 	 fixed-length vectors.  */
2818       int count = nunits.to_constant ();
2819       vec_perm_builder sel (count, count, 1);
2820       sel.quick_grow (count);
2821       for (int i = 0; i < count; ++i)
2822 	sel[i] = i < count / 2 ? i : i + count / 2;
2823       vec_perm_indices indices (sel, 2, count);
2824       perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2825 
2826       ncopies *= 2;
2827 
2828       if (mask && masktype == real_masktype)
2829 	{
2830 	  for (int i = 0; i < count; ++i)
2831 	    sel[i] = i | (count / 2);
2832 	  indices.new_vector (sel, 2, count);
2833 	  mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2834 	}
2835       else if (mask)
2836 	mask_halftype = truth_type_for (gs_info->offset_vectype);
2837     }
2838   else
2839     gcc_unreachable ();
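  /* As an illustration (not tied to a particular target), with a 2-element
     data vector and a 4-element offset vector (WIDEN) the selector built
     above is { 2, 3, 2, 3 }, which brings the high half of the offset
     vector into position for the second gather call.  In the NARROW case
     with a 4-element data vector the selector is { 0, 1, 4, 5 }, which
     concatenates the low halves of two half-populated gather results.  */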
2840 
2841   tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
2842   tree vec_dest = vect_create_destination_var (scalar_dest, vectype);
2843 
2844   tree ptr = fold_convert (ptrtype, gs_info->base);
2845   if (!is_gimple_min_invariant (ptr))
2846     {
2847       gimple_seq seq;
2848       ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2849       basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2850       gcc_assert (!new_bb);
2851     }
2852 
2853   tree scale = build_int_cst (scaletype, gs_info->scale);
2854 
2855   tree vec_oprnd0 = NULL_TREE;
2856   tree vec_mask = NULL_TREE;
2857   tree src_op = NULL_TREE;
2858   tree mask_op = NULL_TREE;
2859   tree prev_res = NULL_TREE;
2860   stmt_vec_info prev_stmt_info = NULL;
2861 
2862   if (!mask)
2863     {
2864       src_op = vect_build_zero_merge_argument (stmt_info, rettype);
2865       mask_op = vect_build_all_ones_mask (stmt_info, masktype);
2866     }
2867 
2868   for (int j = 0; j < ncopies; ++j)
2869     {
2870       tree op, var;
2871       if (modifier == WIDEN && (j & 1))
2872 	op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
2873 				   perm_mask, stmt_info, gsi);
2874       else if (j == 0)
2875 	op = vec_oprnd0
2876 	  = vect_get_vec_def_for_operand (gs_info->offset, stmt_info);
2877       else
2878 	op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (loop_vinfo,
2879 							  vec_oprnd0);
2880 
2881       if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2882 	{
2883 	  gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2884 				TYPE_VECTOR_SUBPARTS (idxtype)));
2885 	  var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2886 	  op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2887 	  gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2888 	  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2889 	  op = var;
2890 	}
2891 
2892       if (mask)
2893 	{
2894 	  if (mask_perm_mask && (j & 1))
2895 	    mask_op = permute_vec_elements (mask_op, mask_op,
2896 					    mask_perm_mask, stmt_info, gsi);
2897 	  else
2898 	    {
2899 	      if (j == 0)
2900 		vec_mask = vect_get_vec_def_for_operand (mask, stmt_info);
2901 	      else if (modifier != NARROW || (j & 1) == 0)
2902 		vec_mask = vect_get_vec_def_for_stmt_copy (loop_vinfo,
2903 							   vec_mask);
2904 
2905 	      mask_op = vec_mask;
2906 	      if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2907 		{
2908 		  poly_uint64 sub1 = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op));
2909 		  poly_uint64 sub2 = TYPE_VECTOR_SUBPARTS (masktype);
2910 		  gcc_assert (known_eq (sub1, sub2));
2911 		  var = vect_get_new_ssa_name (masktype, vect_simple_var);
2912 		  mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2913 		  gassign *new_stmt
2914 		    = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2915 		  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2916 		  mask_op = var;
2917 		}
2918 	    }
2919 	  if (modifier == NARROW && masktype != real_masktype)
2920 	    {
2921 	      var = vect_get_new_ssa_name (mask_halftype, vect_simple_var);
2922 	      gassign *new_stmt
2923 		= gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
2924 						    : VEC_UNPACK_LO_EXPR,
2925 				       mask_op);
2926 	      vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2927 	      mask_op = var;
2928 	    }
2929 	  src_op = mask_op;
2930 	}
2931 
2932       tree mask_arg = mask_op;
2933       if (masktype != real_masktype)
2934 	{
2935 	  tree utype, optype = TREE_TYPE (mask_op);
2936 	  if (TYPE_MODE (real_masktype) == TYPE_MODE (optype))
2937 	    utype = real_masktype;
2938 	  else
2939 	    utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
2940 	  var = vect_get_new_ssa_name (utype, vect_scalar_var);
2941 	  mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_op);
2942 	  gassign *new_stmt
2943 	    = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
2944 	  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2945 	  mask_arg = var;
2946 	  if (!useless_type_conversion_p (real_masktype, utype))
2947 	    {
2948 	      gcc_assert (TYPE_PRECISION (utype)
2949 			  <= TYPE_PRECISION (real_masktype));
2950 	      var = vect_get_new_ssa_name (real_masktype, vect_scalar_var);
2951 	      new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
2952 	      vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2953 	      mask_arg = var;
2954 	    }
2955 	  src_op = build_zero_cst (srctype);
2956 	}
2957       gcall *new_call = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2958 					   mask_arg, scale);
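      /* The call built above has the rough shape (illustrative)
	     tmp = <gather builtin> (src_op, ptr, op, mask_arg, scale);
	 and its result is view-converted below whenever the builtin's
	 return type cannot be used directly as VECTYPE.  */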
2959 
2960       stmt_vec_info new_stmt_info;
2961       if (!useless_type_conversion_p (vectype, rettype))
2962 	{
2963 	  gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2964 				TYPE_VECTOR_SUBPARTS (rettype)));
2965 	  op = vect_get_new_ssa_name (rettype, vect_simple_var);
2966 	  gimple_call_set_lhs (new_call, op);
2967 	  vect_finish_stmt_generation (stmt_info, new_call, gsi);
2968 	  var = make_ssa_name (vec_dest);
2969 	  op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2970 	  gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2971 	  new_stmt_info
2972 	    = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2973 	}
2974       else
2975 	{
2976 	  var = make_ssa_name (vec_dest, new_call);
2977 	  gimple_call_set_lhs (new_call, var);
2978 	  new_stmt_info
2979 	    = vect_finish_stmt_generation (stmt_info, new_call, gsi);
2980 	}
2981 
2982       if (modifier == NARROW)
2983 	{
2984 	  if ((j & 1) == 0)
2985 	    {
2986 	      prev_res = var;
2987 	      continue;
2988 	    }
2989 	  var = permute_vec_elements (prev_res, var, perm_mask,
2990 				      stmt_info, gsi);
2991 	  new_stmt_info = loop_vinfo->lookup_def (var);
2992 	}
2993 
2994       if (prev_stmt_info == NULL)
2995 	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
2996       else
2997 	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
2998       prev_stmt_info = new_stmt_info;
2999     }
3000 }
3001 
3002 /* Prepare the base and offset in GS_INFO for vectorization.
3003    Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
3004    to the vectorized offset argument for the first copy of STMT_INFO.
3005    STMT_INFO is the statement described by GS_INFO and LOOP is the
3006    containing loop.  */
3007 
3008 static void
3009 vect_get_gather_scatter_ops (class loop *loop, stmt_vec_info stmt_info,
3010 			     gather_scatter_info *gs_info,
3011 			     tree *dataref_ptr, tree *vec_offset)
3012 {
3013   gimple_seq stmts = NULL;
3014   *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
3015   if (stmts != NULL)
3016     {
3017       basic_block new_bb;
3018       edge pe = loop_preheader_edge (loop);
3019       new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3020       gcc_assert (!new_bb);
3021     }
3022   *vec_offset = vect_get_vec_def_for_operand (gs_info->offset, stmt_info,
3023 					      gs_info->offset_vectype);
3024 }
3025 
3026 /* Prepare to implement a grouped or strided load or store using
3027    the gather load or scatter store operation described by GS_INFO.
3028    STMT_INFO is the load or store statement.
3029 
3030    Set *DATAREF_BUMP to the amount that should be added to the base
3031    address after each copy of the vectorized statement.  Set *VEC_OFFSET
3032    to an invariant offset vector in which element I has the value
3033    I * DR_STEP / SCALE.  */
3034 
3035 static void
3036 vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
3037 				 loop_vec_info loop_vinfo,
3038 				 gather_scatter_info *gs_info,
3039 				 tree *dataref_bump, tree *vec_offset)
3040 {
3041   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3042   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3043   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3044   gimple_seq stmts;
3045 
3046   tree bump = size_binop (MULT_EXPR,
3047 			  fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
3048 			  size_int (TYPE_VECTOR_SUBPARTS (vectype)));
3049   *dataref_bump = force_gimple_operand (bump, &stmts, true, NULL_TREE);
3050   if (stmts)
3051     gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
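  /* For example (purely illustrative), a strided V4SI access whose DR_STEP
     is 32 bytes gets a bump of 32 * 4 = 128 bytes per vector copy.  */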
3052 
3053   /* The offset given in GS_INFO can have pointer type, so use the element
3054      type of the vector instead.  */
3055   tree offset_type = TREE_TYPE (gs_info->offset_vectype);
3057 
3058   /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type.  */
3059   tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)),
3060 			  ssize_int (gs_info->scale));
3061   step = fold_convert (offset_type, step);
3062   step = force_gimple_operand (step, &stmts, true, NULL_TREE);
3063 
3064   /* Create {0, X, X*2, X*3, ...}.  */
3065   *vec_offset = gimple_build (&stmts, VEC_SERIES_EXPR, gs_info->offset_vectype,
3066 			      build_zero_cst (offset_type), step);
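  /* Continuing the example above (illustrative): with DR_STEP = 32 bytes
     and SCALE = 4 the step X is 8, so *VEC_OFFSET becomes { 0, 8, 16, 24 }
     for a 4-element offset vector.  */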
3067   if (stmts)
3068     gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
3069 }
3070 
3071 /* Return the amount that should be added to a vector pointer to move
3072    to the next or previous copy of AGGR_TYPE.  DR_INFO is the data reference
3073    being vectorized and MEMORY_ACCESS_TYPE describes the type of
3074    vectorization.  */
3075 
3076 static tree
3077 vect_get_data_ptr_increment (dr_vec_info *dr_info, tree aggr_type,
3078 			     vect_memory_access_type memory_access_type)
3079 {
3080   if (memory_access_type == VMAT_INVARIANT)
3081     return size_zero_node;
3082 
3083   tree iv_step = TYPE_SIZE_UNIT (aggr_type);
3084   tree step = vect_dr_behavior (dr_info)->step;
3085   if (tree_int_cst_sgn (step) == -1)
3086     iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
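  /* E.g. (illustrative) for a V4SI aggregate accessed with a negative
     DR_STEP the increment is -16 bytes, so the pointer walks backwards.  */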
3087   return iv_step;
3088 }
3089 
3090 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}.  */
3091 
3092 static bool
3093 vectorizable_bswap (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3094 		    stmt_vec_info *vec_stmt, slp_tree slp_node,
3095 		    tree vectype_in, stmt_vector_for_cost *cost_vec)
3096 {
3097   tree op, vectype;
3098   gcall *stmt = as_a <gcall *> (stmt_info->stmt);
3099   vec_info *vinfo = stmt_info->vinfo;
3100   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3101   unsigned ncopies;
3102 
3103   op = gimple_call_arg (stmt, 0);
3104   vectype = STMT_VINFO_VECTYPE (stmt_info);
3105   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3106 
3107   /* Multiple types in SLP are handled by creating the appropriate number of
3108      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
3109      case of SLP.  */
3110   if (slp_node)
3111     ncopies = 1;
3112   else
3113     ncopies = vect_get_num_copies (loop_vinfo, vectype);
3114 
3115   gcc_assert (ncopies >= 1);
3116 
3117   tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
3118   if (! char_vectype)
3119     return false;
3120 
3121   poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
3122   unsigned word_bytes;
3123   if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
3124     return false;
3125 
3126   /* The encoding uses one stepped pattern for each byte in the word.  */
3127   vec_perm_builder elts (num_bytes, word_bytes, 3);
3128   for (unsigned i = 0; i < 3; ++i)
3129     for (unsigned j = 0; j < word_bytes; ++j)
3130       elts.quick_push ((i + 1) * word_bytes - j - 1);
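  /* As an illustration, for 32-bit elements (WORD_BYTES == 4) the encoded
     selector starts { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, ... }, i.e. the
     bytes of each word in reverse order.  */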
3131 
3132   vec_perm_indices indices (elts, 1, num_bytes);
3133   if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
3134     return false;
3135 
3136   if (! vec_stmt)
3137     {
3138       STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3139       DUMP_VECT_SCOPE ("vectorizable_bswap");
3140       if (! slp_node)
3141 	{
3142 	  record_stmt_cost (cost_vec,
3143 			    1, vector_stmt, stmt_info, 0, vect_prologue);
3144 	  record_stmt_cost (cost_vec,
3145 			    ncopies, vec_perm, stmt_info, 0, vect_body);
3146 	}
3147       return true;
3148     }
3149 
3150   tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
3151 
3152   /* Transform.  */
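  /* Each copy becomes, roughly (illustrative GIMPLE):
	 t1 = VIEW_CONVERT_EXPR <char_vectype> (vop);
	 t2 = VEC_PERM_EXPR <t1, t1, bswap_vconst>;
	 res = VIEW_CONVERT_EXPR <vectype> (t2);  */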
3153   vec<tree> vec_oprnds = vNULL;
3154   stmt_vec_info new_stmt_info = NULL;
3155   stmt_vec_info prev_stmt_info = NULL;
3156   for (unsigned j = 0; j < ncopies; j++)
3157     {
3158       /* Handle uses.  */
3159       if (j == 0)
3160 	vect_get_vec_defs (op, NULL, stmt_info, &vec_oprnds, NULL, slp_node);
3161       else
3162 	vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds, NULL);
3163 
3164       /* Arguments are ready.  Create the new vector stmt.  */
3165       unsigned i;
3166       tree vop;
3167       FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3168        {
3169 	 gimple *new_stmt;
3170 	 tree tem = make_ssa_name (char_vectype);
3171 	 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3172 						      char_vectype, vop));
3173 	 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3174 	 tree tem2 = make_ssa_name (char_vectype);
3175 	 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3176 					 tem, tem, bswap_vconst);
3177 	 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3178 	 tem = make_ssa_name (vectype);
3179 	 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3180 						      vectype, tem2));
3181 	 new_stmt_info
3182 	   = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3183          if (slp_node)
3184 	   SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3185        }
3186 
3187       if (slp_node)
3188         continue;
3189 
3190       if (j == 0)
3191 	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
3192       else
3193 	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3194 
3195       prev_stmt_info = new_stmt_info;
3196     }
3197 
3198   vec_oprnds.release ();
3199   return true;
3200 }
3201 
3202 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3203    integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3204    in a single step.  On success, store the binary pack code in
3205    *CONVERT_CODE.  */
3206 
3207 static bool
3208 simple_integer_narrowing (tree vectype_out, tree vectype_in,
3209 			  tree_code *convert_code)
3210 {
3211   if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3212       || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3213     return false;
3214 
3215   tree_code code;
3216   int multi_step_cvt = 0;
3217   auto_vec <tree, 8> interm_types;
3218   if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3219 					&code, &multi_step_cvt, &interm_types)
3220       || multi_step_cvt)
3221     return false;
3222 
3223   *convert_code = code;
3224   return true;
3225 }
3226 
3227 /* Function vectorizable_call.
3228 
3229    Check if STMT_INFO performs a function call that can be vectorized.
3230    If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3231    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3232    Return true if STMT_INFO is vectorizable in this way.  */
3233 
3234 static bool
3235 vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3236 		   stmt_vec_info *vec_stmt, slp_tree slp_node,
3237 		   stmt_vector_for_cost *cost_vec)
3238 {
3239   gcall *stmt;
3240   tree vec_dest;
3241   tree scalar_dest;
3242   tree op;
3243   tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3244   stmt_vec_info prev_stmt_info;
3245   tree vectype_out, vectype_in;
3246   poly_uint64 nunits_in;
3247   poly_uint64 nunits_out;
3248   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3249   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3250   vec_info *vinfo = stmt_info->vinfo;
3251   tree fndecl, new_temp, rhs_type;
3252   enum vect_def_type dt[4]
3253     = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3254 	vect_unknown_def_type };
3255   tree vectypes[ARRAY_SIZE (dt)] = {};
3256   int ndts = ARRAY_SIZE (dt);
3257   int ncopies, j;
3258   auto_vec<tree, 8> vargs;
3259   auto_vec<tree, 8> orig_vargs;
3260   enum { NARROW, NONE, WIDEN } modifier;
3261   size_t i, nargs;
3262   tree lhs;
3263 
3264   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3265     return false;
3266 
3267   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3268       && ! vec_stmt)
3269     return false;
3270 
3271   /* Is STMT_INFO a vectorizable call?   */
3272   stmt = dyn_cast <gcall *> (stmt_info->stmt);
3273   if (!stmt)
3274     return false;
3275 
3276   if (gimple_call_internal_p (stmt)
3277       && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3278 	  || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3279     /* Handled by vectorizable_load and vectorizable_store.  */
3280     return false;
3281 
3282   if (gimple_call_lhs (stmt) == NULL_TREE
3283       || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3284     return false;
3285 
3286   gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3287 
3288   vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3289 
3290   /* Process function arguments.  */
3291   rhs_type = NULL_TREE;
3292   vectype_in = NULL_TREE;
3293   nargs = gimple_call_num_args (stmt);
3294 
3295   /* Bail out if the function has more than four arguments; we do not have
3296      interesting builtin functions to vectorize with more than two arguments
3297      except for fma.  No arguments is also not good.  */
3298   if (nargs == 0 || nargs > 4)
3299     return false;
3300 
3301   /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic.  */
3302   combined_fn cfn = gimple_call_combined_fn (stmt);
3303   if (cfn == CFN_GOMP_SIMD_LANE)
3304     {
3305       nargs = 0;
3306       rhs_type = unsigned_type_node;
3307     }
3308 
3309   int mask_opno = -1;
3310   if (internal_fn_p (cfn))
3311     mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3312 
3313   for (i = 0; i < nargs; i++)
3314     {
3315       op = gimple_call_arg (stmt, i);
3316 
3317       if ((int) i == mask_opno)
3318 	{
3319 	  if (!vect_check_scalar_mask (stmt_info, op, &dt[i], &vectypes[i]))
3320 	    return false;
3321 	  continue;
3322 	}
3323 
3324       if (!vect_is_simple_use (op, vinfo, &dt[i], &vectypes[i]))
3325 	{
3326 	  if (dump_enabled_p ())
3327 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3328 			     "use not simple.\n");
3329 	  return false;
3330 	}
3331 
3332       /* We can only handle calls with arguments of the same type.  */
3333       if (rhs_type
3334 	  && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3335 	{
3336 	  if (dump_enabled_p ())
3337 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3338                              "argument types differ.\n");
3339 	  return false;
3340 	}
3341       if (!rhs_type)
3342 	rhs_type = TREE_TYPE (op);
3343 
3344       if (!vectype_in)
3345 	vectype_in = vectypes[i];
3346       else if (vectypes[i]
3347 	       && !types_compatible_p (vectypes[i], vectype_in))
3348 	{
3349 	  if (dump_enabled_p ())
3350 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3351                              "argument vector types differ.\n");
3352 	  return false;
3353 	}
3354     }
3355   /* If all arguments are external or constant defs, infer the vector type
3356      from the scalar type.  */
3357   if (!vectype_in)
3358     vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
3359   if (vec_stmt)
3360     gcc_assert (vectype_in);
3361   if (!vectype_in)
3362     {
3363       if (dump_enabled_p ())
3364 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3365 			 "no vectype for scalar type %T\n", rhs_type);
3366 
3367       return false;
3368     }
3369   /* FORNOW: we don't yet support mixtures of vector sizes for calls,
3370      just mixtures of nunits.  E.g. DI->SI versions of __builtin_ctz*
3371      are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed
3372      by a pack of the two vectors into an SI vector.  We would need
3373      separate code to handle direct VnDI->VnSI IFN_CTZs.  */
3374   if (TYPE_SIZE (vectype_in) != TYPE_SIZE (vectype_out))
3375     {
3376       if (dump_enabled_p ())
3377 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3378 			 "mismatched vector sizes %T and %T\n",
3379 			 vectype_in, vectype_out);
3380       return false;
3381     }
3382 
3383   if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
3384       != VECTOR_BOOLEAN_TYPE_P (vectype_in))
3385     {
3386       if (dump_enabled_p ())
3387 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3388 			 "mixed mask and nonmask vector types\n");
3389       return false;
3390     }
3391 
3392   /* FORNOW */
3393   nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3394   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3395   if (known_eq (nunits_in * 2, nunits_out))
3396     modifier = NARROW;
3397   else if (known_eq (nunits_out, nunits_in))
3398     modifier = NONE;
3399   else if (known_eq (nunits_out * 2, nunits_in))
3400     modifier = WIDEN;
3401   else
3402     return false;
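  /* For instance (illustrative): V4SI arguments with a V4SI result give
     NONE, V4DI arguments with a V8SI result give NARROW (two calls packed
     into one result), and V8SI arguments with a V4DI result give WIDEN.  */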
3403 
3404   /* We only handle functions that do not read or clobber memory.  */
3405   if (gimple_vuse (stmt))
3406     {
3407       if (dump_enabled_p ())
3408 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3409 			 "function reads from or writes to memory.\n");
3410       return false;
3411     }
3412 
3413   /* For now, we only vectorize functions if a target specific builtin
3414      is available.  TODO -- in some cases, it might be profitable to
3415      insert the calls for pieces of the vector, in order to be able
3416      to vectorize other operations in the loop.  */
3417   fndecl = NULL_TREE;
3418   internal_fn ifn = IFN_LAST;
3419   tree callee = gimple_call_fndecl (stmt);
3420 
3421   /* First try using an internal function.  */
3422   tree_code convert_code = ERROR_MARK;
3423   if (cfn != CFN_LAST
3424       && (modifier == NONE
3425 	  || (modifier == NARROW
3426 	      && simple_integer_narrowing (vectype_out, vectype_in,
3427 					   &convert_code))))
3428     ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3429 					  vectype_in);
3430 
3431   /* If that fails, try asking for a target-specific built-in function.  */
3432   if (ifn == IFN_LAST)
3433     {
3434       if (cfn != CFN_LAST)
3435 	fndecl = targetm.vectorize.builtin_vectorized_function
3436 	  (cfn, vectype_out, vectype_in);
3437       else if (callee && fndecl_built_in_p (callee, BUILT_IN_MD))
3438 	fndecl = targetm.vectorize.builtin_md_vectorized_function
3439 	  (callee, vectype_out, vectype_in);
3440     }
3441 
3442   if (ifn == IFN_LAST && !fndecl)
3443     {
3444       if (cfn == CFN_GOMP_SIMD_LANE
3445 	  && !slp_node
3446 	  && loop_vinfo
3447 	  && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3448 	  && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3449 	  && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3450 	     == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3451 	{
3452 	  /* We can handle IFN_GOMP_SIMD_LANE by returning a
3453 	     { 0, 1, 2, ... vf - 1 } vector.  */
3454 	  gcc_assert (nargs == 0);
3455 	}
3456       else if (modifier == NONE
3457 	       && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3458 		   || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3459 		   || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
3460 	return vectorizable_bswap (stmt_info, gsi, vec_stmt, slp_node,
3461 				   vectype_in, cost_vec);
3462       else
3463 	{
3464 	  if (dump_enabled_p ())
3465 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3466 			     "function is not vectorizable.\n");
3467 	  return false;
3468 	}
3469     }
3470 
3471   if (slp_node)
3472     ncopies = 1;
3473   else if (modifier == NARROW && ifn == IFN_LAST)
3474     ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3475   else
3476     ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3477 
3478   /* Sanity check: make sure that at least one copy of the vectorized stmt
3479      needs to be generated.  */
3480   gcc_assert (ncopies >= 1);
3481 
3482   vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
3483   if (!vec_stmt) /* transformation not required.  */
3484     {
3485       STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3486       DUMP_VECT_SCOPE ("vectorizable_call");
3487       vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
3488       if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3489 	record_stmt_cost (cost_vec, ncopies / 2,
3490 			  vec_promote_demote, stmt_info, 0, vect_body);
3491 
3492       if (loop_vinfo && mask_opno >= 0)
3493 	{
3494 	  unsigned int nvectors = (slp_node
3495 				   ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3496 				   : ncopies);
3497 	  tree scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
3498 	  vect_record_loop_mask (loop_vinfo, masks, nvectors,
3499 				 vectype_out, scalar_mask);
3500 	}
3501       return true;
3502     }
3503 
3504   /* Transform.  */
3505 
3506   if (dump_enabled_p ())
3507     dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3508 
3509   /* Handle def.  */
3510   scalar_dest = gimple_call_lhs (stmt);
3511   vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3512 
3513   bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3514 
3515   stmt_vec_info new_stmt_info = NULL;
3516   prev_stmt_info = NULL;
3517   if (modifier == NONE || ifn != IFN_LAST)
3518     {
3519       tree prev_res = NULL_TREE;
3520       vargs.safe_grow (nargs);
3521       orig_vargs.safe_grow (nargs);
3522       for (j = 0; j < ncopies; ++j)
3523 	{
3524 	  /* Build argument list for the vectorized call.  */
3525 	  if (slp_node)
3526 	    {
3527 	      auto_vec<vec<tree> > vec_defs (nargs);
3528 	      vec<tree> vec_oprnds0;
3529 
3530 	      vect_get_slp_defs (slp_node, &vec_defs);
3531 	      vec_oprnds0 = vec_defs[0];
3532 
3533 	      /* Arguments are ready.  Create the new vector stmt.  */
3534 	      FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3535 		{
3536 		  size_t k;
3537 		  for (k = 0; k < nargs; k++)
3538 		    {
3539 		      vec<tree> vec_oprndsk = vec_defs[k];
3540 		      vargs[k] = vec_oprndsk[i];
3541 		    }
3542 		  if (modifier == NARROW)
3543 		    {
3544 		      /* We don't define any narrowing conditional functions
3545 			 at present.  */
3546 		      gcc_assert (mask_opno < 0);
3547 		      tree half_res = make_ssa_name (vectype_in);
3548 		      gcall *call
3549 			= gimple_build_call_internal_vec (ifn, vargs);
3550 		      gimple_call_set_lhs (call, half_res);
3551 		      gimple_call_set_nothrow (call, true);
3552 		      vect_finish_stmt_generation (stmt_info, call, gsi);
3553 		      if ((i & 1) == 0)
3554 			{
3555 			  prev_res = half_res;
3556 			  continue;
3557 			}
3558 		      new_temp = make_ssa_name (vec_dest);
3559 		      gimple *new_stmt
3560 			= gimple_build_assign (new_temp, convert_code,
3561 					       prev_res, half_res);
3562 		      new_stmt_info
3563 			= vect_finish_stmt_generation (stmt_info, new_stmt,
3564 						       gsi);
3565 		    }
3566 		  else
3567 		    {
3568 		      if (mask_opno >= 0 && masked_loop_p)
3569 			{
3570 			  unsigned int vec_num = vec_oprnds0.length ();
3571 			  /* Always true for SLP.  */
3572 			  gcc_assert (ncopies == 1);
3573 			  tree mask = vect_get_loop_mask (gsi, masks, vec_num,
3574 							  vectype_out, i);
3575 			  vargs[mask_opno] = prepare_load_store_mask
3576 			    (TREE_TYPE (mask), mask, vargs[mask_opno], gsi);
3577 			}
3578 
3579 		      gcall *call;
3580 		      if (ifn != IFN_LAST)
3581 			call = gimple_build_call_internal_vec (ifn, vargs);
3582 		      else
3583 			call = gimple_build_call_vec (fndecl, vargs);
3584 		      new_temp = make_ssa_name (vec_dest, call);
3585 		      gimple_call_set_lhs (call, new_temp);
3586 		      gimple_call_set_nothrow (call, true);
3587 		      new_stmt_info
3588 			= vect_finish_stmt_generation (stmt_info, call, gsi);
3589 		    }
3590 		  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3591 		}
3592 
3593 	      for (i = 0; i < nargs; i++)
3594 		{
3595 		  vec<tree> vec_oprndsi = vec_defs[i];
3596 		  vec_oprndsi.release ();
3597 		}
3598 	      continue;
3599 	    }
3600 
3601 	  for (i = 0; i < nargs; i++)
3602 	    {
3603 	      op = gimple_call_arg (stmt, i);
3604 	      if (j == 0)
3605 		vec_oprnd0
3606 		  = vect_get_vec_def_for_operand (op, stmt_info, vectypes[i]);
3607 	      else
3608 		vec_oprnd0
3609 		  = vect_get_vec_def_for_stmt_copy (vinfo, orig_vargs[i]);
3610 
3611 	      orig_vargs[i] = vargs[i] = vec_oprnd0;
3612 	    }
3613 
3614 	  if (mask_opno >= 0 && masked_loop_p)
3615 	    {
3616 	      tree mask = vect_get_loop_mask (gsi, masks, ncopies,
3617 					      vectype_out, j);
3618 	      vargs[mask_opno]
3619 		= prepare_load_store_mask (TREE_TYPE (mask), mask,
3620 					   vargs[mask_opno], gsi);
3621 	    }
3622 
3623 	  if (cfn == CFN_GOMP_SIMD_LANE)
3624 	    {
3625 	      tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3626 	      tree new_var
3627 		= vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3628 	      gimple *init_stmt = gimple_build_assign (new_var, cst);
3629 	      vect_init_vector_1 (stmt_info, init_stmt, NULL);
3630 	      new_temp = make_ssa_name (vec_dest);
3631 	      gimple *new_stmt = gimple_build_assign (new_temp, new_var);
3632 	      new_stmt_info
3633 		= vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3634 	    }
3635 	  else if (modifier == NARROW)
3636 	    {
3637 	      /* We don't define any narrowing conditional functions at
3638 		 present.  */
3639 	      gcc_assert (mask_opno < 0);
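	      /* Copies are processed in pairs: an even copy only computes a
		 half-width result and the following odd copy packs the two
		 halves, roughly (illustrative, CONVERT_CODE is typically
		 VEC_PACK_TRUNC_EXPR):
		     new_temp = VEC_PACK_TRUNC_EXPR <prev_res, half_res>;  */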
3640 	      tree half_res = make_ssa_name (vectype_in);
3641 	      gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3642 	      gimple_call_set_lhs (call, half_res);
3643 	      gimple_call_set_nothrow (call, true);
3644 	      vect_finish_stmt_generation (stmt_info, call, gsi);
3645 	      if ((j & 1) == 0)
3646 		{
3647 		  prev_res = half_res;
3648 		  continue;
3649 		}
3650 	      new_temp = make_ssa_name (vec_dest);
3651 	      gassign *new_stmt = gimple_build_assign (new_temp, convert_code,
3652 						       prev_res, half_res);
3653 	      new_stmt_info
3654 		= vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3655 	    }
3656 	  else
3657 	    {
3658 	      gcall *call;
3659 	      if (ifn != IFN_LAST)
3660 		call = gimple_build_call_internal_vec (ifn, vargs);
3661 	      else
3662 		call = gimple_build_call_vec (fndecl, vargs);
3663 	      new_temp = make_ssa_name (vec_dest, call);
3664 	      gimple_call_set_lhs (call, new_temp);
3665 	      gimple_call_set_nothrow (call, true);
3666 	      new_stmt_info
3667 		= vect_finish_stmt_generation (stmt_info, call, gsi);
3668 	    }
3669 
3670 	  if (j == (modifier == NARROW ? 1 : 0))
3671 	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
3672 	  else
3673 	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3674 
3675 	  prev_stmt_info = new_stmt_info;
3676 	}
3677     }
3678   else if (modifier == NARROW)
3679     {
3680       /* We don't define any narrowing conditional functions at present.  */
3681       gcc_assert (mask_opno < 0);
3682       for (j = 0; j < ncopies; ++j)
3683 	{
3684 	  /* Build argument list for the vectorized call.  */
3685 	  if (j == 0)
3686 	    vargs.create (nargs * 2);
3687 	  else
3688 	    vargs.truncate (0);
3689 
3690 	  if (slp_node)
3691 	    {
3692 	      auto_vec<vec<tree> > vec_defs (nargs);
3693 	      vec<tree> vec_oprnds0;
3694 
3695 	      vect_get_slp_defs (slp_node, &vec_defs);
3696 	      vec_oprnds0 = vec_defs[0];
3697 
3698 	      /* Arguments are ready.  Create the new vector stmt.  */
3699 	      for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3700 		{
3701 		  size_t k;
3702 		  vargs.truncate (0);
3703 		  for (k = 0; k < nargs; k++)
3704 		    {
3705 		      vec<tree> vec_oprndsk = vec_defs[k];
3706 		      vargs.quick_push (vec_oprndsk[i]);
3707 		      vargs.quick_push (vec_oprndsk[i + 1]);
3708 		    }
3709 		  gcall *call;
3710 		  if (ifn != IFN_LAST)
3711 		    call = gimple_build_call_internal_vec (ifn, vargs);
3712 		  else
3713 		    call = gimple_build_call_vec (fndecl, vargs);
3714 		  new_temp = make_ssa_name (vec_dest, call);
3715 		  gimple_call_set_lhs (call, new_temp);
3716 		  gimple_call_set_nothrow (call, true);
3717 		  new_stmt_info
3718 		    = vect_finish_stmt_generation (stmt_info, call, gsi);
3719 		  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3720 		}
3721 
3722 	      for (i = 0; i < nargs; i++)
3723 		{
3724 		  vec<tree> vec_oprndsi = vec_defs[i];
3725 		  vec_oprndsi.release ();
3726 		}
3727 	      continue;
3728 	    }
3729 
3730 	  for (i = 0; i < nargs; i++)
3731 	    {
3732 	      op = gimple_call_arg (stmt, i);
3733 	      if (j == 0)
3734 		{
3735 		  vec_oprnd0
3736 		    = vect_get_vec_def_for_operand (op, stmt_info,
3737 						    vectypes[i]);
3738 		  vec_oprnd1
3739 		    = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
3740 		}
3741 	      else
3742 		{
3743 		  vec_oprnd1 = gimple_call_arg (new_stmt_info->stmt,
3744 						2 * i + 1);
3745 		  vec_oprnd0
3746 		    = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1);
3747 		  vec_oprnd1
3748 		    = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
3749 		}
3750 
3751 	      vargs.quick_push (vec_oprnd0);
3752 	      vargs.quick_push (vec_oprnd1);
3753 	    }
3754 
3755 	  gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
3756 	  new_temp = make_ssa_name (vec_dest, new_stmt);
3757 	  gimple_call_set_lhs (new_stmt, new_temp);
3758 	  new_stmt_info
3759 	    = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3760 
3761 	  if (j == 0)
3762 	    STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
3763 	  else
3764 	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3765 
3766 	  prev_stmt_info = new_stmt_info;
3767 	}
3768 
3769       *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3770     }
3771   else
3772     /* No current target implements this case.  */
3773     return false;
3774 
3775   vargs.release ();
3776 
3777   /* The call in STMT might prevent it from being removed in dce.
3778      We however cannot remove it here, due to the way the ssa name
3779      it defines is mapped to the new definition.  So just replace
3780      rhs of the statement with something harmless.  */
3781 
3782   if (slp_node)
3783     return true;
3784 
3785   stmt_info = vect_orig_stmt (stmt_info);
3786   lhs = gimple_get_lhs (stmt_info->stmt);
3787 
3788   gassign *new_stmt
3789     = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3790   vinfo->replace_stmt (gsi, stmt_info, new_stmt);
3791 
3792   return true;
3793 }
3794 
3795 
3796 struct simd_call_arg_info
3797 {
3798   tree vectype;
3799   tree op;
3800   HOST_WIDE_INT linear_step;
3801   enum vect_def_type dt;
3802   unsigned int align;
3803   bool simd_lane_linear;
3804 };
3805 
3806 /* Helper function of vectorizable_simd_clone_call.  If OP, an SSA_NAME,
3807    is linear within simd lane (but not within whole loop), note it in
3808    *ARGINFO.  */
3809 
3810 static void
3811 vect_simd_lane_linear (tree op, class loop *loop,
3812 		       struct simd_call_arg_info *arginfo)
3813 {
3814   gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3815 
3816   if (!is_gimple_assign (def_stmt)
3817       || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3818       || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3819     return;
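  /* A typical match (illustrative, possibly with intervening conversions) is
	 _1 = .GOMP_SIMD_LANE (simduid.0);
	 _2 = _1 * 4;
	 op_3 = &a + _2;
     which yields base &a and linear_step 4.  */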
3820 
3821   tree base = gimple_assign_rhs1 (def_stmt);
3822   HOST_WIDE_INT linear_step = 0;
3823   tree v = gimple_assign_rhs2 (def_stmt);
3824   while (TREE_CODE (v) == SSA_NAME)
3825     {
3826       tree t;
3827       def_stmt = SSA_NAME_DEF_STMT (v);
3828       if (is_gimple_assign (def_stmt))
3829 	switch (gimple_assign_rhs_code (def_stmt))
3830 	  {
3831 	  case PLUS_EXPR:
3832 	    t = gimple_assign_rhs2 (def_stmt);
3833 	    if (linear_step || TREE_CODE (t) != INTEGER_CST)
3834 	      return;
3835 	    base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3836 	    v = gimple_assign_rhs1 (def_stmt);
3837 	    continue;
3838 	  case MULT_EXPR:
3839 	    t = gimple_assign_rhs2 (def_stmt);
3840 	    if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3841 	      return;
3842 	    linear_step = tree_to_shwi (t);
3843 	    v = gimple_assign_rhs1 (def_stmt);
3844 	    continue;
3845 	  CASE_CONVERT:
3846 	    t = gimple_assign_rhs1 (def_stmt);
3847 	    if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3848 		|| (TYPE_PRECISION (TREE_TYPE (v))
3849 		    < TYPE_PRECISION (TREE_TYPE (t))))
3850 	      return;
3851 	    if (!linear_step)
3852 	      linear_step = 1;
3853 	    v = t;
3854 	    continue;
3855 	  default:
3856 	    return;
3857 	  }
3858       else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3859 	       && loop->simduid
3860 	       && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3861 	       && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3862 		   == loop->simduid))
3863 	{
3864 	  if (!linear_step)
3865 	    linear_step = 1;
3866 	  arginfo->linear_step = linear_step;
3867 	  arginfo->op = base;
3868 	  arginfo->simd_lane_linear = true;
3869 	  return;
3870 	}
3871     }
3872 }
3873 
3874 /* Return the number of elements in vector type VECTYPE, which is associated
3875    with a SIMD clone.  At present these vectors always have a constant
3876    length.  */
3877 
3878 static unsigned HOST_WIDE_INT
3879 simd_clone_subparts (tree vectype)
3880 {
3881   return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
3882 }
3883 
3884 /* Function vectorizable_simd_clone_call.
3885 
3886    Check if STMT_INFO performs a function call that can be vectorized
3887    by calling a simd clone of the function.
3888    If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3889    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3890    Return true if STMT_INFO is vectorizable in this way.  */
3891 
3892 static bool
3893 vectorizable_simd_clone_call (stmt_vec_info stmt_info,
3894 			      gimple_stmt_iterator *gsi,
3895 			      stmt_vec_info *vec_stmt, slp_tree slp_node,
3896 			      stmt_vector_for_cost *)
3897 {
3898   tree vec_dest;
3899   tree scalar_dest;
3900   tree op, type;
3901   tree vec_oprnd0 = NULL_TREE;
3902   stmt_vec_info prev_stmt_info;
3903   tree vectype;
3904   unsigned int nunits;
3905   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3906   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3907   vec_info *vinfo = stmt_info->vinfo;
3908   class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3909   tree fndecl, new_temp;
3910   int ncopies, j;
3911   auto_vec<simd_call_arg_info> arginfo;
3912   vec<tree> vargs = vNULL;
3913   size_t i, nargs;
3914   tree lhs, rtype, ratype;
3915   vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3916 
3917   /* Is STMT a vectorizable call?   */
3918   gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
3919   if (!stmt)
3920     return false;
3921 
3922   fndecl = gimple_call_fndecl (stmt);
3923   if (fndecl == NULL_TREE)
3924     return false;
3925 
3926   struct cgraph_node *node = cgraph_node::get (fndecl);
3927   if (node == NULL || node->simd_clones == NULL)
3928     return false;
3929 
3930   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3931     return false;
3932 
3933   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3934       && ! vec_stmt)
3935     return false;
3936 
3937   if (gimple_call_lhs (stmt)
3938       && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3939     return false;
3940 
3941   gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3942 
3943   vectype = STMT_VINFO_VECTYPE (stmt_info);
3944 
3945   if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
3946     return false;
3947 
3948   /* FORNOW */
3949   if (slp_node)
3950     return false;
3951 
3952   /* Process function arguments.  */
3953   nargs = gimple_call_num_args (stmt);
3954 
3955   /* Bail out if the function has zero arguments.  */
3956   if (nargs == 0)
3957     return false;
3958 
3959   arginfo.reserve (nargs, true);
3960 
3961   for (i = 0; i < nargs; i++)
3962     {
3963       simd_call_arg_info thisarginfo;
3964       affine_iv iv;
3965 
3966       thisarginfo.linear_step = 0;
3967       thisarginfo.align = 0;
3968       thisarginfo.op = NULL_TREE;
3969       thisarginfo.simd_lane_linear = false;
3970 
3971       op = gimple_call_arg (stmt, i);
3972       if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
3973 			       &thisarginfo.vectype)
3974 	  || thisarginfo.dt == vect_uninitialized_def)
3975 	{
3976 	  if (dump_enabled_p ())
3977 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3978 			     "use not simple.\n");
3979 	  return false;
3980 	}
3981 
3982       if (thisarginfo.dt == vect_constant_def
3983 	  || thisarginfo.dt == vect_external_def)
3984 	gcc_assert (thisarginfo.vectype == NULL_TREE);
3985       else
3986 	{
3987 	  gcc_assert (thisarginfo.vectype != NULL_TREE);
3988 	  if (VECTOR_BOOLEAN_TYPE_P (thisarginfo.vectype))
3989 	    {
3990 	      if (dump_enabled_p ())
3991 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3992 				 "vector mask arguments are not supported\n");
3993 	      return false;
3994 	    }
3995 	}
3996 
3997       /* For linear arguments, the analyze phase should have saved
3998 	 the base and step in STMT_VINFO_SIMD_CLONE_INFO.  */
3999       if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
4000 	  && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
4001 	{
4002 	  gcc_assert (vec_stmt);
4003 	  thisarginfo.linear_step
4004 	    = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
4005 	  thisarginfo.op
4006 	    = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
4007 	  thisarginfo.simd_lane_linear
4008 	    = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
4009 	       == boolean_true_node);
4010 	  /* If the loop has been peeled for alignment, we need to adjust it.  */
4011 	  tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
4012 	  tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
4013 	  if (n1 != n2 && !thisarginfo.simd_lane_linear)
4014 	    {
4015 	      tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
4016 	      tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
4017 	      tree opt = TREE_TYPE (thisarginfo.op);
4018 	      bias = fold_convert (TREE_TYPE (step), bias);
4019 	      bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
4020 	      thisarginfo.op
4021 		= fold_build2 (POINTER_TYPE_P (opt)
4022 			       ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
4023 			       thisarginfo.op, bias);
4024 	    }
4025 	}
4026       else if (!vec_stmt
4027 	       && thisarginfo.dt != vect_constant_def
4028 	       && thisarginfo.dt != vect_external_def
4029 	       && loop_vinfo
4030 	       && TREE_CODE (op) == SSA_NAME
4031 	       && simple_iv (loop, loop_containing_stmt (stmt), op,
4032 			     &iv, false)
4033 	       && tree_fits_shwi_p (iv.step))
4034 	{
4035 	  thisarginfo.linear_step = tree_to_shwi (iv.step);
4036 	  thisarginfo.op = iv.base;
4037 	}
4038       else if ((thisarginfo.dt == vect_constant_def
4039 		|| thisarginfo.dt == vect_external_def)
4040 	       && POINTER_TYPE_P (TREE_TYPE (op)))
4041 	thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
4042       /* Addresses of array elements indexed by GOMP_SIMD_LANE are
4043 	 linear too.  */
4044       if (POINTER_TYPE_P (TREE_TYPE (op))
4045 	  && !thisarginfo.linear_step
4046 	  && !vec_stmt
4047 	  && thisarginfo.dt != vect_constant_def
4048 	  && thisarginfo.dt != vect_external_def
4049 	  && loop_vinfo
4050 	  && !slp_node
4051 	  && TREE_CODE (op) == SSA_NAME)
4052 	vect_simd_lane_linear (op, loop, &thisarginfo);
4053 
4054       arginfo.quick_push (thisarginfo);
4055     }
4056 
4057   unsigned HOST_WIDE_INT vf;
4058   if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
4059     {
4060       if (dump_enabled_p ())
4061 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4062 			 "not considering SIMD clones; not yet supported"
4063 			 " for variable-width vectors.\n");
4064       return false;
4065     }
4066 
4067   unsigned int badness = 0;
4068   struct cgraph_node *bestn = NULL;
4069   if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
4070     bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
4071   else
4072     for (struct cgraph_node *n = node->simd_clones; n != NULL;
4073 	 n = n->simdclone->next_clone)
4074       {
4075 	unsigned int this_badness = 0;
4076 	if (n->simdclone->simdlen > vf
4077 	    || n->simdclone->nargs != nargs)
4078 	  continue;
4079 	if (n->simdclone->simdlen < vf)
4080 	  this_badness += (exact_log2 (vf)
4081 			   - exact_log2 (n->simdclone->simdlen)) * 1024;
4082 	if (n->simdclone->inbranch)
4083 	  this_badness += 2048;
4084 	int target_badness = targetm.simd_clone.usable (n);
4085 	if (target_badness < 0)
4086 	  continue;
4087 	this_badness += target_badness * 512;
4088 	/* FORNOW: Have to add code to add the mask argument.  */
4089 	if (n->simdclone->inbranch)
4090 	  continue;
4091 	for (i = 0; i < nargs; i++)
4092 	  {
4093 	    switch (n->simdclone->args[i].arg_type)
4094 	      {
4095 	      case SIMD_CLONE_ARG_TYPE_VECTOR:
4096 		if (!useless_type_conversion_p
4097 			(n->simdclone->args[i].orig_type,
4098 			 TREE_TYPE (gimple_call_arg (stmt, i))))
4099 		  i = -1;
4100 		else if (arginfo[i].dt == vect_constant_def
4101 			 || arginfo[i].dt == vect_external_def
4102 			 || arginfo[i].linear_step)
4103 		  this_badness += 64;
4104 		break;
4105 	      case SIMD_CLONE_ARG_TYPE_UNIFORM:
4106 		if (arginfo[i].dt != vect_constant_def
4107 		    && arginfo[i].dt != vect_external_def)
4108 		  i = -1;
4109 		break;
4110 	      case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4111 	      case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4112 		if (arginfo[i].dt == vect_constant_def
4113 		    || arginfo[i].dt == vect_external_def
4114 		    || (arginfo[i].linear_step
4115 			!= n->simdclone->args[i].linear_step))
4116 		  i = -1;
4117 		break;
4118 	      case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4119 	      case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4120 	      case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4121 	      case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4122 	      case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4123 	      case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4124 		/* FORNOW */
4125 		i = -1;
4126 		break;
4127 	      case SIMD_CLONE_ARG_TYPE_MASK:
4128 		gcc_unreachable ();
4129 	      }
4130 	    if (i == (size_t) -1)
4131 	      break;
4132 	    if (n->simdclone->args[i].alignment > arginfo[i].align)
4133 	      {
4134 		i = -1;
4135 		break;
4136 	      }
4137 	    if (arginfo[i].align)
4138 	      this_badness += (exact_log2 (arginfo[i].align)
4139 			       - exact_log2 (n->simdclone->args[i].alignment));
4140 	  }
4141 	if (i == (size_t) -1)
4142 	  continue;
4143 	if (bestn == NULL || this_badness < badness)
4144 	  {
4145 	    bestn = n;
4146 	    badness = this_badness;
4147 	  }
4148       }
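  /* For example (illustrative), with VF == 8 a usable clone of simdlen 8
     scores a lower badness than one of simdlen 4, which would need twice
     as many calls, so the wider clone is preferred.  */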
4149 
4150   if (bestn == NULL)
4151     return false;
4152 
4153   for (i = 0; i < nargs; i++)
4154     if ((arginfo[i].dt == vect_constant_def
4155 	 || arginfo[i].dt == vect_external_def)
4156 	&& bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
4157       {
4158 	tree arg_type = TREE_TYPE (gimple_call_arg (stmt, i));
4159 	arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type,
4160 							  slp_node);
4161 	if (arginfo[i].vectype == NULL
4162 	    || (simd_clone_subparts (arginfo[i].vectype)
4163 		> bestn->simdclone->simdlen))
4164 	  return false;
4165       }
4166 
4167   fndecl = bestn->decl;
4168   nunits = bestn->simdclone->simdlen;
4169   ncopies = vf / nunits;
4170 
4171   /* If the function isn't const, only allow it in simd loops where the
4172      user has asserted that at least nunits consecutive iterations can be
4173      performed using SIMD instructions.  */
4174   if ((loop == NULL || (unsigned) loop->safelen < nunits)
4175       && gimple_vuse (stmt))
4176     return false;
4177 
4178   /* Sanity check: make sure that at least one copy of the vectorized stmt
4179      needs to be generated.  */
4180   gcc_assert (ncopies >= 1);
4181 
4182   if (!vec_stmt) /* transformation not required.  */
4183     {
4184       STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
4185       for (i = 0; i < nargs; i++)
4186 	if ((bestn->simdclone->args[i].arg_type
4187 	     == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
4188 	    || (bestn->simdclone->args[i].arg_type
4189 		== SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
4190 	  {
4191 	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
4192 									+ 1);
4193 	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
4194 	    tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4195 		       ? size_type_node : TREE_TYPE (arginfo[i].op);
4196 	    tree ls = build_int_cst (lst, arginfo[i].linear_step);
4197 	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
4198 	    tree sll = arginfo[i].simd_lane_linear
4199 		       ? boolean_true_node : boolean_false_node;
4200 	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
4201 	  }
4202       STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
4203       DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4204 /*      vect_model_simple_cost (stmt_info, ncopies, dt, slp_node, cost_vec); */
4205       return true;
4206     }
4207 
4208   /* Transform.  */
4209 
4210   if (dump_enabled_p ())
4211     dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4212 
4213   /* Handle def.  */
4214   scalar_dest = gimple_call_lhs (stmt);
4215   vec_dest = NULL_TREE;
4216   rtype = NULL_TREE;
4217   ratype = NULL_TREE;
4218   if (scalar_dest)
4219     {
4220       vec_dest = vect_create_destination_var (scalar_dest, vectype);
4221       rtype = TREE_TYPE (TREE_TYPE (fndecl));
4222       if (TREE_CODE (rtype) == ARRAY_TYPE)
4223 	{
4224 	  ratype = rtype;
4225 	  rtype = TREE_TYPE (ratype);
4226 	}
4227     }
4228 
4229   prev_stmt_info = NULL;
4230   for (j = 0; j < ncopies; ++j)
4231     {
4232       /* Build argument list for the vectorized call.  */
4233       if (j == 0)
4234 	vargs.create (nargs);
4235       else
4236 	vargs.truncate (0);
4237 
4238       for (i = 0; i < nargs; i++)
4239 	{
4240 	  unsigned int k, l, m, o;
4241 	  tree atype;
4242 	  op = gimple_call_arg (stmt, i);
4243 	  switch (bestn->simdclone->args[i].arg_type)
4244 	    {
4245 	    case SIMD_CLONE_ARG_TYPE_VECTOR:
4246 	      atype = bestn->simdclone->args[i].vector_type;
4247 	      o = nunits / simd_clone_subparts (atype);
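	      /* An illustrative case: the loop uses V8SI operands but the
		 chosen clone has simdlen 4 and takes V4SI arguments; each
		 V8SI def is then split in half with BIT_FIELD_REFs for
		 successive calls.  In the opposite situation several defs
		 are merged into one argument with a CONSTRUCTOR.  */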
4248 	      for (m = j * o; m < (j + 1) * o; m++)
4249 		{
4250 		  if (simd_clone_subparts (atype)
4251 		      < simd_clone_subparts (arginfo[i].vectype))
4252 		    {
4253 		      poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4254 		      k = (simd_clone_subparts (arginfo[i].vectype)
4255 			   / simd_clone_subparts (atype));
4256 		      gcc_assert ((k & (k - 1)) == 0);
4257 		      if (m == 0)
4258 			vec_oprnd0
4259 			  = vect_get_vec_def_for_operand (op, stmt_info);
4260 		      else
4261 			{
4262 			  vec_oprnd0 = arginfo[i].op;
4263 			  if ((m & (k - 1)) == 0)
4264 			    vec_oprnd0
4265 			      = vect_get_vec_def_for_stmt_copy (vinfo,
4266 								vec_oprnd0);
4267 			}
4268 		      arginfo[i].op = vec_oprnd0;
4269 		      vec_oprnd0
4270 			= build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4271 				  bitsize_int (prec),
4272 				  bitsize_int ((m & (k - 1)) * prec));
4273 		      gassign *new_stmt
4274 			= gimple_build_assign (make_ssa_name (atype),
4275 					       vec_oprnd0);
4276 		      vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4277 		      vargs.safe_push (gimple_assign_lhs (new_stmt));
4278 		    }
4279 		  else
4280 		    {
4281 		      k = (simd_clone_subparts (atype)
4282 			   / simd_clone_subparts (arginfo[i].vectype));
4283 		      gcc_assert ((k & (k - 1)) == 0);
4284 		      vec<constructor_elt, va_gc> *ctor_elts;
4285 		      if (k != 1)
4286 			vec_alloc (ctor_elts, k);
4287 		      else
4288 			ctor_elts = NULL;
4289 		      for (l = 0; l < k; l++)
4290 			{
4291 			  if (m == 0 && l == 0)
4292 			    vec_oprnd0
4293 			      = vect_get_vec_def_for_operand (op, stmt_info);
4294 			  else
4295 			    vec_oprnd0
4296 			      = vect_get_vec_def_for_stmt_copy (vinfo,
4297 								arginfo[i].op);
4298 			  arginfo[i].op = vec_oprnd0;
4299 			  if (k == 1)
4300 			    break;
4301 			  CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4302 						  vec_oprnd0);
4303 			}
4304 		      if (k == 1)
4305 			vargs.safe_push (vec_oprnd0);
4306 		      else
4307 			{
4308 			  vec_oprnd0 = build_constructor (atype, ctor_elts);
4309 			  gassign *new_stmt
4310 			    = gimple_build_assign (make_ssa_name (atype),
4311 						   vec_oprnd0);
4312 			  vect_finish_stmt_generation (stmt_info, new_stmt,
4313 						       gsi);
4314 			  vargs.safe_push (gimple_assign_lhs (new_stmt));
4315 			}
4316 		    }
4317 		}
4318 	      break;
4319 	    case SIMD_CLONE_ARG_TYPE_UNIFORM:
4320 	      vargs.safe_push (op);
4321 	      break;
4322 	    case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4323 	    case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4324 	      if (j == 0)
4325 		{
4326 		  gimple_seq stmts;
4327 		  arginfo[i].op
4328 		    = force_gimple_operand (unshare_expr (arginfo[i].op),
4329 					    &stmts, true, NULL_TREE);
4330 		  if (stmts != NULL)
4331 		    {
4332 		      basic_block new_bb;
4333 		      edge pe = loop_preheader_edge (loop);
4334 		      new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4335 		      gcc_assert (!new_bb);
4336 		    }
4337 		  if (arginfo[i].simd_lane_linear)
4338 		    {
4339 		      vargs.safe_push (arginfo[i].op);
4340 		      break;
4341 		    }
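		  /* Otherwise materialize an induction for the linear
		     argument, roughly (illustrative):
			 p = PHI <base (preheader), p + step * ncopies * simdlen (latch)>
		     P is passed to the calls of the first copy; later copies
		     add J * SIMDLEN * STEP instead (see the else arm below).  */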
4342 		  tree phi_res = copy_ssa_name (op);
4343 		  gphi *new_phi = create_phi_node (phi_res, loop->header);
4344 		  loop_vinfo->add_stmt (new_phi);
4345 		  add_phi_arg (new_phi, arginfo[i].op,
4346 			       loop_preheader_edge (loop), UNKNOWN_LOCATION);
4347 		  enum tree_code code
4348 		    = POINTER_TYPE_P (TREE_TYPE (op))
4349 		      ? POINTER_PLUS_EXPR : PLUS_EXPR;
4350 		  tree type = POINTER_TYPE_P (TREE_TYPE (op))
4351 			      ? sizetype : TREE_TYPE (op);
4352 		  widest_int cst
4353 		    = wi::mul (bestn->simdclone->args[i].linear_step,
4354 			       ncopies * nunits);
4355 		  tree tcst = wide_int_to_tree (type, cst);
4356 		  tree phi_arg = copy_ssa_name (op);
4357 		  gassign *new_stmt
4358 		    = gimple_build_assign (phi_arg, code, phi_res, tcst);
4359 		  gimple_stmt_iterator si = gsi_after_labels (loop->header);
4360 		  gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4361 		  loop_vinfo->add_stmt (new_stmt);
4362 		  add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4363 			       UNKNOWN_LOCATION);
4364 		  arginfo[i].op = phi_res;
4365 		  vargs.safe_push (phi_res);
4366 		}
4367 	      else
4368 		{
4369 		  enum tree_code code
4370 		    = POINTER_TYPE_P (TREE_TYPE (op))
4371 		      ? POINTER_PLUS_EXPR : PLUS_EXPR;
4372 		  tree type = POINTER_TYPE_P (TREE_TYPE (op))
4373 			      ? sizetype : TREE_TYPE (op);
4374 		  widest_int cst
4375 		    = wi::mul (bestn->simdclone->args[i].linear_step,
4376 			       j * nunits);
4377 		  tree tcst = wide_int_to_tree (type, cst);
4378 		  new_temp = make_ssa_name (TREE_TYPE (op));
4379 		  gassign *new_stmt
4380 		    = gimple_build_assign (new_temp, code,
4381 					   arginfo[i].op, tcst);
4382 		  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4383 		  vargs.safe_push (new_temp);
4384 		}
4385 	      break;
4386 	    case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4387 	    case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4388 	    case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4389 	    case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4390 	    case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4391 	    case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4392 	    default:
4393 	      gcc_unreachable ();
4394 	    }
4395 	}
4396 
4397       gcall *new_call = gimple_build_call_vec (fndecl, vargs);
4398       if (vec_dest)
4399 	{
4400 	  gcc_assert (ratype || simd_clone_subparts (rtype) == nunits);
4401 	  if (ratype)
4402 	    new_temp = create_tmp_var (ratype);
4403 	  else if (simd_clone_subparts (vectype)
4404 		   == simd_clone_subparts (rtype))
4405 	    new_temp = make_ssa_name (vec_dest, new_call);
4406 	  else
4407 	    new_temp = make_ssa_name (rtype, new_call);
4408 	  gimple_call_set_lhs (new_call, new_temp);
4409 	}
4410       stmt_vec_info new_stmt_info
4411 	= vect_finish_stmt_generation (stmt_info, new_call, gsi);
4412 
4413       if (vec_dest)
4414 	{
4415 	  if (simd_clone_subparts (vectype) < nunits)
4416 	    {
4417 	      unsigned int k, l;
4418 	      poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4419 	      poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4420 	      k = nunits / simd_clone_subparts (vectype);
4421 	      gcc_assert ((k & (k - 1)) == 0);
4422 	      for (l = 0; l < k; l++)
4423 		{
4424 		  tree t;
4425 		  if (ratype)
4426 		    {
4427 		      t = build_fold_addr_expr (new_temp);
4428 		      t = build2 (MEM_REF, vectype, t,
4429 				  build_int_cst (TREE_TYPE (t), l * bytes));
4430 		    }
4431 		  else
4432 		    t = build3 (BIT_FIELD_REF, vectype, new_temp,
4433 				bitsize_int (prec), bitsize_int (l * prec));
4434 		  gimple *new_stmt
4435 		    = gimple_build_assign (make_ssa_name (vectype), t);
4436 		  new_stmt_info
4437 		    = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4438 
4439 		  if (j == 0 && l == 0)
4440 		    STMT_VINFO_VEC_STMT (stmt_info)
4441 		      = *vec_stmt = new_stmt_info;
4442 		  else
4443 		    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4444 
4445 		  prev_stmt_info = new_stmt_info;
4446 		}
4447 
4448 	      if (ratype)
4449 		vect_clobber_variable (stmt_info, gsi, new_temp);
4450 	      continue;
4451 	    }
4452 	  else if (simd_clone_subparts (vectype) > nunits)
4453 	    {
4454 	      unsigned int k = (simd_clone_subparts (vectype)
4455 				/ simd_clone_subparts (rtype));
4456 	      gcc_assert ((k & (k - 1)) == 0);
4457 	      if ((j & (k - 1)) == 0)
4458 		vec_alloc (ret_ctor_elts, k);
4459 	      if (ratype)
4460 		{
4461 		  unsigned int m, o = nunits / simd_clone_subparts (rtype);
4462 		  for (m = 0; m < o; m++)
4463 		    {
4464 		      tree tem = build4 (ARRAY_REF, rtype, new_temp,
4465 					 size_int (m), NULL_TREE, NULL_TREE);
4466 		      gimple *new_stmt
4467 			= gimple_build_assign (make_ssa_name (rtype), tem);
4468 		      new_stmt_info
4469 			= vect_finish_stmt_generation (stmt_info, new_stmt,
4470 						       gsi);
4471 		      CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4472 					      gimple_assign_lhs (new_stmt));
4473 		    }
4474 		  vect_clobber_variable (stmt_info, gsi, new_temp);
4475 		}
4476 	      else
4477 		CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4478 	      if ((j & (k - 1)) != k - 1)
4479 		continue;
4480 	      vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4481 	      gimple *new_stmt
4482 		= gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4483 	      new_stmt_info
4484 		= vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4485 
4486 	      if ((unsigned) j == k - 1)
4487 		STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
4488 	      else
4489 		STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4490 
4491 	      prev_stmt_info = new_stmt_info;
4492 	      continue;
4493 	    }
4494 	  else if (ratype)
4495 	    {
4496 	      tree t = build_fold_addr_expr (new_temp);
4497 	      t = build2 (MEM_REF, vectype, t,
4498 			  build_int_cst (TREE_TYPE (t), 0));
4499 	      gimple *new_stmt
4500 		= gimple_build_assign (make_ssa_name (vec_dest), t);
4501 	      new_stmt_info
4502 		= vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4503 	      vect_clobber_variable (stmt_info, gsi, new_temp);
4504 	    }
4505 	}
4506 
4507       if (j == 0)
4508 	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
4509       else
4510 	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4511 
4512       prev_stmt_info = new_stmt_info;
4513     }
4514 
4515   vargs.release ();
4516 
4517   /* The call in STMT might prevent it from being removed in DCE.
4518      We however cannot remove it here, due to the way the SSA name
4519      it defines is mapped to the new definition.  So just replace the
4520      rhs of the statement with something harmless.  */
4521 
4522   if (slp_node)
4523     return true;
4524 
4525   gimple *new_stmt;
4526   if (scalar_dest)
4527     {
4528       type = TREE_TYPE (scalar_dest);
4529       lhs = gimple_call_lhs (vect_orig_stmt (stmt_info)->stmt);
4530       new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4531     }
4532   else
4533     new_stmt = gimple_build_nop ();
4534   vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt);
4535   unlink_stmt_vdef (stmt);
4536 
4537   return true;
4538 }
4539 
4540 
4541 /* Function vect_gen_widened_results_half
4542 
4543    Create a vector stmt whose code, number of operands, and result
4544    variable are CODE, OP_TYPE, and VEC_DEST, and whose operands are
4545    VEC_OPRND0 and VEC_OPRND1.  For a unary CODE, VEC_OPRND1 is
4546    ignored and reset to NULL.  The new vector stmt is to be
4547    inserted at GSI.
4548    STMT_INFO is the original scalar stmt that we are vectorizing.  */
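/* A hedged illustration (assumed example, not taken from this file): for a
   widening multiply of two V8HI operands the caller invokes this helper
   twice, once per half, producing roughly

       vect_lo_1 = VEC_WIDEN_MULT_LO_EXPR <vx_1, vy_1>;
       vect_hi_2 = VEC_WIDEN_MULT_HI_EXPR <vx_1, vy_1>;

   with each call building one of the two V4SI halves.  */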
4549 
4550 static gimple *
4551 vect_gen_widened_results_half (enum tree_code code,
4552                                tree vec_oprnd0, tree vec_oprnd1, int op_type,
4553 			       tree vec_dest, gimple_stmt_iterator *gsi,
4554 			       stmt_vec_info stmt_info)
4555 {
4556   gimple *new_stmt;
4557   tree new_temp;
4558 
4559   /* Generate half of the widened result:  */
4560   gcc_assert (op_type == TREE_CODE_LENGTH (code));
4561   if (op_type != binary_op)
4562     vec_oprnd1 = NULL;
4563   new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
4564   new_temp = make_ssa_name (vec_dest, new_stmt);
4565   gimple_assign_set_lhs (new_stmt, new_temp);
4566   vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4567 
4568   return new_stmt;
4569 }
4570 
4571 
4572 /* Get vectorized definitions for loop-based vectorization of STMT_INFO.
4573    For the first operand we call vect_get_vec_def_for_operand (with OPRND
4574    containing the scalar operand), and for the rest we get a copy with
4575    vect_get_vec_def_for_stmt_copy() using the previous vector definition
4576    (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
4577    The vectors are collected into VEC_OPRNDS.  */
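/* A rough sketch of the effect (for illustration only): when called with
   MULTI_STEP_CVT == 1 this pushes two defs on the first call and two more
   on the recursive call, so VEC_OPRNDS ends up with four vector defs,
   which is what a two-level narrowing of one operand consumes.  */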
4578 
4579 static void
4580 vect_get_loop_based_defs (tree *oprnd, stmt_vec_info stmt_info,
4581 			  vec<tree> *vec_oprnds, int multi_step_cvt)
4582 {
4583   vec_info *vinfo = stmt_info->vinfo;
4584   tree vec_oprnd;
4585 
4586   /* Get the first vector operand.  */
4587   /* All the vector operands except the very first one (that is the scalar
4588      operand) are stmt copies.  */
4589   if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
4590     vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt_info);
4591   else
4592     vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, *oprnd);
4593 
4594   vec_oprnds->quick_push (vec_oprnd);
4595 
4596   /* Get second vector operand.  */
4597   vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
4598   vec_oprnds->quick_push (vec_oprnd);
4599 
4600   *oprnd = vec_oprnd;
4601 
4602   /* For conversion in multiple steps, continue to get operands
4603      recursively.  */
4604   if (multi_step_cvt)
4605     vect_get_loop_based_defs (oprnd, stmt_info, vec_oprnds,
4606 			      multi_step_cvt - 1);
4607 }
4608 
4609 
4610 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4611    For multi-step conversions store the resulting vectors and call the function
4612    recursively.  */
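/* A hedged example: narrowing V4SI operands to V16QI in two steps would
   first pack pairs of V4SI vectors into V8HI vectors (typically with
   VEC_PACK_TRUNC_EXPR) and then, in the recursive call, pack pairs of
   those V8HI results into the final V16QI vectors.  */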
4613 
4614 static void
4615 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
4616 				       int multi_step_cvt,
4617 				       stmt_vec_info stmt_info,
4618 				       vec<tree> vec_dsts,
4619 				       gimple_stmt_iterator *gsi,
4620 				       slp_tree slp_node, enum tree_code code,
4621 				       stmt_vec_info *prev_stmt_info)
4622 {
4623   unsigned int i;
4624   tree vop0, vop1, new_tmp, vec_dest;
4625 
4626   vec_dest = vec_dsts.pop ();
4627 
4628   for (i = 0; i < vec_oprnds->length (); i += 2)
4629     {
4630       /* Create demotion operation.  */
4631       vop0 = (*vec_oprnds)[i];
4632       vop1 = (*vec_oprnds)[i + 1];
4633       gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4634       new_tmp = make_ssa_name (vec_dest, new_stmt);
4635       gimple_assign_set_lhs (new_stmt, new_tmp);
4636       stmt_vec_info new_stmt_info
4637 	= vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4638 
4639       if (multi_step_cvt)
4640 	/* Store the resulting vector for next recursive call.  */
4641 	(*vec_oprnds)[i/2] = new_tmp;
4642       else
4643 	{
4644 	  /* This is the last step of the conversion sequence.  Store the
4645 	     vectors in SLP_NODE or in the vector info of the scalar statement
4646 	     (or in the STMT_VINFO_RELATED_STMT chain).  */
4647 	  if (slp_node)
4648 	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
4649 	  else
4650 	    {
4651 	      if (!*prev_stmt_info)
4652 		STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
4653 	      else
4654 		STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt_info;
4655 
4656 	      *prev_stmt_info = new_stmt_info;
4657 	    }
4658 	}
4659     }
4660 
4661   /* For multi-step demotion operations we first generate demotion operations
4662      from the source type to the intermediate types, and then combine the
4663      results (stored in VEC_OPRNDS) with a further demotion operation to the
4664      destination type.  */
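  /* E.g. (figures assumed for illustration): starting this level with 8
     vectors in VEC_OPRNDS, the loop above emitted 4 demotion stmts and
     stored their results at the front, so we truncate VEC_OPRNDS to 4
     and recurse until only the final destination vectors remain.  */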
4665   if (multi_step_cvt)
4666     {
4667       /* At each level of recursion we have half of the operands we had at the
4668 	 previous level.  */
4669       vec_oprnds->truncate ((i+1)/2);
4670       vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
4671 					     stmt_info, vec_dsts, gsi,
4672 					     slp_node, VEC_PACK_TRUNC_EXPR,
4673 					     prev_stmt_info);
4674     }
4675 
4676   vec_dsts.quick_push (vec_dest);
4677 }
4678 
4679 
4680 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4681    and VEC_OPRNDS1, for a binary operation associated with scalar statement
4682    STMT_INFO.  For multi-step conversions store the resulting vectors and
4683    call the function recursively.  */
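/* Illustrative only (assumed example): promoting two V8HI operands of a
   WIDEN_MULT_EXPR yields one "lo" and one "hi" V4SI result per input
   pair, so on return VEC_OPRNDS0 holds twice as many vectors, ready for
   a further promotion step if one is needed.  */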
4684 
4685 static void
4686 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
4687 					vec<tree> *vec_oprnds1,
4688 					stmt_vec_info stmt_info, tree vec_dest,
4689 					gimple_stmt_iterator *gsi,
4690 					enum tree_code code1,
4691 					enum tree_code code2, int op_type)
4692 {
4693   int i;
4694   tree vop0, vop1, new_tmp1, new_tmp2;
4695   gimple *new_stmt1, *new_stmt2;
4696   vec<tree> vec_tmp = vNULL;
4697 
4698   vec_tmp.create (vec_oprnds0->length () * 2);
4699   FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4700     {
4701       if (op_type == binary_op)
4702 	vop1 = (*vec_oprnds1)[i];
4703       else
4704 	vop1 = NULL_TREE;
4705 
4706       /* Generate the two halves of promotion operation.  */
4707       new_stmt1 = vect_gen_widened_results_half (code1, vop0, vop1,
4708 						 op_type, vec_dest, gsi,
4709 						 stmt_info);
4710       new_stmt2 = vect_gen_widened_results_half (code2, vop0, vop1,
4711 						 op_type, vec_dest, gsi,
4712 						 stmt_info);
4713       if (is_gimple_call (new_stmt1))
4714 	{
4715 	  new_tmp1 = gimple_call_lhs (new_stmt1);
4716 	  new_tmp2 = gimple_call_lhs (new_stmt2);
4717 	}
4718       else
4719 	{
4720 	  new_tmp1 = gimple_assign_lhs (new_stmt1);
4721 	  new_tmp2 = gimple_assign_lhs (new_stmt2);
4722 	}
4723 
4724       /* Store the results for the next step.  */
4725       vec_tmp.quick_push (new_tmp1);
4726       vec_tmp.quick_push (new_tmp2);
4727     }
4728 
4729   vec_oprnds0->release ();
4730   *vec_oprnds0 = vec_tmp;
4731 }
4732 
4733 
4734 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
4735    If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
4736    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4737    Return true if STMT_INFO is vectorizable in this way.  */
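/* Hedged examples of statements that reach this function: "i_2 = (int) f_1"
   with float F_1 has equal lane counts and takes the NONE path with a
   single FIX_TRUNC_EXPR per vector, while "l_2 = (long) s_1" with short
   S_1 takes the WIDEN path and unpacks each input vector into wider
   result vectors, possibly over more than one step.  */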
4738 
4739 static bool
4740 vectorizable_conversion (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
4741 			 stmt_vec_info *vec_stmt, slp_tree slp_node,
4742 			 stmt_vector_for_cost *cost_vec)
4743 {
4744   tree vec_dest;
4745   tree scalar_dest;
4746   tree op0, op1 = NULL_TREE;
4747   tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
4748   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4749   enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4750   enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4751   tree new_temp;
4752   enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4753   int ndts = 2;
4754   stmt_vec_info prev_stmt_info;
4755   poly_uint64 nunits_in;
4756   poly_uint64 nunits_out;
4757   tree vectype_out, vectype_in;
4758   int ncopies, i, j;
4759   tree lhs_type, rhs_type;
4760   enum { NARROW, NONE, WIDEN } modifier;
4761   vec<tree> vec_oprnds0 = vNULL;
4762   vec<tree> vec_oprnds1 = vNULL;
4763   tree vop0;
4764   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4765   vec_info *vinfo = stmt_info->vinfo;
4766   int multi_step_cvt = 0;
4767   vec<tree> interm_types = vNULL;
4768   tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
4769   int op_type;
4770   unsigned short fltsz;
4771 
4772   /* Is STMT a vectorizable conversion?   */
4773 
4774   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4775     return false;
4776 
4777   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4778       && ! vec_stmt)
4779     return false;
4780 
4781   gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
4782   if (!stmt)
4783     return false;
4784 
4785   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4786     return false;
4787 
4788   code = gimple_assign_rhs_code (stmt);
4789   if (!CONVERT_EXPR_CODE_P (code)
4790       && code != FIX_TRUNC_EXPR
4791       && code != FLOAT_EXPR
4792       && code != WIDEN_MULT_EXPR
4793       && code != WIDEN_LSHIFT_EXPR)
4794     return false;
4795 
4796   op_type = TREE_CODE_LENGTH (code);
4797 
4798   /* Check types of lhs and rhs.  */
4799   scalar_dest = gimple_assign_lhs (stmt);
4800   lhs_type = TREE_TYPE (scalar_dest);
4801   vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4802 
4803   op0 = gimple_assign_rhs1 (stmt);
4804   rhs_type = TREE_TYPE (op0);
4805 
4806   if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4807       && !((INTEGRAL_TYPE_P (lhs_type)
4808 	    && INTEGRAL_TYPE_P (rhs_type))
4809 	   || (SCALAR_FLOAT_TYPE_P (lhs_type)
4810 	       && SCALAR_FLOAT_TYPE_P (rhs_type))))
4811     return false;
4812 
4813   if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4814       && ((INTEGRAL_TYPE_P (lhs_type)
4815 	   && !type_has_mode_precision_p (lhs_type))
4816 	  || (INTEGRAL_TYPE_P (rhs_type)
4817 	      && !type_has_mode_precision_p (rhs_type))))
4818     {
4819       if (dump_enabled_p ())
4820 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4821                          "type conversion to/from bit-precision unsupported."
4822                          "\n");
4823       return false;
4824     }
4825 
4826   /* Check the operands of the operation.  */
4827   if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype_in))
4828     {
4829       if (dump_enabled_p ())
4830 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4831                          "use not simple.\n");
4832       return false;
4833     }
4834   if (op_type == binary_op)
4835     {
4836       bool ok;
4837 
4838       op1 = gimple_assign_rhs2 (stmt);
4839       gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4840       /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4841 	 OP1.  */
4842       if (CONSTANT_CLASS_P (op0))
4843 	ok = vect_is_simple_use (op1, vinfo, &dt[1], &vectype_in);
4844       else
4845 	ok = vect_is_simple_use (op1, vinfo, &dt[1]);
4846 
4847       if (!ok)
4848 	{
4849           if (dump_enabled_p ())
4850             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4851                              "use not simple.\n");
4852 	  return false;
4853 	}
4854     }
4855 
4856   /* If op0 is an external or constant def, infer the vector type
4857      from the scalar type.  */
4858   if (!vectype_in)
4859     vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
4860   if (vec_stmt)
4861     gcc_assert (vectype_in);
4862   if (!vectype_in)
4863     {
4864       if (dump_enabled_p ())
4865 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4866 			 "no vectype for scalar type %T\n", rhs_type);
4867 
4868       return false;
4869     }
4870 
4871   if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4872       && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4873     {
4874       if (dump_enabled_p ())
4875 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4876 			 "can't convert between boolean and non "
4877 			 "boolean vectors %T\n", rhs_type);
4878 
4879       return false;
4880     }
4881 
4882   nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4883   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4884   if (known_eq (nunits_out, nunits_in))
4885     modifier = NONE;
4886   else if (multiple_p (nunits_out, nunits_in))
4887     modifier = NARROW;
4888   else
4889     {
4890       gcc_checking_assert (multiple_p (nunits_in, nunits_out));
4891       modifier = WIDEN;
4892     }
4893 
4894   /* Multiple types in SLP are handled by creating the appropriate number of
4895      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
4896      case of SLP.  */
4897   if (slp_node)
4898     ncopies = 1;
4899   else if (modifier == NARROW)
4900     ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4901   else
4902     ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
4903 
4904   /* Sanity check: make sure that at least one copy of the vectorized stmt
4905      needs to be generated.  */
4906   gcc_assert (ncopies >= 1);
4907 
4908   bool found_mode = false;
4909   scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4910   scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4911   opt_scalar_mode rhs_mode_iter;
4912 
4913   /* Supportable by target?  */
4914   switch (modifier)
4915     {
4916     case NONE:
4917       if (code != FIX_TRUNC_EXPR
4918 	  && code != FLOAT_EXPR
4919 	  && !CONVERT_EXPR_CODE_P (code))
4920 	return false;
4921       if (supportable_convert_operation (code, vectype_out, vectype_in, &code1))
4922 	break;
4923       /* FALLTHRU */
4924     unsupported:
4925       if (dump_enabled_p ())
4926 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4927                          "conversion not supported by target.\n");
4928       return false;
4929 
4930     case WIDEN:
4931       if (supportable_widening_operation (code, stmt_info, vectype_out,
4932 					  vectype_in, &code1, &code2,
4933 					  &multi_step_cvt, &interm_types))
4934 	{
4935 	  /* Binary widening operation can only be supported directly by the
4936 	     architecture.  */
4937 	  gcc_assert (!(multi_step_cvt && op_type == binary_op));
4938 	  break;
4939 	}
4940 
4941       if (code != FLOAT_EXPR
4942 	  || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4943 	goto unsupported;
4944 
4945       fltsz = GET_MODE_SIZE (lhs_mode);
4946       FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4947 	{
4948 	  rhs_mode = rhs_mode_iter.require ();
4949 	  if (GET_MODE_SIZE (rhs_mode) > fltsz)
4950 	    break;
4951 
4952 	  cvt_type
4953 	    = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4954 	  cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4955 	  if (cvt_type == NULL_TREE)
4956 	    goto unsupported;
4957 
4958 	  if (GET_MODE_SIZE (rhs_mode) == fltsz)
4959 	    {
4960 	      if (!supportable_convert_operation (code, vectype_out,
4961 						  cvt_type, &codecvt1))
4962 		goto unsupported;
4963 	    }
4964 	  else if (!supportable_widening_operation (code, stmt_info,
4965 						    vectype_out, cvt_type,
4966 						    &codecvt1, &codecvt2,
4967 						    &multi_step_cvt,
4968 						    &interm_types))
4969 	    continue;
4970 	  else
4971 	    gcc_assert (multi_step_cvt == 0);
4972 
4973 	  if (supportable_widening_operation (NOP_EXPR, stmt_info, cvt_type,
4974 					      vectype_in, &code1, &code2,
4975 					      &multi_step_cvt, &interm_types))
4976 	    {
4977 	      found_mode = true;
4978 	      break;
4979 	    }
4980 	}
4981 
4982       if (!found_mode)
4983 	goto unsupported;
4984 
4985       if (GET_MODE_SIZE (rhs_mode) == fltsz)
4986 	codecvt2 = ERROR_MARK;
4987       else
4988 	{
4989 	  multi_step_cvt++;
4990 	  interm_types.safe_push (cvt_type);
4991 	  cvt_type = NULL_TREE;
4992 	}
4993       break;
4994 
4995     case NARROW:
4996       gcc_assert (op_type == unary_op);
4997       if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4998 					   &code1, &multi_step_cvt,
4999 					   &interm_types))
5000 	break;
5001 
5002       if (code != FIX_TRUNC_EXPR
5003 	  || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
5004 	goto unsupported;
5005 
5006       cvt_type
5007 	= build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
5008       cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
5009       if (cvt_type == NULL_TREE)
5010 	goto unsupported;
5011       if (!supportable_convert_operation (code, cvt_type, vectype_in,
5012 					  &codecvt1))
5013 	goto unsupported;
5014       if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
5015 					   &code1, &multi_step_cvt,
5016 					   &interm_types))
5017 	break;
5018       goto unsupported;
5019 
5020     default:
5021       gcc_unreachable ();
5022     }
5023 
5024   if (!vec_stmt)		/* transformation not required.  */
5025     {
5026       DUMP_VECT_SCOPE ("vectorizable_conversion");
5027       if (modifier == NONE)
5028         {
5029 	  STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
5030 	  vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node,
5031 				  cost_vec);
5032 	}
5033       else if (modifier == NARROW)
5034 	{
5035 	  STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
5036 	  /* The final packing step produces one vector result per copy.  */
5037 	  unsigned int nvectors
5038 	    = (slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies);
5039 	  vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5040 					      multi_step_cvt, cost_vec);
5041 	}
5042       else
5043 	{
5044 	  STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
5045 	  /* The initial unpacking step produces two vector results
5046 	     per copy.  MULTI_STEP_CVT is 0 for a single conversion,
5047 	     so >> MULTI_STEP_CVT divides by 2^(number of steps - 1).  */
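	  /* Worked example (figures assumed): an SLP node with 8 vector
	     stmts and MULTI_STEP_CVT == 2 charges 8 >> 2 == 2 vectors for
	     the initial unpacking; without SLP, NCOPIES == 3 charges 6.  */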
5048 	  unsigned int nvectors
5049 	    = (slp_node
5050 	       ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt
5051 	       : ncopies * 2);
5052 	  vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5053 					      multi_step_cvt, cost_vec);
5054 	}
5055       interm_types.release ();
5056       return true;
5057     }
5058 
5059   /* Transform.  */
5060   if (dump_enabled_p ())
5061     dump_printf_loc (MSG_NOTE, vect_location,
5062                      "transform conversion. ncopies = %d.\n", ncopies);
5063 
5064   if (op_type == binary_op)
5065     {
5066       if (CONSTANT_CLASS_P (op0))
5067 	op0 = fold_convert (TREE_TYPE (op1), op0);
5068       else if (CONSTANT_CLASS_P (op1))
5069 	op1 = fold_convert (TREE_TYPE (op0), op1);
5070     }
5071 
5072   /* In case of multi-step conversion, we first generate conversion operations
5073      to the intermediate types, and then from those types to the final one.
5074      We create vector destinations for the intermediate types (TYPES) received
5075      from supportable_*_operation, and store them in the correct order
5076      for future use in vect_create_vectorized_*_stmts ().  */
5077   auto_vec<tree> vec_dsts (multi_step_cvt + 1);
5078   vec_dest = vect_create_destination_var (scalar_dest,
5079 					  (cvt_type && modifier == WIDEN)
5080 					  ? cvt_type : vectype_out);
5081   vec_dsts.quick_push (vec_dest);
5082 
5083   if (multi_step_cvt)
5084     {
5085       for (i = interm_types.length () - 1;
5086 	   interm_types.iterate (i, &intermediate_type); i--)
5087 	{
5088 	  vec_dest = vect_create_destination_var (scalar_dest,
5089 						  intermediate_type);
5090 	  vec_dsts.quick_push (vec_dest);
5091 	}
5092     }
5093 
5094   if (cvt_type)
5095     vec_dest = vect_create_destination_var (scalar_dest,
5096 					    modifier == WIDEN
5097 					    ? vectype_out : cvt_type);
5098 
5099   if (!slp_node)
5100     {
5101       if (modifier == WIDEN)
5102 	{
5103 	  vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
5104 	  if (op_type == binary_op)
5105 	    vec_oprnds1.create (1);
5106 	}
5107       else if (modifier == NARROW)
5108 	vec_oprnds0.create (
5109 		   2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
5110     }
5111   else if (code == WIDEN_LSHIFT_EXPR)
5112     vec_oprnds1.create (slp_node->vec_stmts_size);
5113 
5114   last_oprnd = op0;
5115   prev_stmt_info = NULL;
5116   switch (modifier)
5117     {
5118     case NONE:
5119       for (j = 0; j < ncopies; j++)
5120 	{
5121 	  if (j == 0)
5122 	    vect_get_vec_defs (op0, NULL, stmt_info, &vec_oprnds0,
5123 			       NULL, slp_node);
5124 	  else
5125 	    vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, NULL);
5126 
5127 	  FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5128 	    {
5129 	      stmt_vec_info new_stmt_info;
5130 	      /* Arguments are ready, create the new vector stmt.  */
5131 	      gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
5132 	      gassign *new_stmt = gimple_build_assign (vec_dest, code1, vop0);
5133 	      new_temp = make_ssa_name (vec_dest, new_stmt);
5134 	      gimple_assign_set_lhs (new_stmt, new_temp);
5135 	      new_stmt_info
5136 		= vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5137 
5138 	      if (slp_node)
5139 		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5140 	      else
5141 		{
5142 		  if (!prev_stmt_info)
5143 		    STMT_VINFO_VEC_STMT (stmt_info)
5144 		      = *vec_stmt = new_stmt_info;
5145 		  else
5146 		    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5147 		  prev_stmt_info = new_stmt_info;
5148 		}
5149 	    }
5150 	}
5151       break;
5152 
5153     case WIDEN:
5154       /* In case the vectorization factor (VF) is bigger than the number
5155 	 of elements that we can fit in a vectype (nunits), we have to
5156 	 generate more than one vector stmt - i.e. we need to "unroll"
5157 	 the vector stmt by a factor of VF/nunits.  */
5158       for (j = 0; j < ncopies; j++)
5159 	{
5160 	  /* Handle uses.  */
5161 	  if (j == 0)
5162 	    {
5163 	      if (slp_node)
5164 		{
5165 		  if (code == WIDEN_LSHIFT_EXPR)
5166 		    {
5167 		      unsigned int k;
5168 
5169 		      vec_oprnd1 = op1;
5170 		      /* Store vec_oprnd1 for every vector stmt to be created
5171 			 for SLP_NODE.  We check during the analysis that all
5172 			 the shift arguments are the same.  */
5173 		      for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5174 			vec_oprnds1.quick_push (vec_oprnd1);
5175 
5176 		      vect_get_vec_defs (op0, NULL_TREE, stmt_info,
5177 					 &vec_oprnds0, NULL, slp_node);
5178 		    }
5179 		  else
5180 		    vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0,
5181 				       &vec_oprnds1, slp_node);
5182 		}
5183 	      else
5184 		{
5185 		  vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt_info);
5186 		  vec_oprnds0.quick_push (vec_oprnd0);
5187 		  if (op_type == binary_op)
5188 		    {
5189 		      if (code == WIDEN_LSHIFT_EXPR)
5190 			vec_oprnd1 = op1;
5191 		      else
5192 			vec_oprnd1
5193 			  = vect_get_vec_def_for_operand (op1, stmt_info);
5194 		      vec_oprnds1.quick_push (vec_oprnd1);
5195 		    }
5196 		}
5197 	    }
5198 	  else
5199 	    {
5200 	      vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
5201 	      vec_oprnds0.truncate (0);
5202 	      vec_oprnds0.quick_push (vec_oprnd0);
5203 	      if (op_type == binary_op)
5204 		{
5205 		  if (code == WIDEN_LSHIFT_EXPR)
5206 		    vec_oprnd1 = op1;
5207 		  else
5208 		    vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo,
5209 								 vec_oprnd1);
5210 		  vec_oprnds1.truncate (0);
5211 		  vec_oprnds1.quick_push (vec_oprnd1);
5212 		}
5213 	    }
5214 
5215 	  /* Arguments are ready.  Create the new vector stmts.  */
5216 	  for (i = multi_step_cvt; i >= 0; i--)
5217 	    {
5218 	      tree this_dest = vec_dsts[i];
5219 	      enum tree_code c1 = code1, c2 = code2;
5220 	      if (i == 0 && codecvt2 != ERROR_MARK)
5221 		{
5222 		  c1 = codecvt1;
5223 		  c2 = codecvt2;
5224 		}
5225 	      vect_create_vectorized_promotion_stmts (&vec_oprnds0,
5226 						      &vec_oprnds1, stmt_info,
5227 						      this_dest, gsi,
5228 						      c1, c2, op_type);
5229 	    }
5230 
5231 	  FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5232 	    {
5233 	      stmt_vec_info new_stmt_info;
5234 	      if (cvt_type)
5235 		{
5236 		  gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5237 		  new_temp = make_ssa_name (vec_dest);
5238 		  gassign *new_stmt
5239 		    = gimple_build_assign (new_temp, codecvt1, vop0);
5240 		  new_stmt_info
5241 		    = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5242 		}
5243 	      else
5244 		new_stmt_info = vinfo->lookup_def (vop0);
5245 
5246 	      if (slp_node)
5247 		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5248 	      else
5249 		{
5250 		  if (!prev_stmt_info)
5251 		    STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
5252 		  else
5253 		    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5254 		  prev_stmt_info = new_stmt_info;
5255 		}
5256 	    }
5257 	}
5258 
5259       *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5260       break;
5261 
5262     case NARROW:
5263       /* In case the vectorization factor (VF) is bigger than the number
5264 	 of elements that we can fit in a vectype (nunits), we have to
5265 	 generate more than one vector stmt - i.e. we need to "unroll"
5266 	 the vector stmt by a factor of VF/nunits.  */
5267       for (j = 0; j < ncopies; j++)
5268 	{
5269 	  /* Handle uses.  */
5270 	  if (slp_node)
5271 	    vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
5272 			       slp_node);
5273 	  else
5274 	    {
5275 	      vec_oprnds0.truncate (0);
5276 	      vect_get_loop_based_defs (&last_oprnd, stmt_info, &vec_oprnds0,
5277 					vect_pow2 (multi_step_cvt) - 1);
5278 	    }
5279 
5280 	  /* Arguments are ready.  Create the new vector stmts.  */
5281 	  if (cvt_type)
5282 	    FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5283 	      {
5284 		gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5285 		new_temp = make_ssa_name (vec_dest);
5286 		gassign *new_stmt
5287 		    = gimple_build_assign (new_temp, codecvt1, vop0);
5288 		vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5289 		vec_oprnds0[i] = new_temp;
5290 	      }
5291 
5292 	  vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
5293 						 stmt_info, vec_dsts, gsi,
5294 						 slp_node, code1,
5295 						 &prev_stmt_info);
5296 	}
5297 
5298       *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5299       break;
5300     }
5301 
5302   vec_oprnds0.release ();
5303   vec_oprnds1.release ();
5304   interm_types.release ();
5305 
5306   return true;
5307 }
5308 
5309 /* Return true if we can assume from the scalar form of STMT_INFO that
5310    neither the scalar nor the vector forms will generate code.  STMT_INFO
5311    is known not to involve a data reference.  */
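/* For instance (illustrative examples only): a VIEW_CONVERT_EXPR between
   V4SI and V4SF, or a sign-changing conversion such as
   "u_2 = (unsigned int) i_1", keeps the same bit pattern and is expected
   to generate no code in either scalar or vector form.  */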
5312 
5313 bool
5314 vect_nop_conversion_p (stmt_vec_info stmt_info)
5315 {
5316   gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5317   if (!stmt)
5318     return false;
5319 
5320   tree lhs = gimple_assign_lhs (stmt);
5321   tree_code code = gimple_assign_rhs_code (stmt);
5322   tree rhs = gimple_assign_rhs1 (stmt);
5323 
5324   if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
5325     return true;
5326 
5327   if (CONVERT_EXPR_CODE_P (code))
5328     return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));
5329 
5330   return false;
5331 }
5332 
5333 /* Function vectorizable_assignment.
5334 
5335    Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5336    If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5337    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5338    Return true if STMT_INFO is vectorizable in this way.  */
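/* Typical candidates (an assumed illustration): a plain copy "b_2 = a_1",
   a PAREN_EXPR, or a conversion such as "u_2 = (unsigned int) i_1" that
   changes neither the number of vector elements nor the vector size.  */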
5339 
5340 static bool
5341 vectorizable_assignment (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5342 			 stmt_vec_info *vec_stmt, slp_tree slp_node,
5343 			 stmt_vector_for_cost *cost_vec)
5344 {
5345   tree vec_dest;
5346   tree scalar_dest;
5347   tree op;
5348   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5349   tree new_temp;
5350   enum vect_def_type dt[1] = {vect_unknown_def_type};
5351   int ndts = 1;
5352   int ncopies;
5353   int i, j;
5354   vec<tree> vec_oprnds = vNULL;
5355   tree vop;
5356   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5357   vec_info *vinfo = stmt_info->vinfo;
5358   stmt_vec_info prev_stmt_info = NULL;
5359   enum tree_code code;
5360   tree vectype_in;
5361 
5362   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5363     return false;
5364 
5365   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5366       && ! vec_stmt)
5367     return false;
5368 
5369   /* Is vectorizable assignment?  */
5370   gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5371   if (!stmt)
5372     return false;
5373 
5374   scalar_dest = gimple_assign_lhs (stmt);
5375   if (TREE_CODE (scalar_dest) != SSA_NAME)
5376     return false;
5377 
5378   code = gimple_assign_rhs_code (stmt);
5379   if (gimple_assign_single_p (stmt)
5380       || code == PAREN_EXPR
5381       || CONVERT_EXPR_CODE_P (code))
5382     op = gimple_assign_rhs1 (stmt);
5383   else
5384     return false;
5385 
5386   if (code == VIEW_CONVERT_EXPR)
5387     op = TREE_OPERAND (op, 0);
5388 
5389   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5390   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5391 
5392   /* Multiple types in SLP are handled by creating the appropriate number of
5393      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
5394      case of SLP.  */
5395   if (slp_node)
5396     ncopies = 1;
5397   else
5398     ncopies = vect_get_num_copies (loop_vinfo, vectype);
5399 
5400   gcc_assert (ncopies >= 1);
5401 
5402   if (!vect_is_simple_use (op, vinfo, &dt[0], &vectype_in))
5403     {
5404       if (dump_enabled_p ())
5405         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5406                          "use not simple.\n");
5407       return false;
5408     }
5409 
5410   /* We can handle NOP_EXPR conversions that do not change the number
5411      of elements or the vector size.  */
5412   if ((CONVERT_EXPR_CODE_P (code)
5413        || code == VIEW_CONVERT_EXPR)
5414       && (!vectype_in
5415 	  || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
5416 	  || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5417 		       GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
5418     return false;
5419 
5420   /* We do not handle bit-precision changes.  */
5421   if ((CONVERT_EXPR_CODE_P (code)
5422        || code == VIEW_CONVERT_EXPR)
5423       && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5424       && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5425 	  || !type_has_mode_precision_p (TREE_TYPE (op)))
5426       /* But a conversion that does not change the bit-pattern is ok.  */
5427       && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5428 	    > TYPE_PRECISION (TREE_TYPE (op)))
5429 	   && TYPE_UNSIGNED (TREE_TYPE (op)))
5430       /* Conversion between boolean types of different sizes is
5431 	 a simple assignment in case their vectypes are the same
5432 	 boolean vectors.  */
5433       && (!VECTOR_BOOLEAN_TYPE_P (vectype)
5434 	  || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
5435     {
5436       if (dump_enabled_p ())
5437         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5438                          "type conversion to/from bit-precision "
5439                          "unsupported.\n");
5440       return false;
5441     }
5442 
5443   if (!vec_stmt) /* transformation not required.  */
5444     {
5445       STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
5446       DUMP_VECT_SCOPE ("vectorizable_assignment");
5447       if (!vect_nop_conversion_p (stmt_info))
5448 	vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node,
5449 				cost_vec);
5450       return true;
5451     }
5452 
5453   /* Transform.  */
5454   if (dump_enabled_p ())
5455     dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
5456 
5457   /* Handle def.  */
5458   vec_dest = vect_create_destination_var (scalar_dest, vectype);
5459 
5460   /* Handle use.  */
5461   for (j = 0; j < ncopies; j++)
5462     {
5463       /* Handle uses.  */
5464       if (j == 0)
5465 	vect_get_vec_defs (op, NULL, stmt_info, &vec_oprnds, NULL, slp_node);
5466       else
5467 	vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds, NULL);
5468 
5469       /* Arguments are ready. create the new vector stmt.  */
5470       stmt_vec_info new_stmt_info = NULL;
5471       FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
5472        {
5473 	 if (CONVERT_EXPR_CODE_P (code)
5474 	     || code == VIEW_CONVERT_EXPR)
5475 	   vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
5476 	 gassign *new_stmt = gimple_build_assign (vec_dest, vop);
5477          new_temp = make_ssa_name (vec_dest, new_stmt);
5478          gimple_assign_set_lhs (new_stmt, new_temp);
5479 	 new_stmt_info
5480 	   = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5481          if (slp_node)
5482 	   SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5483        }
5484 
5485       if (slp_node)
5486         continue;
5487 
5488       if (j == 0)
5489 	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
5490       else
5491 	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5492 
5493       prev_stmt_info = new_stmt_info;
5494     }
5495 
5496   vec_oprnds.release ();
5497   return true;
5498 }
5499 
5500 
5501 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5502    either as shift by a scalar or by a vector.  */
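/* A minimal usage sketch (hypothetical caller, for illustration):

     if (vect_supportable_shift (vinfo, RSHIFT_EXPR, TREE_TYPE (op)))
       ... the target has a vector-by-scalar or vector-by-vector form ...

   mirroring the optab_scalar / optab_vector probing that
   vectorizable_shift performs below.  */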
5503 
5504 bool
5505 vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
5506 {
5507 
5508   machine_mode vec_mode;
5509   optab optab;
5510   int icode;
5511   tree vectype;
5512 
5513   vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
5514   if (!vectype)
5515     return false;
5516 
5517   optab = optab_for_tree_code (code, vectype, optab_scalar);
5518   if (!optab
5519       || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5520     {
5521       optab = optab_for_tree_code (code, vectype, optab_vector);
5522       if (!optab
5523           || (optab_handler (optab, TYPE_MODE (vectype))
5524                       == CODE_FOR_nothing))
5525         return false;
5526     }
5527 
5528   vec_mode = TYPE_MODE (vectype);
5529   icode = (int) optab_handler (optab, vec_mode);
5530   if (icode == CODE_FOR_nothing)
5531     return false;
5532 
5533   return true;
5534 }
5535 
5536 
5537 /* Function vectorizable_shift.
5538 
5539    Check if STMT_INFO performs a shift operation that can be vectorized.
5540    If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5541    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5542    Return true if STMT_INFO is vectorizable in this way.  */
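/* A hedged example: for "x_3 = y_1 >> n_2" with loop-invariant N_2 the
   vector-by-scalar path can keep N_2 as the scalar operand 2 of the shift
   insn, whereas "x_3 = y_1 >> z_2" with a vectorized Z_2 needs the
   vector-by-vector form.  */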
5543 
5544 static bool
5545 vectorizable_shift (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5546 		    stmt_vec_info *vec_stmt, slp_tree slp_node,
5547 		    stmt_vector_for_cost *cost_vec)
5548 {
5549   tree vec_dest;
5550   tree scalar_dest;
5551   tree op0, op1 = NULL;
5552   tree vec_oprnd1 = NULL_TREE;
5553   tree vectype;
5554   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5555   enum tree_code code;
5556   machine_mode vec_mode;
5557   tree new_temp;
5558   optab optab;
5559   int icode;
5560   machine_mode optab_op2_mode;
5561   enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5562   int ndts = 2;
5563   stmt_vec_info prev_stmt_info;
5564   poly_uint64 nunits_in;
5565   poly_uint64 nunits_out;
5566   tree vectype_out;
5567   tree op1_vectype;
5568   int ncopies;
5569   int j, i;
5570   vec<tree> vec_oprnds0 = vNULL;
5571   vec<tree> vec_oprnds1 = vNULL;
5572   tree vop0, vop1;
5573   unsigned int k;
5574   bool scalar_shift_arg = true;
5575   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5576   vec_info *vinfo = stmt_info->vinfo;
5577   bool incompatible_op1_vectype_p = false;
5578 
5579   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5580     return false;
5581 
5582   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5583       && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
5584       && ! vec_stmt)
5585     return false;
5586 
5587   /* Is STMT a vectorizable binary/unary operation?   */
5588   gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5589   if (!stmt)
5590     return false;
5591 
5592   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5593     return false;
5594 
5595   code = gimple_assign_rhs_code (stmt);
5596 
5597   if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5598       || code == RROTATE_EXPR))
5599     return false;
5600 
5601   scalar_dest = gimple_assign_lhs (stmt);
5602   vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5603   if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5604     {
5605       if (dump_enabled_p ())
5606         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5607                          "bit-precision shifts not supported.\n");
5608       return false;
5609     }
5610 
5611   op0 = gimple_assign_rhs1 (stmt);
5612   if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype))
5613     {
5614       if (dump_enabled_p ())
5615         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5616                          "use not simple.\n");
5617       return false;
5618     }
5619   /* If op0 is an external or constant def, infer the vector type
5620      from the scalar type.  */
5621   if (!vectype)
5622     vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), slp_node);
5623   if (vec_stmt)
5624     gcc_assert (vectype);
5625   if (!vectype)
5626     {
5627       if (dump_enabled_p ())
5628         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5629                          "no vectype for scalar type\n");
5630       return false;
5631     }
5632 
5633   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5634   nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5635   if (maybe_ne (nunits_out, nunits_in))
5636     return false;
5637 
5638   op1 = gimple_assign_rhs2 (stmt);
5639   stmt_vec_info op1_def_stmt_info;
5640   if (!vect_is_simple_use (op1, vinfo, &dt[1], &op1_vectype,
5641 			   &op1_def_stmt_info))
5642     {
5643       if (dump_enabled_p ())
5644         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5645                          "use not simple.\n");
5646       return false;
5647     }
5648 
5649   /* Multiple types in SLP are handled by creating the appropriate number of
5650      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
5651      case of SLP.  */
5652   if (slp_node)
5653     ncopies = 1;
5654   else
5655     ncopies = vect_get_num_copies (loop_vinfo, vectype);
5656 
5657   gcc_assert (ncopies >= 1);
5658 
5659   /* Determine whether the shift amount is a vector, or scalar.  If the
5660      shift/rotate amount is a vector, use the vector/vector shift optabs.  */
5661 
5662   if ((dt[1] == vect_internal_def
5663        || dt[1] == vect_induction_def
5664        || dt[1] == vect_nested_cycle)
5665       && !slp_node)
5666     scalar_shift_arg = false;
5667   else if (dt[1] == vect_constant_def
5668 	   || dt[1] == vect_external_def
5669 	   || dt[1] == vect_internal_def)
5670     {
5671       /* In SLP, we need to check whether the shift count is the same
5672 	 for all scalar stmts; in loops, if it is a constant or invariant,
5673 	 it is always a scalar shift.  */
5674       if (slp_node)
5675 	{
5676 	  vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5677 	  stmt_vec_info slpstmt_info;
5678 
5679 	  FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
5680 	    {
5681 	      gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
5682 	      if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5683 		scalar_shift_arg = false;
5684 	    }
5685 
5686 	  /* For internal SLP defs we have to make sure we see scalar stmts
5687 	     for all vector elements.
5688 	     ???  For different vectors we could resort to a different
5689 	     scalar shift operand but code-generation below simply always
5690 	     takes the first.  */
5691 	  if (dt[1] == vect_internal_def
5692 	      && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
5693 			   stmts.length ()))
5694 	    scalar_shift_arg = false;
5695 	}
5696 
5697       /* If the shift amount is computed by a pattern stmt we cannot
5698          use the scalar amount directly, thus give up and use a vector
5699 	 shift.  */
5700       if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
5701 	scalar_shift_arg = false;
5702     }
5703   else
5704     {
5705       if (dump_enabled_p ())
5706         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5707                          "operand mode requires invariant argument.\n");
5708       return false;
5709     }
5710 
5711   /* Vector shifted by vector.  */
5712   bool was_scalar_shift_arg = scalar_shift_arg;
5713   if (!scalar_shift_arg)
5714     {
5715       optab = optab_for_tree_code (code, vectype, optab_vector);
5716       if (dump_enabled_p ())
5717         dump_printf_loc (MSG_NOTE, vect_location,
5718                          "vector/vector shift/rotate found.\n");
5719 
5720       if (!op1_vectype)
5721 	op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1),
5722 						   slp_node);
5723       incompatible_op1_vectype_p
5724 	= (op1_vectype == NULL_TREE
5725 	   || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype),
5726 			TYPE_VECTOR_SUBPARTS (vectype))
5727 	   || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype));
5728       if (incompatible_op1_vectype_p
5729 	  && (!slp_node
5730 	      || SLP_TREE_DEF_TYPE
5731 		   (SLP_TREE_CHILDREN (slp_node)[1]) != vect_constant_def))
5732 	{
5733 	  if (dump_enabled_p ())
5734 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5735                              "unusable type for last operand in"
5736                              " vector/vector shift/rotate.\n");
5737 	  return false;
5738 	}
5739     }
5740   /* See if the machine has a vector-shifted-by-scalar insn and, if not,
5741      see if it has a vector-shifted-by-vector insn.  */
5742   else
5743     {
5744       optab = optab_for_tree_code (code, vectype, optab_scalar);
5745       if (optab
5746           && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5747         {
5748           if (dump_enabled_p ())
5749             dump_printf_loc (MSG_NOTE, vect_location,
5750                              "vector/scalar shift/rotate found.\n");
5751         }
5752       else
5753         {
5754           optab = optab_for_tree_code (code, vectype, optab_vector);
5755           if (optab
5756                && (optab_handler (optab, TYPE_MODE (vectype))
5757                       != CODE_FOR_nothing))
5758             {
5759 	      scalar_shift_arg = false;
5760 
5761               if (dump_enabled_p ())
5762                 dump_printf_loc (MSG_NOTE, vect_location,
5763                                  "vector/vector shift/rotate found.\n");
5764 
5765               /* Unlike the other binary operators, shifts/rotates take
5766                  an int rhs rather than one of the same type as the lhs,
5767                  so make sure the scalar is the right type if we are
5768 		 dealing with vectors of long long/long/short/char.  */
5769 	      incompatible_op1_vectype_p
5770 		= !tree_nop_conversion_p (TREE_TYPE (vectype),
5771 					  TREE_TYPE (op1));
5772             }
5773         }
5774     }
5775 
5776   /* Supportable by target?  */
5777   if (!optab)
5778     {
5779       if (dump_enabled_p ())
5780         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5781                          "no optab.\n");
5782       return false;
5783     }
5784   vec_mode = TYPE_MODE (vectype);
5785   icode = (int) optab_handler (optab, vec_mode);
5786   if (icode == CODE_FOR_nothing)
5787     {
5788       if (dump_enabled_p ())
5789         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5790                          "op not supported by target.\n");
5791       /* Check only during analysis.  */
5792       if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5793 	  || (!vec_stmt
5794 	      && !vect_worthwhile_without_simd_p (vinfo, code)))
5795         return false;
5796       if (dump_enabled_p ())
5797         dump_printf_loc (MSG_NOTE, vect_location,
5798                          "proceeding using word mode.\n");
5799     }
5800 
5801   /* Worthwhile without SIMD support?  Check only during analysis.  */
5802   if (!vec_stmt
5803       && !VECTOR_MODE_P (TYPE_MODE (vectype))
5804       && !vect_worthwhile_without_simd_p (vinfo, code))
5805     {
5806       if (dump_enabled_p ())
5807         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5808                          "not worthwhile without SIMD support.\n");
5809       return false;
5810     }
5811 
5812   if (!vec_stmt) /* transformation not required.  */
5813     {
5814       STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5815       DUMP_VECT_SCOPE ("vectorizable_shift");
5816       vect_model_simple_cost (stmt_info, ncopies, dt,
5817 			      scalar_shift_arg ? 1 : ndts, slp_node, cost_vec);
5818       return true;
5819     }
5820 
5821   /* Transform.  */
5822 
5823   if (dump_enabled_p ())
5824     dump_printf_loc (MSG_NOTE, vect_location,
5825                      "transform binary/unary operation.\n");
5826 
5827   if (incompatible_op1_vectype_p && !slp_node)
5828     {
5829       op1 = fold_convert (TREE_TYPE (vectype), op1);
5830       if (dt[1] != vect_constant_def)
5831 	op1 = vect_init_vector (stmt_info, op1,
5832 				TREE_TYPE (vectype), NULL);
5833     }
5834 
5835   /* Handle def.  */
5836   vec_dest = vect_create_destination_var (scalar_dest, vectype);
5837 
5838   prev_stmt_info = NULL;
5839   for (j = 0; j < ncopies; j++)
5840     {
5841       /* Handle uses.  */
5842       if (j == 0)
5843         {
5844           if (scalar_shift_arg)
5845             {
5846               /* Vector shl and shr insn patterns can be defined with scalar
5847                  operand 2 (shift operand).  In this case, use constant or loop
5848                  invariant op1 directly, without extending it to vector mode
5849                  first.  */
5850               optab_op2_mode = insn_data[icode].operand[2].mode;
5851               if (!VECTOR_MODE_P (optab_op2_mode))
5852                 {
5853                   if (dump_enabled_p ())
5854                     dump_printf_loc (MSG_NOTE, vect_location,
5855                                      "operand 1 using scalar mode.\n");
5856                   vec_oprnd1 = op1;
5857                   vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
5858                   vec_oprnds1.quick_push (vec_oprnd1);
5859                   if (slp_node)
5860                     {
5861                       /* Store vec_oprnd1 for every vector stmt to be created
5862                          for SLP_NODE.  We check during the analysis that all
5863                          the shift arguments are the same.
5864                          TODO: Allow different constants for different vector
5865                          stmts generated for an SLP instance.  */
5866                       for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5867                         vec_oprnds1.quick_push (vec_oprnd1);
5868                     }
5869                 }
5870             }
5871 	  else if (slp_node && incompatible_op1_vectype_p)
5872 	    {
5873 	      if (was_scalar_shift_arg)
5874 		{
5875 		  /* If the argument was the same in all lanes create
5876 		     the correctly typed vector shift amount directly.  */
5877 		  op1 = fold_convert (TREE_TYPE (vectype), op1);
5878 		  op1 = vect_init_vector (stmt_info, op1, TREE_TYPE (vectype),
5879 					  !loop_vinfo ? gsi : NULL);
5880 		  vec_oprnd1 = vect_init_vector (stmt_info, op1, vectype,
5881 						 !loop_vinfo ? gsi : NULL);
5882                   vec_oprnds1.create (slp_node->vec_stmts_size);
5883 		  for (k = 0; k < slp_node->vec_stmts_size; k++)
5884 		    vec_oprnds1.quick_push (vec_oprnd1);
5885 		}
5886 	      else if (dt[1] == vect_constant_def)
5887 		{
5888 		  /* Convert the scalar constant shift amounts in-place.  */
5889 		  slp_tree shift = SLP_TREE_CHILDREN (slp_node)[1];
5890 		  gcc_assert (SLP_TREE_DEF_TYPE (shift) == vect_constant_def);
5891 		  for (unsigned i = 0;
5892 		       i < SLP_TREE_SCALAR_OPS (shift).length (); ++i)
5893 		    {
5894 		      SLP_TREE_SCALAR_OPS (shift)[i]
5895 			  = fold_convert (TREE_TYPE (vectype),
5896 					  SLP_TREE_SCALAR_OPS (shift)[i]);
5897 		      gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (shift)[i])
5898 				   == INTEGER_CST));
5899 		    }
5900 		}
5901 	      else
5902 		gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype));
5903 	    }
5904 
5905           /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5906              (a special case for certain kinds of vector shifts); otherwise,
5907              operand 1 should be of a vector type (the usual case).  */
5908           if (vec_oprnd1)
5909 	    vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
5910 			       slp_node);
5911           else
5912 	    vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0, &vec_oprnds1,
5913 			       slp_node);
5914         }
5915       else
5916 	vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, &vec_oprnds1);
5917 
5918       /* Arguments are ready.  Create the new vector stmt.  */
5919       stmt_vec_info new_stmt_info = NULL;
5920       FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5921         {
5922           vop1 = vec_oprnds1[i];
5923 	  gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5924           new_temp = make_ssa_name (vec_dest, new_stmt);
5925           gimple_assign_set_lhs (new_stmt, new_temp);
5926 	  new_stmt_info
5927 	    = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5928           if (slp_node)
5929 	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5930         }
5931 
5932       if (slp_node)
5933         continue;
5934 
5935       if (j == 0)
5936 	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
5937       else
5938 	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5939       prev_stmt_info = new_stmt_info;
5940     }
5941 
5942   vec_oprnds0.release ();
5943   vec_oprnds1.release ();
5944 
5945   return true;
5946 }
5947 
5948 
5949 /* Function vectorizable_operation.
5950 
5951    Check if STMT_INFO performs a binary, unary or ternary operation that can
5952    be vectorized.
5953    If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5954    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5955    Return true if STMT_INFO is vectorizable in this way.  */
5956 
5957 static bool
5958 vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5959 			stmt_vec_info *vec_stmt, slp_tree slp_node,
5960 			stmt_vector_for_cost *cost_vec)
5961 {
5962   tree vec_dest;
5963   tree scalar_dest;
5964   tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5965   tree vectype;
5966   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5967   enum tree_code code, orig_code;
5968   machine_mode vec_mode;
5969   tree new_temp;
5970   int op_type;
5971   optab optab;
5972   bool target_support_p;
5973   enum vect_def_type dt[3]
5974     = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5975   int ndts = 3;
5976   stmt_vec_info prev_stmt_info;
5977   poly_uint64 nunits_in;
5978   poly_uint64 nunits_out;
5979   tree vectype_out;
5980   int ncopies, vec_num;
5981   int j, i;
5982   vec<tree> vec_oprnds0 = vNULL;
5983   vec<tree> vec_oprnds1 = vNULL;
5984   vec<tree> vec_oprnds2 = vNULL;
5985   tree vop0, vop1, vop2;
5986   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5987   vec_info *vinfo = stmt_info->vinfo;
5988 
5989   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5990     return false;
5991 
5992   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5993       && ! vec_stmt)
5994     return false;
5995 
5996   /* Is STMT a vectorizable binary/unary operation?   */
5997   gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5998   if (!stmt)
5999     return false;
6000 
6001   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6002     return false;
6003 
6004   orig_code = code = gimple_assign_rhs_code (stmt);
6005 
6006   /* Shifts are handled in vectorizable_shift.  */
6007   if (code == LSHIFT_EXPR
6008       || code == RSHIFT_EXPR
6009       || code == LROTATE_EXPR
6010       || code == RROTATE_EXPR)
6011    return false;
6012 
6013   /* Comparisons are handled in vectorizable_comparison.  */
6014   if (TREE_CODE_CLASS (code) == tcc_comparison)
6015     return false;
6016 
6017   /* Conditions are handled in vectorizable_condition.  */
6018   if (code == COND_EXPR)
6019     return false;
6020 
6021   /* For pointer addition and subtraction, we should use the normal
6022      plus and minus for the vector operation.  */
6023   if (code == POINTER_PLUS_EXPR)
6024     code = PLUS_EXPR;
6025   if (code == POINTER_DIFF_EXPR)
6026     code = MINUS_EXPR;
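  /* For instance (illustrative), a pointer increment q_2 = p_1 + 16
     (POINTER_PLUS_EXPR) becomes an ordinary element-wise PLUS_EXPR on the
     vectorized pointer values.  */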
6027 
6028   /* Support only unary, binary or ternary operations.  */
6029   op_type = TREE_CODE_LENGTH (code);
6030   if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
6031     {
6032       if (dump_enabled_p ())
6033         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6034                          "num. args = %d (not unary/binary/ternary op).\n",
6035                          op_type);
6036       return false;
6037     }
6038 
6039   scalar_dest = gimple_assign_lhs (stmt);
6040   vectype_out = STMT_VINFO_VECTYPE (stmt_info);
6041 
6042   /* Most operations cannot handle bit-precision types without extra
6043      truncations.  */
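  /* E.g. (illustrative) adding two values of a 3-bit bit-field type would
     require truncating every vector element back to 3 bits afterwards,
     which is not done here.  */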
6044   bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out);
6045   if (!mask_op_p
6046       && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
6047       /* The exceptions are the bitwise binary operations.  */
6048       && code != BIT_IOR_EXPR
6049       && code != BIT_XOR_EXPR
6050       && code != BIT_AND_EXPR)
6051     {
6052       if (dump_enabled_p ())
6053         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6054                          "bit-precision arithmetic not supported.\n");
6055       return false;
6056     }
6057 
6058   op0 = gimple_assign_rhs1 (stmt);
6059   if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype))
6060     {
6061       if (dump_enabled_p ())
6062         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6063                          "use not simple.\n");
6064       return false;
6065     }
6066   /* If op0 is an external or constant def, infer the vector type
6067      from the scalar type.  */
6068   if (!vectype)
6069     {
6070       /* For a boolean type we cannot determine the vectype from an
6071 	 invariant value (we don't know whether it is a vector
6072 	 of booleans or a vector of integers).  Use the output
6073 	 vectype because operations on booleans don't change
6074 	 the type.  */
6075       if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
6076 	{
6077 	  if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
6078 	    {
6079 	      if (dump_enabled_p ())
6080 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6081 				 "not supported operation on bool value.\n");
6082 	      return false;
6083 	    }
6084 	  vectype = vectype_out;
6085 	}
6086       else
6087 	vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0),
6088 					       slp_node);
6089     }
6090   if (vec_stmt)
6091     gcc_assert (vectype);
6092   if (!vectype)
6093     {
6094       if (dump_enabled_p ())
6095 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6096 			 "no vectype for scalar type %T\n",
6097 			 TREE_TYPE (op0));
6098 
6099       return false;
6100     }
6101 
6102   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
6103   nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
6104   if (maybe_ne (nunits_out, nunits_in))
6105     return false;
6106 
6107   tree vectype2 = NULL_TREE, vectype3 = NULL_TREE;
6108   if (op_type == binary_op || op_type == ternary_op)
6109     {
6110       op1 = gimple_assign_rhs2 (stmt);
6111       if (!vect_is_simple_use (op1, vinfo, &dt[1], &vectype2))
6112 	{
6113 	  if (dump_enabled_p ())
6114 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6115                              "use not simple.\n");
6116 	  return false;
6117 	}
6118     }
6119   if (op_type == ternary_op)
6120     {
6121       op2 = gimple_assign_rhs3 (stmt);
6122       if (!vect_is_simple_use (op2, vinfo, &dt[2], &vectype3))
6123 	{
6124 	  if (dump_enabled_p ())
6125 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6126                              "use not simple.\n");
6127 	  return false;
6128 	}
6129     }
6130 
6131   /* Multiple types in SLP are handled by creating the appropriate number of
6132      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
6133      case of SLP.  */
6134   if (slp_node)
6135     {
6136       ncopies = 1;
6137       vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6138     }
6139   else
6140     {
6141       ncopies = vect_get_num_copies (loop_vinfo, vectype);
6142       vec_num = 1;
6143     }
6144 
6145   gcc_assert (ncopies >= 1);
6146 
6147   /* Reject attempts to combine mask types with nonmask types, e.g. if
6148      we have an AND between a (nonmask) boolean loaded from memory and
6149      a (mask) boolean result of a comparison.
6150 
6151      TODO: We could easily fix these cases up using pattern statements.  */
6152   if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p
6153       || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p)
6154       || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p))
6155     {
6156       if (dump_enabled_p ())
6157 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6158 			 "mixed mask and nonmask vector types\n");
6159       return false;
6160     }
6161 
6162   /* Supportable by target?  */
6163 
6164   vec_mode = TYPE_MODE (vectype);
6165   if (code == MULT_HIGHPART_EXPR)
6166     target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
6167   else
6168     {
6169       optab = optab_for_tree_code (code, vectype, optab_default);
6170       if (!optab)
6171 	{
6172           if (dump_enabled_p ())
6173             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6174                              "no optab.\n");
6175 	  return false;
6176 	}
6177       target_support_p = (optab_handler (optab, vec_mode)
6178 			  != CODE_FOR_nothing);
6179     }
6180 
6181   if (!target_support_p)
6182     {
6183       if (dump_enabled_p ())
6184 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6185                          "op not supported by target.\n");
6186       /* Check only during analysis.  */
6187       if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
6188 	  || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
6189         return false;
6190       if (dump_enabled_p ())
6191 	dump_printf_loc (MSG_NOTE, vect_location,
6192                          "proceeding using word mode.\n");
6193     }
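  /* E.g. (illustrative) a bitwise AND on a vector whose total size equals
     UNITS_PER_WORD can be carried out in an ordinary integer register even
     without SIMD support.  */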
6194 
6195   /* Worthwhile without SIMD support?  Check only during analysis.  */
6196   if (!VECTOR_MODE_P (vec_mode)
6197       && !vec_stmt
6198       && !vect_worthwhile_without_simd_p (vinfo, code))
6199     {
6200       if (dump_enabled_p ())
6201         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6202                          "not worthwhile without SIMD support.\n");
6203       return false;
6204     }
6205 
6206   int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
6207   vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
6208   internal_fn cond_fn = get_conditional_internal_fn (code);
6209 
6210   if (!vec_stmt) /* transformation not required.  */
6211     {
6212       /* If this operation is part of a reduction, a fully-masked loop
6213 	 should only change the active lanes of the reduction chain,
6214 	 keeping the inactive lanes as-is.  */
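      /* E.g. (illustrative) for a PLUS_EXPR in the reduction chain this
	 means asking for a conditional add such as IFN_COND_ADD, so that
	 masked-off lanes simply pass the chain input through unchanged.  */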
6215       if (loop_vinfo
6216 	  && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)
6217 	  && reduc_idx >= 0)
6218 	{
6219 	  if (cond_fn == IFN_LAST
6220 	      || !direct_internal_fn_supported_p (cond_fn, vectype,
6221 						  OPTIMIZE_FOR_SPEED))
6222 	    {
6223 	      if (dump_enabled_p ())
6224 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6225 				 "can't use a fully-masked loop because no"
6226 				 " conditional operation is available.\n");
6227 	      LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
6228 	    }
6229 	  else
6230 	    vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
6231 				   vectype, NULL);
6232 	}
6233 
6234       STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
6235       DUMP_VECT_SCOPE ("vectorizable_operation");
6236       vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
6237       return true;
6238     }
6239 
6240   /* Transform.  */
6241 
6242   if (dump_enabled_p ())
6243     dump_printf_loc (MSG_NOTE, vect_location,
6244                      "transform binary/unary operation.\n");
6245 
6246   bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
6247 
6248   /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6249      vectors with unsigned elements, but the result is signed.  So, we
6250      need to compute the MINUS_EXPR into a vectype temporary and
6251      VIEW_CONVERT_EXPR it into the final vectype_out result.  */
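  /* Illustrative sketch of the GIMPLE generated for _1 = q_4 - p_5:
       vect_tmp = MINUS_EXPR <vect_q, vect_p>;               (unsigned vectype)
       vect__1 = VIEW_CONVERT_EXPR<vectype_out> (vect_tmp);  (signed result)  */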
6252   tree vec_cvt_dest = NULL_TREE;
6253   if (orig_code == POINTER_DIFF_EXPR)
6254     {
6255       vec_dest = vect_create_destination_var (scalar_dest, vectype);
6256       vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
6257     }
6258   /* Handle def.  */
6259   else
6260     vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
6261 
6262   /* In case the vectorization factor (VF) is bigger than the number
6263      of elements that we can fit in a vectype (nunits), we have to generate
6264      more than one vector stmt - i.e., we need to "unroll" the
6265      vector stmt by a factor VF/nunits.  In doing so, we record a pointer
6266      from one copy of the vector stmt to the next, in the field
6267      STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
6268      stages to find the correct vector defs to be used when vectorizing
6269      stmts that use the defs of the current stmt.  The example below
6270      illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6271      we need to create 4 vectorized stmts):
6272 
6273      before vectorization:
6274                                 RELATED_STMT    VEC_STMT
6275         S1:     x = memref      -               -
6276         S2:     z = x + 1       -               -
6277 
6278      step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6279              there):
6280                                 RELATED_STMT    VEC_STMT
6281         VS1_0:  vx0 = memref0   VS1_1           -
6282         VS1_1:  vx1 = memref1   VS1_2           -
6283         VS1_2:  vx2 = memref2   VS1_3           -
6284         VS1_3:  vx3 = memref3   -               -
6285         S1:     x = load        -               VS1_0
6286         S2:     z = x + 1       -               -
6287 
6288      step2: vectorize stmt S2 (done here):
6289         To vectorize stmt S2 we first need to find the relevant vector
6290         def for the first operand 'x'.  This is, as usual, obtained from
6291         the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6292         that defines 'x' (S1).  This way we find the stmt VS1_0, and the
6293         relevant vector def 'vx0'.  Having found 'vx0' we can generate
6294         the vector stmt VS2_0, and as usual, record it in the
6295         STMT_VINFO_VEC_STMT of stmt S2.
6296         When creating the second copy (VS2_1), we obtain the relevant vector
6297         def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6298         stmt VS1_0.  This way we find the stmt VS1_1 and the relevant
6299         vector def 'vx1'.  Using 'vx1' we create stmt VS2_1 and record a
6300         pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6301         Similarly when creating stmts VS2_2 and VS2_3.  This is the resulting
6302         chain of stmts and pointers:
6303                                 RELATED_STMT    VEC_STMT
6304         VS1_0:  vx0 = memref0   VS1_1           -
6305         VS1_1:  vx1 = memref1   VS1_2           -
6306         VS1_2:  vx2 = memref2   VS1_3           -
6307         VS1_3:  vx3 = memref3   -               -
6308         S1:     x = load        -               VS1_0
6309         VS2_0:  vz0 = vx0 + v1  VS2_1           -
6310         VS2_1:  vz1 = vx1 + v1  VS2_2           -
6311         VS2_2:  vz2 = vx2 + v1  VS2_3           -
6312         VS2_3:  vz3 = vx3 + v1  -               -
6313         S2:     z = x + 1       -               VS2_0  */
6314 
6315   prev_stmt_info = NULL;
6316   for (j = 0; j < ncopies; j++)
6317     {
6318       /* Handle uses.  */
6319       if (j == 0)
6320 	{
6321 	  if (op_type == binary_op)
6322 	    vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0, &vec_oprnds1,
6323 			       slp_node);
6324 	  else if (op_type == ternary_op)
6325 	    {
6326 	      if (slp_node)
6327 		{
6328 		  auto_vec<vec<tree> > vec_defs(3);
6329 		  vect_get_slp_defs (slp_node, &vec_defs);
6330 		  vec_oprnds0 = vec_defs[0];
6331 		  vec_oprnds1 = vec_defs[1];
6332 		  vec_oprnds2 = vec_defs[2];
6333 		}
6334 	      else
6335 		{
6336 		  vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0,
6337 				     &vec_oprnds1, NULL);
6338 		  vect_get_vec_defs (op2, NULL_TREE, stmt_info, &vec_oprnds2,
6339 				     NULL, NULL);
6340 		}
6341 	    }
6342 	  else
6343 	    vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
6344 			       slp_node);
6345 	}
6346       else
6347 	{
6348 	  vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, &vec_oprnds1);
6349 	  if (op_type == ternary_op)
6350 	    {
6351 	      tree vec_oprnd = vec_oprnds2.pop ();
6352 	      vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (vinfo,
6353 							           vec_oprnd));
6354 	    }
6355 	}
6356 
6357       /* Arguments are ready.  Create the new vector stmt.  */
6358       stmt_vec_info new_stmt_info = NULL;
6359       FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6360         {
6361 	  vop1 = ((op_type == binary_op || op_type == ternary_op)
6362 		  ? vec_oprnds1[i] : NULL_TREE);
6363 	  vop2 = ((op_type == ternary_op)
6364 		  ? vec_oprnds2[i] : NULL_TREE);
6365 	  if (masked_loop_p && reduc_idx >= 0)
6366 	    {
6367 	      /* Perform the operation on active elements only and take
6368 		 inactive elements from the reduction chain input.  */
6369 	      gcc_assert (!vop2);
6370 	      vop2 = reduc_idx == 1 ? vop1 : vop0;
6371 	      tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6372 					      vectype, i * ncopies + j);
6373 	      gcall *call = gimple_build_call_internal (cond_fn, 4, mask,
6374 							vop0, vop1, vop2);
6375 	      new_temp = make_ssa_name (vec_dest, call);
6376 	      gimple_call_set_lhs (call, new_temp);
6377 	      gimple_call_set_nothrow (call, true);
6378 	      new_stmt_info
6379 		= vect_finish_stmt_generation (stmt_info, call, gsi);
6380 	    }
6381 	  else
6382 	    {
6383 	      gassign *new_stmt = gimple_build_assign (vec_dest, code,
6384 						       vop0, vop1, vop2);
6385 	      new_temp = make_ssa_name (vec_dest, new_stmt);
6386 	      gimple_assign_set_lhs (new_stmt, new_temp);
6387 	      new_stmt_info
6388 		= vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6389 	      if (vec_cvt_dest)
6390 		{
6391 		  new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
6392 		  gassign *new_stmt
6393 		    = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
6394 					   new_temp);
6395 		  new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
6396 		  gimple_assign_set_lhs (new_stmt, new_temp);
6397 		  new_stmt_info
6398 		    = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6399 		}
6400 	    }
6401           if (slp_node)
6402 	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
6403         }
6404 
6405       if (slp_node)
6406         continue;
6407 
6408       if (j == 0)
6409 	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
6410       else
6411 	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
6412       prev_stmt_info = new_stmt_info;
6413     }
6414 
6415   vec_oprnds0.release ();
6416   vec_oprnds1.release ();
6417   vec_oprnds2.release ();
6418 
6419   return true;
6420 }
6421 
6422 /* A helper function to ensure data reference DR_INFO's base alignment.  */
6423 
6424 static void
6425 ensure_base_align (dr_vec_info *dr_info)
6426 {
6427   if (dr_info->misalignment == DR_MISALIGNMENT_UNINITIALIZED)
6428     return;
6429 
6430   if (dr_info->base_misaligned)
6431     {
6432       tree base_decl = dr_info->base_decl;
6433 
6434       // We should only be able to increase the alignment of a base object if
6435       // we know what its new alignment should be at compile time.
6436       unsigned HOST_WIDE_INT align_base_to =
6437 	DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;
6438 
6439       if (decl_in_symtab_p (base_decl))
6440 	symtab_node::get (base_decl)->increase_alignment (align_base_to);
6441       else if (DECL_ALIGN (base_decl) < align_base_to)
6442 	{
6443 	  SET_DECL_ALIGN (base_decl, align_base_to);
6444           DECL_USER_ALIGN (base_decl) = 1;
6445 	}
6446       dr_info->base_misaligned = false;
6447     }
6448 }
6449 
6450 
6451 /* Function get_group_alias_ptr_type.
6452 
6453    Return the alias type for the group starting at FIRST_STMT_INFO.  */
6454 
6455 static tree
6456 get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
6457 {
6458   struct data_reference *first_dr, *next_dr;
6459 
6460   first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
6461   stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
6462   while (next_stmt_info)
6463     {
6464       next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
6465       if (get_alias_set (DR_REF (first_dr))
6466 	  != get_alias_set (DR_REF (next_dr)))
6467 	{
6468 	  if (dump_enabled_p ())
6469 	    dump_printf_loc (MSG_NOTE, vect_location,
6470 			     "conflicting alias set types.\n");
6471 	  return ptr_type_node;
6472 	}
6473       next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
6474     }
6475   return reference_alias_ptr_type (DR_REF (first_dr));
6476 }
6477 
6478 
6479 /* Function scan_operand_equal_p.
6480 
6481    Helper function for check_scan_store.  Compare two references
6482    with .GOMP_SIMD_LANE bases.  */
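/* E.g. (illustrative) two occurrences of D.2042[_25], one in the combiner
   statement and one in the store, are treated as equal even when one of
   them has been folded into a MEM_REF based on &D.2042 with the lane index
   as a separate (possibly scaled and converted) offset.  */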
6483 
6484 static bool
6485 scan_operand_equal_p (tree ref1, tree ref2)
6486 {
6487   tree ref[2] = { ref1, ref2 };
6488   poly_int64 bitsize[2], bitpos[2];
6489   tree offset[2], base[2];
6490   for (int i = 0; i < 2; ++i)
6491     {
6492       machine_mode mode;
6493       int unsignedp, reversep, volatilep = 0;
6494       base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i],
6495       				     &offset[i], &mode, &unsignedp,
6496       				     &reversep, &volatilep);
6497       if (reversep || volatilep || maybe_ne (bitpos[i], 0))
6498 	return false;
6499       if (TREE_CODE (base[i]) == MEM_REF
6500 	  && offset[i] == NULL_TREE
6501 	  && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME)
6502 	{
6503 	  gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0));
6504 	  if (is_gimple_assign (def_stmt)
6505 	      && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR
6506 	      && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR
6507 	      && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME)
6508 	    {
6509 	      if (maybe_ne (mem_ref_offset (base[i]), 0))
6510 		return false;
6511 	      base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
6512 	      offset[i] = gimple_assign_rhs2 (def_stmt);
6513 	    }
6514 	}
6515     }
6516 
6517   if (!operand_equal_p (base[0], base[1], 0))
6518     return false;
6519   if (maybe_ne (bitsize[0], bitsize[1]))
6520     return false;
6521   if (offset[0] != offset[1])
6522     {
6523       if (!offset[0] || !offset[1])
6524 	return false;
6525       if (!operand_equal_p (offset[0], offset[1], 0))
6526 	{
6527 	  tree step[2];
6528 	  for (int i = 0; i < 2; ++i)
6529 	    {
6530 	      step[i] = integer_one_node;
6531 	      if (TREE_CODE (offset[i]) == SSA_NAME)
6532 		{
6533 		  gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6534 		  if (is_gimple_assign (def_stmt)
6535 		      && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
6536 		      && (TREE_CODE (gimple_assign_rhs2 (def_stmt))
6537 			  == INTEGER_CST))
6538 		    {
6539 		      step[i] = gimple_assign_rhs2 (def_stmt);
6540 		      offset[i] = gimple_assign_rhs1 (def_stmt);
6541 		    }
6542 		}
6543 	      else if (TREE_CODE (offset[i]) == MULT_EXPR)
6544 		{
6545 		  step[i] = TREE_OPERAND (offset[i], 1);
6546 		  offset[i] = TREE_OPERAND (offset[i], 0);
6547 		}
6548 	      tree rhs1 = NULL_TREE;
6549 	      if (TREE_CODE (offset[i]) == SSA_NAME)
6550 		{
6551 		  gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6552 		  if (gimple_assign_cast_p (def_stmt))
6553 		    rhs1 = gimple_assign_rhs1 (def_stmt);
6554 		}
6555 	      else if (CONVERT_EXPR_P (offset[i]))
6556 		rhs1 = TREE_OPERAND (offset[i], 0);
6557 	      if (rhs1
6558 		  && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
6559 		  && INTEGRAL_TYPE_P (TREE_TYPE (offset[i]))
6560 		  && (TYPE_PRECISION (TREE_TYPE (offset[i]))
6561 		      >= TYPE_PRECISION (TREE_TYPE (rhs1))))
6562 		offset[i] = rhs1;
6563 	    }
6564 	  if (!operand_equal_p (offset[0], offset[1], 0)
6565 	      || !operand_equal_p (step[0], step[1], 0))
6566 	    return false;
6567 	}
6568     }
6569   return true;
6570 }
6571 
6572 
6573 enum scan_store_kind {
6574   /* Normal permutation.  */
6575   scan_store_kind_perm,
6576 
6577   /* Whole vector left shift permutation with zero init.  */
6578   scan_store_kind_lshift_zero,
6579 
6580   /* Whole vector left shift permutation and VEC_COND_EXPR.  */
6581   scan_store_kind_lshift_cond
6582 };
6583 
6584 /* Function scan_store_can_perm_p.
6585 
6586    Verify if we can perform the needed permutations or whole vector shifts.
6587    Return -1 on failure, otherwise the exact log2 of vectype's nunits.
6588    USE_WHOLE_VECTOR, if non-NULL, is filled with the enum scan_store_kind
6589    operation to perform at each step.  */
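/* As an illustration (assuming an 8-element vector), step I of the scan
   keeps the first 2**I lanes from the initializer vector and takes the
   remaining lanes from the running vector:
     i=0: { 0, 8, 9, 10, 11, 12, 13, 14 }
     i=1: { 0, 1, 8, 9, 10, 11, 12, 13 }
     i=2: { 0, 1, 2, 3, 8, 9, 10, 11 }
   while the final step broadcasts the last lane: { 7, 7, 7, 7, 7, 7, 7, 7 }.  */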
6590 
6591 static int
6592 scan_store_can_perm_p (tree vectype, tree init,
6593 		       vec<enum scan_store_kind> *use_whole_vector = NULL)
6594 {
6595   enum machine_mode vec_mode = TYPE_MODE (vectype);
6596   unsigned HOST_WIDE_INT nunits;
6597   if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
6598     return -1;
6599   int units_log2 = exact_log2 (nunits);
6600   if (units_log2 <= 0)
6601     return -1;
6602 
6603   int i;
6604   enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
6605   for (i = 0; i <= units_log2; ++i)
6606     {
6607       unsigned HOST_WIDE_INT j, k;
6608       enum scan_store_kind kind = scan_store_kind_perm;
6609       vec_perm_builder sel (nunits, nunits, 1);
6610       sel.quick_grow (nunits);
6611       if (i == units_log2)
6612 	{
6613 	  for (j = 0; j < nunits; ++j)
6614 	    sel[j] = nunits - 1;
6615 	}
6616       else
6617 	{
6618 	  for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
6619 	    sel[j] = j;
6620 	  for (k = 0; j < nunits; ++j, ++k)
6621 	    sel[j] = nunits + k;
6622 	}
6623       vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
6624       if (!can_vec_perm_const_p (vec_mode, indices))
6625 	{
6626 	  if (i == units_log2)
6627 	    return -1;
6628 
6629 	  if (whole_vector_shift_kind == scan_store_kind_perm)
6630 	    {
6631 	      if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
6632 		return -1;
6633 	      whole_vector_shift_kind = scan_store_kind_lshift_zero;
6634 	      /* Whole vector shifts shift in zeros, so if INIT is an all-zeros
6635 		 constant, there is no need to do anything further.  */
6636 	      if ((TREE_CODE (init) != INTEGER_CST
6637 		   && TREE_CODE (init) != REAL_CST)
6638 		  || !initializer_zerop (init))
6639 		{
6640 		  tree masktype = truth_type_for (vectype);
6641 		  if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
6642 		    return -1;
6643 		  whole_vector_shift_kind = scan_store_kind_lshift_cond;
6644 		}
6645 	    }
6646 	  kind = whole_vector_shift_kind;
6647 	}
6648       if (use_whole_vector)
6649 	{
6650 	  if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
6651 	    use_whole_vector->safe_grow_cleared (i);
6652 	  if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
6653 	    use_whole_vector->safe_push (kind);
6654 	}
6655     }
6656 
6657   return units_log2;
6658 }
6659 
6660 
6661 /* Function check_scan_store.
6662 
6663    Check magic stores for #pragma omp scan {in,ex}clusive reductions.  */
6664 
6665 static bool
6666 check_scan_store (stmt_vec_info stmt_info, tree vectype,
6667 		  enum vect_def_type rhs_dt, bool slp, tree mask,
6668 		  vect_memory_access_type memory_access_type)
6669 {
6670   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6671   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
6672   tree ref_type;
6673 
6674   gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
6675   if (slp
6676       || mask
6677       || memory_access_type != VMAT_CONTIGUOUS
6678       || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
6679       || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
6680       || loop_vinfo == NULL
6681       || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
6682       || STMT_VINFO_GROUPED_ACCESS (stmt_info)
6683       || !integer_zerop (get_dr_vinfo_offset (dr_info))
6684       || !integer_zerop (DR_INIT (dr_info->dr))
6685       || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
6686       || !alias_sets_conflict_p (get_alias_set (vectype),
6687 				 get_alias_set (TREE_TYPE (ref_type))))
6688     {
6689       if (dump_enabled_p ())
6690 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6691 			 "unsupported OpenMP scan store.\n");
6692       return false;
6693     }
6694 
6695   /* We need to pattern match code built by OpenMP lowering and simplified
6696      by subsequent optimizations into something we can handle.
6697      #pragma omp simd reduction(inscan,+:r)
6698      for (...)
6699        {
6700 	 r += something ();
6701 	 #pragma omp scan inclusive (r)
6702 	 use (r);
6703        }
6704      shall have body with:
6705        // Initialization for input phase, store the reduction initializer:
6706        _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6707        _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6708        D.2042[_21] = 0;
6709        // Actual input phase:
6710        ...
6711        r.0_5 = D.2042[_20];
6712        _6 = _4 + r.0_5;
6713        D.2042[_20] = _6;
6714        // Initialization for scan phase:
6715        _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
6716        _26 = D.2043[_25];
6717        _27 = D.2042[_25];
6718        _28 = _26 + _27;
6719        D.2043[_25] = _28;
6720        D.2042[_25] = _28;
6721        // Actual scan phase:
6722        ...
6723        r.1_8 = D.2042[_20];
6724        ...
6725      The "omp simd array" variable D.2042 holds the privatized copy used
6726      inside the loop and D.2043 is another one that holds copies of
6727      the current original list item.  The separate GOMP_SIMD_LANE ifn
6728      kinds are there in order to allow optimizing the initializer store
6729      and combiner sequence, e.g. if it is originally some C++-ish
6730      user-defined reduction, while still allowing the vectorizer to
6731      pattern recognize it and turn it into the appropriate vectorized scan.
6732 
6733      For exclusive scan, this is slightly different:
6734      #pragma omp simd reduction(inscan,+:r)
6735      for (...)
6736        {
6737 	 use (r);
6738 	 #pragma omp scan exclusive (r)
6739 	 r += something ();
6740        }
6741      shall have body with:
6742        // Initialization for input phase, store the reduction initializer:
6743        _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6744        _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6745        D.2042[_21] = 0;
6746        // Actual input phase:
6747        ...
6748        r.0_5 = D.2042[_20];
6749        _6 = _4 + r.0_5;
6750        D.2042[_20] = _6;
6751        // Initialization for scan phase:
6752        _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
6753        _26 = D.2043[_25];
6754        D.2044[_25] = _26;
6755        _27 = D.2042[_25];
6756        _28 = _26 + _27;
6757        D.2043[_25] = _28;
6758        // Actual scan phase:
6759        ...
6760        r.1_8 = D.2044[_20];
6761        ...  */
6762 
6763   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
6764     {
6765       /* Match the D.2042[_21] = 0; store above.  Just require that
6766 	 it is a constant or external definition store.  */
6767       if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def)
6768 	{
6769 	 fail_init:
6770 	  if (dump_enabled_p ())
6771 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6772 			     "unsupported OpenMP scan initializer store.\n");
6773 	  return false;
6774 	}
6775 
6776       if (! loop_vinfo->scan_map)
6777 	loop_vinfo->scan_map = new hash_map<tree, tree>;
6778       tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6779       tree &cached = loop_vinfo->scan_map->get_or_insert (var);
6780       if (cached)
6781 	goto fail_init;
6782       cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info));
6783 
6784       /* These stores can be vectorized normally.  */
6785       return true;
6786     }
6787 
6788   if (rhs_dt != vect_internal_def)
6789     {
6790      fail:
6791       if (dump_enabled_p ())
6792 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6793 			 "unsupported OpenMP scan combiner pattern.\n");
6794       return false;
6795     }
6796 
6797   gimple *stmt = STMT_VINFO_STMT (stmt_info);
6798   tree rhs = gimple_assign_rhs1 (stmt);
6799   if (TREE_CODE (rhs) != SSA_NAME)
6800     goto fail;
6801 
6802   gimple *other_store_stmt = NULL;
6803   tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6804   bool inscan_var_store
6805     = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
6806 
6807   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6808     {
6809       if (!inscan_var_store)
6810 	{
6811 	  use_operand_p use_p;
6812 	  imm_use_iterator iter;
6813 	  FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6814 	    {
6815 	      gimple *use_stmt = USE_STMT (use_p);
6816 	      if (use_stmt == stmt || is_gimple_debug (use_stmt))
6817 		continue;
6818 	      if (gimple_bb (use_stmt) != gimple_bb (stmt)
6819 		  || !is_gimple_assign (use_stmt)
6820 		  || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS
6821 		  || other_store_stmt
6822 		  || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME)
6823 		goto fail;
6824 	      other_store_stmt = use_stmt;
6825 	    }
6826 	  if (other_store_stmt == NULL)
6827 	    goto fail;
6828 	  rhs = gimple_assign_lhs (other_store_stmt);
6829 	  if (!single_imm_use (rhs, &use_p, &other_store_stmt))
6830 	    goto fail;
6831 	}
6832     }
6833   else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
6834     {
6835       use_operand_p use_p;
6836       imm_use_iterator iter;
6837       FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6838 	{
6839 	  gimple *use_stmt = USE_STMT (use_p);
6840 	  if (use_stmt == stmt || is_gimple_debug (use_stmt))
6841 	    continue;
6842 	  if (other_store_stmt)
6843 	    goto fail;
6844 	  other_store_stmt = use_stmt;
6845 	}
6846     }
6847   else
6848     goto fail;
6849 
6850   gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
6851   if (gimple_bb (def_stmt) != gimple_bb (stmt)
6852       || !is_gimple_assign (def_stmt)
6853       || gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS)
6854     goto fail;
6855 
6856   enum tree_code code = gimple_assign_rhs_code (def_stmt);
6857   /* For pointer addition, we should use the normal plus for the vector
6858      operation.  */
6859   switch (code)
6860     {
6861     case POINTER_PLUS_EXPR:
6862       code = PLUS_EXPR;
6863       break;
6864     case MULT_HIGHPART_EXPR:
6865       goto fail;
6866     default:
6867       break;
6868     }
6869   if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code))
6870     goto fail;
6871 
6872   tree rhs1 = gimple_assign_rhs1 (def_stmt);
6873   tree rhs2 = gimple_assign_rhs2 (def_stmt);
6874   if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
6875     goto fail;
6876 
6877   gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
6878   gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
6879   if (gimple_bb (load1_stmt) != gimple_bb (stmt)
6880       || !gimple_assign_load_p (load1_stmt)
6881       || gimple_bb (load2_stmt) != gimple_bb (stmt)
6882       || !gimple_assign_load_p (load2_stmt))
6883     goto fail;
6884 
6885   stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
6886   stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
6887   if (load1_stmt_info == NULL
6888       || load2_stmt_info == NULL
6889       || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info)
6890 	  != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))
6891       || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info)
6892 	  != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
6893     goto fail;
6894 
6895   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store)
6896     {
6897       dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
6898       if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR
6899 	  || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0)))
6900 	goto fail;
6901       tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
6902       tree lrhs;
6903       if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6904 	lrhs = rhs1;
6905       else
6906 	lrhs = rhs2;
6907       use_operand_p use_p;
6908       imm_use_iterator iter;
6909       FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs)
6910 	{
6911 	  gimple *use_stmt = USE_STMT (use_p);
6912 	  if (use_stmt == def_stmt || is_gimple_debug (use_stmt))
6913 	    continue;
6914 	  if (other_store_stmt)
6915 	    goto fail;
6916 	  other_store_stmt = use_stmt;
6917 	}
6918     }
6919 
6920   if (other_store_stmt == NULL)
6921     goto fail;
6922   if (gimple_bb (other_store_stmt) != gimple_bb (stmt)
6923       || !gimple_store_p (other_store_stmt))
6924     goto fail;
6925 
6926   stmt_vec_info other_store_stmt_info
6927     = loop_vinfo->lookup_stmt (other_store_stmt);
6928   if (other_store_stmt_info == NULL
6929       || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info)
6930 	  != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
6931     goto fail;
6932 
6933   gimple *stmt1 = stmt;
6934   gimple *stmt2 = other_store_stmt;
6935   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
6936     std::swap (stmt1, stmt2);
6937   if (scan_operand_equal_p (gimple_assign_lhs (stmt1),
6938 			    gimple_assign_rhs1 (load2_stmt)))
6939     {
6940       std::swap (rhs1, rhs2);
6941       std::swap (load1_stmt, load2_stmt);
6942       std::swap (load1_stmt_info, load2_stmt_info);
6943     }
6944   if (!scan_operand_equal_p (gimple_assign_lhs (stmt1),
6945 			     gimple_assign_rhs1 (load1_stmt)))
6946     goto fail;
6947 
6948   tree var3 = NULL_TREE;
6949   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3
6950       && !scan_operand_equal_p (gimple_assign_lhs (stmt2),
6951 				gimple_assign_rhs1 (load2_stmt)))
6952     goto fail;
6953   else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6954     {
6955       dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
6956       if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR
6957 	  || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0)))
6958 	goto fail;
6959       var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
6960       if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3))
6961 	  || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3))
6962 	  || lookup_attribute ("omp simd inscan exclusive",
6963 			       DECL_ATTRIBUTES (var3)))
6964 	goto fail;
6965     }
6966 
6967   dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
6968   if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
6969       || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0)))
6970     goto fail;
6971 
6972   tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6973   tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0);
6974   if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1))
6975       || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2))
6976       || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6977 	 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2))))
6978     goto fail;
6979 
6980   if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6981     std::swap (var1, var2);
6982 
6983   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6984     {
6985       if (!lookup_attribute ("omp simd inscan exclusive",
6986 			     DECL_ATTRIBUTES (var1)))
6987 	goto fail;
6988       var1 = var3;
6989     }
6990 
6991   if (loop_vinfo->scan_map == NULL)
6992     goto fail;
6993   tree *init = loop_vinfo->scan_map->get (var1);
6994   if (init == NULL)
6995     goto fail;
6996 
6997   /* The IL is as expected, now check if we can actually vectorize it.
6998      Inclusive scan:
6999        _26 = D.2043[_25];
7000        _27 = D.2042[_25];
7001        _28 = _26 + _27;
7002        D.2043[_25] = _28;
7003        D.2042[_25] = _28;
7004      should be vectorized as (where _40 is the vectorized rhs
7005      from the D.2042[_21] = 0; store):
7006        _30 = MEM <vector(8) int> [(int *)&D.2043];
7007        _31 = MEM <vector(8) int> [(int *)&D.2042];
7008        _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7009        _33 = _31 + _32;
7010        // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
7011        _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7012        _35 = _33 + _34;
7013        // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7014        //         _31[1]+.._31[4], ... _31[4]+.._31[7] };
7015        _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7016        _37 = _35 + _36;
7017        // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7018        //         _31[0]+.._31[4], ... _31[0]+.._31[7] };
7019        _38 = _30 + _37;
7020        _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7021        MEM <vector(8) int> [(int *)&D.2043] = _39;
7022        MEM <vector(8) int> [(int *)&D.2042] = _38;
7023      Exclusive scan:
7024        _26 = D.2043[_25];
7025        D.2044[_25] = _26;
7026        _27 = D.2042[_25];
7027        _28 = _26 + _27;
7028        D.2043[_25] = _28;
7029      should be vectorized as (where _40 is the vectorized rhs
7030      from the D.2042[_21] = 0; store):
7031        _30 = MEM <vector(8) int> [(int *)&D.2043];
7032        _31 = MEM <vector(8) int> [(int *)&D.2042];
7033        _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7034        _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7035        _34 = _32 + _33;
7036        // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
7037        //         _31[3]+_31[4], ... _31[5]+.._31[6] };
7038        _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7039        _36 = _34 + _35;
7040        // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7041        //         _31[1]+.._31[4], ... _31[3]+.._31[6] };
7042        _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7043        _38 = _36 + _37;
7044        // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7045        //         _31[0]+.._31[4], ... _31[0]+.._31[6] };
7046        _39 = _30 + _38;
7047        _50 = _31 + _39;
7048        _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7049        MEM <vector(8) int> [(int *)&D.2044] = _39;
7050        MEM <vector(8) int> [(int *)&D.2042] = _51;  */
7051   enum machine_mode vec_mode = TYPE_MODE (vectype);
7052   optab optab = optab_for_tree_code (code, vectype, optab_default);
7053   if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
7054     goto fail;
7055 
7056   int units_log2 = scan_store_can_perm_p (vectype, *init);
7057   if (units_log2 == -1)
7058     goto fail;
7059 
7060   return true;
7061 }
7062 
7063 
7064 /* Function vectorizable_scan_store.
7065 
7066    Helper of vectorizable_store; the arguments are as for vectorizable_store.
7067    Handle only the transformation, checking is done in check_scan_store.  */
7068 
7069 static bool
7070 vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7071 			 stmt_vec_info *vec_stmt, int ncopies)
7072 {
7073   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7074   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
7075   tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
7076   vec_info *vinfo = stmt_info->vinfo;
7077   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7078 
7079   if (dump_enabled_p ())
7080     dump_printf_loc (MSG_NOTE, vect_location,
7081 		     "transform scan store. ncopies = %d\n", ncopies);
7082 
7083   gimple *stmt = STMT_VINFO_STMT (stmt_info);
7084   tree rhs = gimple_assign_rhs1 (stmt);
7085   gcc_assert (TREE_CODE (rhs) == SSA_NAME);
7086 
7087   tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7088   bool inscan_var_store
7089     = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
7090 
7091   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7092     {
7093       use_operand_p use_p;
7094       imm_use_iterator iter;
7095       FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7096 	{
7097 	  gimple *use_stmt = USE_STMT (use_p);
7098 	  if (use_stmt == stmt || is_gimple_debug (use_stmt))
7099 	    continue;
7100 	  rhs = gimple_assign_lhs (use_stmt);
7101 	  break;
7102 	}
7103     }
7104 
7105   gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
7106   enum tree_code code = gimple_assign_rhs_code (def_stmt);
7107   if (code == POINTER_PLUS_EXPR)
7108     code = PLUS_EXPR;
7109   gcc_assert (TREE_CODE_LENGTH (code) == binary_op
7110 	      && commutative_tree_code (code));
7111   tree rhs1 = gimple_assign_rhs1 (def_stmt);
7112   tree rhs2 = gimple_assign_rhs2 (def_stmt);
7113   gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME);
7114   gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
7115   gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
7116   stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
7117   stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
7118   dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7119   dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7120   tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7121   tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7122 
7123   if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7124     {
7125       std::swap (rhs1, rhs2);
7126       std::swap (var1, var2);
7127       std::swap (load1_dr_info, load2_dr_info);
7128     }
7129 
7130   tree *init = loop_vinfo->scan_map->get (var1);
7131   gcc_assert (init);
7132 
7133   unsigned HOST_WIDE_INT nunits;
7134   if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
7135     gcc_unreachable ();
7136   auto_vec<enum scan_store_kind, 16> use_whole_vector;
7137   int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector);
7138   gcc_assert (units_log2 > 0);
7139   auto_vec<tree, 16> perms;
7140   perms.quick_grow (units_log2 + 1);
7141   tree zero_vec = NULL_TREE, masktype = NULL_TREE;
7142   for (int i = 0; i <= units_log2; ++i)
7143     {
7144       unsigned HOST_WIDE_INT j, k;
7145       vec_perm_builder sel (nunits, nunits, 1);
7146       sel.quick_grow (nunits);
7147       if (i == units_log2)
7148 	for (j = 0; j < nunits; ++j)
7149 	  sel[j] = nunits - 1;
7150       else
7151 	{
7152 	  for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
7153 	    sel[j] = j;
7154 	  for (k = 0; j < nunits; ++j, ++k)
7155 	    sel[j] = nunits + k;
7156 	}
7157       vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
7158       if (!use_whole_vector.is_empty ()
7159 	  && use_whole_vector[i] != scan_store_kind_perm)
7160 	{
7161 	  if (zero_vec == NULL_TREE)
7162 	    zero_vec = build_zero_cst (vectype);
7163 	  if (masktype == NULL_TREE
7164 	      && use_whole_vector[i] == scan_store_kind_lshift_cond)
7165 	    masktype = truth_type_for (vectype);
7166 	  perms[i] = vect_gen_perm_mask_any (vectype, indices);
7167 	}
7168       else
7169 	perms[i] = vect_gen_perm_mask_checked (vectype, indices);
7170     }
7171 
7172   stmt_vec_info prev_stmt_info = NULL;
7173   tree vec_oprnd1 = NULL_TREE;
7174   tree vec_oprnd2 = NULL_TREE;
7175   tree vec_oprnd3 = NULL_TREE;
7176   tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
7177   tree dataref_offset = build_int_cst (ref_type, 0);
7178   tree bump = vect_get_data_ptr_increment (dr_info, vectype, VMAT_CONTIGUOUS);
7179   tree ldataref_ptr = NULL_TREE;
7180   tree orig = NULL_TREE;
7181   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7182     ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
7183   for (int j = 0; j < ncopies; j++)
7184     {
7185       stmt_vec_info new_stmt_info;
7186       if (j == 0)
7187 	{
7188 	  vec_oprnd1 = vect_get_vec_def_for_operand (*init, stmt_info);
7189 	  if (ldataref_ptr == NULL)
7190 	    vec_oprnd2 = vect_get_vec_def_for_operand (rhs1, stmt_info);
7191 	  vec_oprnd3 = vect_get_vec_def_for_operand (rhs2, stmt_info);
7192 	  orig = vec_oprnd3;
7193 	}
7194       else
7195 	{
7196 	  vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1);
7197 	  if (ldataref_ptr == NULL)
7198 	    vec_oprnd2 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd2);
7199 	  vec_oprnd3 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd3);
7200 	  if (!inscan_var_store)
7201 	    dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7202 	}
7203 
7204       if (ldataref_ptr)
7205 	{
7206 	  vec_oprnd2 = make_ssa_name (vectype);
7207 	  tree data_ref = fold_build2 (MEM_REF, vectype,
7208 				       unshare_expr (ldataref_ptr),
7209 				       dataref_offset);
7210 	  vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
7211 	  gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
7212 	  new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
7213 	  if (prev_stmt_info == NULL)
7214 	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
7215 	  else
7216 	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7217 	  prev_stmt_info = new_stmt_info;
7218 	}
7219 
7220       tree v = vec_oprnd2;
7221       for (int i = 0; i < units_log2; ++i)
7222 	{
7223 	  tree new_temp = make_ssa_name (vectype);
7224 	  gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR,
7225 					   (zero_vec
7226 					    && (use_whole_vector[i]
7227 						!= scan_store_kind_perm))
7228 					   ? zero_vec : vec_oprnd1, v,
7229 					   perms[i]);
7230 	  new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
7231 	  if (prev_stmt_info == NULL)
7232 	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
7233 	  else
7234 	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7235 	  prev_stmt_info = new_stmt_info;
7236 
7237 	  if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond)
7238 	    {
7239 	      /* The whole-vector shift shifted in zeros, but if *init is not
7240 		 an all-zeros constant (initializer_zerop), we need to replace
7241 		 the shifted-in elements with elements from vec_oprnd1.  */
7242 	      tree_vector_builder vb (masktype, nunits, 1);
7243 	      for (unsigned HOST_WIDE_INT k = 0; k < nunits; ++k)
7244 		vb.quick_push (k < (HOST_WIDE_INT_1U << i)
7245 			       ? boolean_false_node : boolean_true_node);
7246 
7247 	      tree new_temp2 = make_ssa_name (vectype);
7248 	      g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (),
7249 				       new_temp, vec_oprnd1);
7250 	      new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
7251 	      STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7252 	      prev_stmt_info = new_stmt_info;
7253 	      new_temp = new_temp2;
7254 	    }
7255 
7256 	  /* For exclusive scan, perform the perms[i] permutation once
7257 	     more.  */
7258 	  if (i == 0
7259 	      && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4
7260 	      && v == vec_oprnd2)
7261 	    {
7262 	      v = new_temp;
7263 	      --i;
7264 	      continue;
7265 	    }
7266 
7267 	  tree new_temp2 = make_ssa_name (vectype);
7268 	  g = gimple_build_assign (new_temp2, code, v, new_temp);
7269 	  new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
7270 	  STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7271 	  prev_stmt_info = new_stmt_info;
7272 
7273 	  v = new_temp2;
7274 	}
7275 
7276       tree new_temp = make_ssa_name (vectype);
7277       gimple *g = gimple_build_assign (new_temp, code, orig, v);
7278       new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
7279       STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7280       prev_stmt_info = new_stmt_info;
7281 
7282       tree last_perm_arg = new_temp;
7283       /* For exclusive scan, new_temp computed above is the exclusive scan
7284 	 prefix sum.  Turn it into an inclusive prefix sum for the broadcast
7285 	 of the last element into orig.  */
7286       if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7287 	{
7288 	  last_perm_arg = make_ssa_name (vectype);
7289 	  g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
7290 	  new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
7291 	  STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7292 	  prev_stmt_info = new_stmt_info;
7293 	}
7294 
7295       orig = make_ssa_name (vectype);
7296       g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
7297 			       last_perm_arg, perms[units_log2]);
7298       new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
7299       STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7300       prev_stmt_info = new_stmt_info;
7301 
7302       if (!inscan_var_store)
7303 	{
7304 	  tree data_ref = fold_build2 (MEM_REF, vectype,
7305 				       unshare_expr (dataref_ptr),
7306 				       dataref_offset);
7307 	  vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7308 	  g = gimple_build_assign (data_ref, new_temp);
7309 	  new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
7310 	  STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7311 	  prev_stmt_info = new_stmt_info;
7312 	}
7313     }
7314 
7315   if (inscan_var_store)
7316     for (int j = 0; j < ncopies; j++)
7317       {
7318 	if (j != 0)
7319 	  dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7320 
7321 	tree data_ref = fold_build2 (MEM_REF, vectype,
7322 				     unshare_expr (dataref_ptr),
7323 				     dataref_offset);
7324 	vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7325 	gimple *g = gimple_build_assign (data_ref, orig);
7326 	stmt_vec_info new_stmt_info
7327 	  = vect_finish_stmt_generation (stmt_info, g, gsi);
7328 	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7329 	prev_stmt_info = new_stmt_info;
7330       }
7331   return true;
7332 }
7333 
7334 
7335 /* Function vectorizable_store.
7336 
7337    Check if STMT_INFO defines a non scalar data-ref (array/pointer/structure)
7338    that can be vectorized.
7339    If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
7340    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7341    Return true if STMT_INFO is vectorizable in this way.  */
7342 
7343 static bool
7344 vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7345 		    stmt_vec_info *vec_stmt, slp_tree slp_node,
7346 		    stmt_vector_for_cost *cost_vec)
7347 {
7348   tree data_ref;
7349   tree op;
7350   tree vec_oprnd = NULL_TREE;
7351   tree elem_type;
7352   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7353   class loop *loop = NULL;
7354   machine_mode vec_mode;
7355   tree dummy;
7356   enum dr_alignment_support alignment_support_scheme;
7357   enum vect_def_type rhs_dt = vect_unknown_def_type;
7358   enum vect_def_type mask_dt = vect_unknown_def_type;
7359   stmt_vec_info prev_stmt_info = NULL;
7360   tree dataref_ptr = NULL_TREE;
7361   tree dataref_offset = NULL_TREE;
7362   gimple *ptr_incr = NULL;
7363   int ncopies;
7364   int j;
7365   stmt_vec_info first_stmt_info;
7366   bool grouped_store;
7367   unsigned int group_size, i;
7368   vec<tree> oprnds = vNULL;
7369   vec<tree> result_chain = vNULL;
7370   tree offset = NULL_TREE;
7371   vec<tree> vec_oprnds = vNULL;
7372   bool slp = (slp_node != NULL);
7373   unsigned int vec_num;
7374   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7375   vec_info *vinfo = stmt_info->vinfo;
7376   tree aggr_type;
7377   gather_scatter_info gs_info;
7378   poly_uint64 vf;
7379   vec_load_store_type vls_type;
7380   tree ref_type;
7381 
7382   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7383     return false;
7384 
7385   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7386       && ! vec_stmt)
7387     return false;
7388 
7389   /* Is vectorizable store? */
7390 
7391   tree mask = NULL_TREE, mask_vectype = NULL_TREE;
7392   if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
7393     {
7394       tree scalar_dest = gimple_assign_lhs (assign);
7395       if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
7396 	  && is_pattern_stmt_p (stmt_info))
7397 	scalar_dest = TREE_OPERAND (scalar_dest, 0);
7398       if (TREE_CODE (scalar_dest) != ARRAY_REF
7399 	  && TREE_CODE (scalar_dest) != BIT_FIELD_REF
7400 	  && TREE_CODE (scalar_dest) != INDIRECT_REF
7401 	  && TREE_CODE (scalar_dest) != COMPONENT_REF
7402 	  && TREE_CODE (scalar_dest) != IMAGPART_EXPR
7403 	  && TREE_CODE (scalar_dest) != REALPART_EXPR
7404 	  && TREE_CODE (scalar_dest) != MEM_REF)
7405 	return false;
7406     }
7407   else
7408     {
7409       gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
7410       if (!call || !gimple_call_internal_p (call))
7411 	return false;
7412 
7413       internal_fn ifn = gimple_call_internal_fn (call);
7414       if (!internal_store_fn_p (ifn))
7415 	return false;
7416 
7417       if (slp_node != NULL)
7418 	{
7419 	  if (dump_enabled_p ())
7420 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7421 			     "SLP of masked stores not supported.\n");
7422 	  return false;
7423 	}
7424 
7425       int mask_index = internal_fn_mask_index (ifn);
7426       if (mask_index >= 0)
7427 	{
7428 	  mask = gimple_call_arg (call, mask_index);
7429 	  if (!vect_check_scalar_mask (stmt_info, mask, &mask_dt,
7430 				       &mask_vectype))
7431 	    return false;
7432 	}
7433     }
7434 
7435   op = vect_get_store_rhs (stmt_info);
7436 
7437   /* Cannot have hybrid store SLP -- that would mean storing to the
7438      same location twice.  */
7439   gcc_assert (slp == PURE_SLP_STMT (stmt_info));
7440 
7441   tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
7442   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7443 
7444   if (loop_vinfo)
7445     {
7446       loop = LOOP_VINFO_LOOP (loop_vinfo);
7447       vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7448     }
7449   else
7450     vf = 1;
7451 
7452   /* Multiple types in SLP are handled by creating the appropriate number of
7453      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
7454      case of SLP.  */
7455   if (slp)
7456     ncopies = 1;
7457   else
7458     ncopies = vect_get_num_copies (loop_vinfo, vectype);
7459 
7460   gcc_assert (ncopies >= 1);
7461 
7462   /* FORNOW.  This restriction should be relaxed.  */
7463   if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
7464     {
7465       if (dump_enabled_p ())
7466 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7467 			 "multiple types in nested loop.\n");
7468       return false;
7469     }
7470 
7471   if (!vect_check_store_rhs (stmt_info, op, &rhs_dt, &rhs_vectype, &vls_type))
7472     return false;
7473 
7474   elem_type = TREE_TYPE (vectype);
7475   vec_mode = TYPE_MODE (vectype);
7476 
7477   if (!STMT_VINFO_DATA_REF (stmt_info))
7478     return false;
7479 
7480   vect_memory_access_type memory_access_type;
7481   if (!get_load_store_type (stmt_info, vectype, slp, mask, vls_type, ncopies,
7482 			    &memory_access_type, &gs_info))
7483     return false;
7484 
7485   if (mask)
7486     {
7487       if (memory_access_type == VMAT_CONTIGUOUS)
7488 	{
7489 	  if (!VECTOR_MODE_P (vec_mode)
7490 	      || !can_vec_mask_load_store_p (vec_mode,
7491 					     TYPE_MODE (mask_vectype), false))
7492 	    return false;
7493 	}
7494       else if (memory_access_type != VMAT_LOAD_STORE_LANES
7495 	       && (memory_access_type != VMAT_GATHER_SCATTER
7496 		   || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
7497 	{
7498 	  if (dump_enabled_p ())
7499 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7500 			     "unsupported access type for masked store.\n");
7501 	  return false;
7502 	}
7503     }
7504   else
7505     {
7506       /* FORNOW. In some cases can vectorize even if data-type not supported
7507 	 (e.g. - array initialization with 0).  */
7508       if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
7509 	return false;
7510     }
7511 
7512   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
7513   grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
7514 		   && memory_access_type != VMAT_GATHER_SCATTER
7515 		   && (slp || memory_access_type != VMAT_CONTIGUOUS));
7516   if (grouped_store)
7517     {
7518       first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7519       first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7520       group_size = DR_GROUP_SIZE (first_stmt_info);
7521     }
7522   else
7523     {
7524       first_stmt_info = stmt_info;
7525       first_dr_info = dr_info;
7526       group_size = vec_num = 1;
7527     }
7528 
7529   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && !vec_stmt)
7530     {
7531       if (!check_scan_store (stmt_info, vectype, rhs_dt, slp, mask,
7532 			     memory_access_type))
7533 	return false;
7534     }
7535 
7536   if (!vec_stmt) /* transformation not required.  */
7537     {
7538       STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7539 
7540       if (loop_vinfo
7541 	  && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
7542 	check_load_store_masking (loop_vinfo, vectype, vls_type, group_size,
7543 				  memory_access_type, &gs_info, mask);
7544 
7545       STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
7546       vect_model_store_cost (stmt_info, ncopies, rhs_dt, memory_access_type,
7547 			     vls_type, slp_node, cost_vec);
7548       return true;
7549     }
7550   gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7551 
7552   /* Transform.  */
7553 
7554   ensure_base_align (dr_info);
7555 
7556   if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7557     {
7558       tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
7559       tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
7560       tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
7561       tree ptr, var, scale, vec_mask;
7562       tree mask_arg = NULL_TREE, mask_op = NULL_TREE, perm_mask = NULL_TREE;
7563       tree mask_halfvectype = mask_vectype;
7564       edge pe = loop_preheader_edge (loop);
7565       gimple_seq seq;
7566       basic_block new_bb;
7567       enum { NARROW, NONE, WIDEN } modifier;
7568       poly_uint64 scatter_off_nunits
7569 	= TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
7570 
7571       if (known_eq (nunits, scatter_off_nunits))
7572 	modifier = NONE;
7573       else if (known_eq (nunits * 2, scatter_off_nunits))
7574 	{
7575 	  modifier = WIDEN;
7576 
7577 	  /* Currently gathers and scatters are only supported for
7578 	     fixed-length vectors.  */
7579 	  unsigned int count = scatter_off_nunits.to_constant ();
7580 	  vec_perm_builder sel (count, count, 1);
7581 	  for (i = 0; i < (unsigned int) count; ++i)
7582 	    sel.quick_push (i | (count / 2));
7583 
7584 	  vec_perm_indices indices (sel, 1, count);
7585 	  perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
7586 						  indices);
7587 	  gcc_assert (perm_mask != NULL_TREE);
7588 	}
7589       else if (known_eq (nunits, scatter_off_nunits * 2))
7590 	{
7591 	  modifier = NARROW;
7592 
7593 	  /* Currently gathers and scatters are only supported for
7594 	     fixed-length vectors.  */
7595 	  unsigned int count = nunits.to_constant ();
7596 	  vec_perm_builder sel (count, count, 1);
7597 	  for (i = 0; i < (unsigned int) count; ++i)
7598 	    sel.quick_push (i | (count / 2));
7599 
7600 	  vec_perm_indices indices (sel, 2, count);
7601 	  perm_mask = vect_gen_perm_mask_checked (vectype, indices);
7602 	  gcc_assert (perm_mask != NULL_TREE);
7603 	  ncopies *= 2;
7604 
7605 	  if (mask)
7606 	    mask_halfvectype = truth_type_for (gs_info.offset_vectype);
7607 	}
7608       else
7609 	gcc_unreachable ();
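      /* Illustration (not specific to any target): if the data vector has
	 four elements and the offset vector eight, the WIDEN case applies
	 with COUNT == 8 and SEL == { 4, 5, 6, 7, 4, 5, 6, 7 }, i.e. the
	 permutation built above moves the high half of the offset vector
	 into the lanes consumed by the second scatter call of each pair of
	 copies.  */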
7610 
7611       rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
7612       ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7613       masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7614       idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7615       srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7616       scaletype = TREE_VALUE (arglist);
7617 
7618       gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
7619 			   && TREE_CODE (rettype) == VOID_TYPE);
7620 
7621       ptr = fold_convert (ptrtype, gs_info.base);
7622       if (!is_gimple_min_invariant (ptr))
7623 	{
7624 	  ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
7625 	  new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
7626 	  gcc_assert (!new_bb);
7627 	}
7628 
7629       if (mask == NULL_TREE)
7630 	{
7631 	  mask_arg = build_int_cst (masktype, -1);
7632 	  mask_arg = vect_init_vector (stmt_info, mask_arg, masktype, NULL);
7633 	}
7634 
7635       scale = build_int_cst (scaletype, gs_info.scale);
7636 
7637       prev_stmt_info = NULL;
7638       for (j = 0; j < ncopies; ++j)
7639 	{
7640 	  if (j == 0)
7641 	    {
7642 	      src = vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt_info);
7643 	      op = vec_oprnd0 = vect_get_vec_def_for_operand (gs_info.offset,
7644 							      stmt_info);
7645 	      if (mask)
7646 		{
7647 		  tree mask_vectype = truth_type_for (vectype);
7648 		  mask_op = vec_mask
7649 		    = vect_get_vec_def_for_operand (mask,
7650 						    stmt_info, mask_vectype);
7651 		}
7652 	    }
7653 	  else if (modifier != NONE && (j & 1))
7654 	    {
7655 	      if (modifier == WIDEN)
7656 		{
7657 		  src
7658 		    = vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo,
7659 								   vec_oprnd1);
7660 		  op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
7661 					     stmt_info, gsi);
7662 		  if (mask)
7663 		    mask_op
7664 		      = vec_mask = vect_get_vec_def_for_stmt_copy (vinfo,
7665 								   vec_mask);
7666 		}
7667 	      else if (modifier == NARROW)
7668 		{
7669 		  src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
7670 					      stmt_info, gsi);
7671 		  op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo,
7672 								    vec_oprnd0);
7673 		}
7674 	      else
7675 		gcc_unreachable ();
7676 	    }
7677 	  else
7678 	    {
7679 	      src = vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo,
7680 								 vec_oprnd1);
7681 	      op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo,
7682 								vec_oprnd0);
7683 	      if (mask)
7684 		mask_op = vec_mask = vect_get_vec_def_for_stmt_copy (vinfo,
7685 								     vec_mask);
7686 	    }
7687 
7688 	  if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
7689 	    {
7690 	      gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
7691 				    TYPE_VECTOR_SUBPARTS (srctype)));
7692 	      var = vect_get_new_ssa_name (srctype, vect_simple_var);
7693 	      src = build1 (VIEW_CONVERT_EXPR, srctype, src);
7694 	      gassign *new_stmt
7695 		= gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
7696 	      vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7697 	      src = var;
7698 	    }
7699 
7700 	  if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
7701 	    {
7702 	      gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
7703 				    TYPE_VECTOR_SUBPARTS (idxtype)));
7704 	      var = vect_get_new_ssa_name (idxtype, vect_simple_var);
7705 	      op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
7706 	      gassign *new_stmt
7707 		= gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
7708 	      vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7709 	      op = var;
7710 	    }
7711 
7712 	  if (mask)
7713 	    {
7714 	      tree utype;
7715 	      mask_arg = mask_op;
7716 	      if (modifier == NARROW)
7717 		{
7718 		  var = vect_get_new_ssa_name (mask_halfvectype,
7719 					       vect_simple_var);
7720 		  gassign *new_stmt
7721 		    = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
7722 							: VEC_UNPACK_LO_EXPR,
7723 					   mask_op);
7724 		  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7725 		  mask_arg = var;
7726 		}
7727 	      tree optype = TREE_TYPE (mask_arg);
7728 	      if (TYPE_MODE (masktype) == TYPE_MODE (optype))
7729 		utype = masktype;
7730 	      else
7731 		utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
7732 	      var = vect_get_new_ssa_name (utype, vect_scalar_var);
7733 	      mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
7734 	      gassign *new_stmt
7735 		= gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
7736 	      vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7737 	      mask_arg = var;
7738 	      if (!useless_type_conversion_p (masktype, utype))
7739 		{
7740 		  gcc_assert (TYPE_PRECISION (utype)
7741 			      <= TYPE_PRECISION (masktype));
7742 		  var = vect_get_new_ssa_name (masktype, vect_scalar_var);
7743 		  new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
7744 		  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7745 		  mask_arg = var;
7746 		}
7747 	    }
7748 
7749 	  gcall *new_stmt
7750 	    = gimple_build_call (gs_info.decl, 5, ptr, mask_arg, op, src, scale);
7751 	  stmt_vec_info new_stmt_info
7752 	    = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7753 
7754 	  if (prev_stmt_info == NULL)
7755 	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
7756 	  else
7757 	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7758 	  prev_stmt_info = new_stmt_info;
7759 	}
7760       return true;
7761     }
7762   else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
7763     return vectorizable_scan_store (stmt_info, gsi, vec_stmt, ncopies);
7764 
7765   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
7766     DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++;
7767 
7768   if (grouped_store)
7769     {
7770       /* FORNOW */
7771       gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));
7772 
7773       /* We vectorize all the stmts of the interleaving group when we
7774 	 reach the last stmt in the group.  */
7775       if (DR_GROUP_STORE_COUNT (first_stmt_info)
7776 	  < DR_GROUP_SIZE (first_stmt_info)
7777 	  && !slp)
7778 	{
7779 	  *vec_stmt = NULL;
7780 	  return true;
7781 	}
7782 
7783       if (slp)
7784         {
7785           grouped_store = false;
7786           /* VEC_NUM is the number of vect stmts to be created for this
7787              group.  */
7788           vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7789 	  first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7790 	  gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
7791 		      == first_stmt_info);
7792 	  first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7793 	  op = vect_get_store_rhs (first_stmt_info);
7794         }
7795       else
7796         /* VEC_NUM is the number of vect stmts to be created for this
7797            group.  */
7798 	vec_num = group_size;
7799 
7800       ref_type = get_group_alias_ptr_type (first_stmt_info);
7801     }
7802   else
7803     ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
7804 
7805   if (dump_enabled_p ())
7806     dump_printf_loc (MSG_NOTE, vect_location,
7807                      "transform store. ncopies = %d\n", ncopies);
7808 
7809   if (memory_access_type == VMAT_ELEMENTWISE
7810       || memory_access_type == VMAT_STRIDED_SLP)
7811     {
7812       gimple_stmt_iterator incr_gsi;
7813       bool insert_after;
7814       gimple *incr;
7815       tree offvar;
7816       tree ivstep;
7817       tree running_off;
7818       tree stride_base, stride_step, alias_off;
7819       tree vec_oprnd;
7820       tree dr_offset;
7821       unsigned int g;
7822       /* Checked by get_load_store_type.  */
7823       unsigned int const_nunits = nunits.to_constant ();
7824 
7825       gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
7826       gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
7827 
7828       dr_offset = get_dr_vinfo_offset (first_dr_info);
7829       stride_base
7830 	= fold_build_pointer_plus
7831 	    (DR_BASE_ADDRESS (first_dr_info->dr),
7832 	     size_binop (PLUS_EXPR,
7833 			 convert_to_ptrofftype (dr_offset),
7834 			 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
7835       stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
7836 
7837       /* For a store with loop-invariant (but other than power-of-2)
7838          stride (i.e. not a grouped access) like so:
7839 
7840 	   for (i = 0; i < n; i += stride)
7841 	     array[i] = ...;
7842 
7843 	 we generate a new induction variable and new stores from
7844 	 the components of the (vectorized) rhs:
7845 
7846 	   for (j = 0; ; j += VF*stride)
7847 	     vectemp = ...;
7848 	     tmp1 = vectemp[0];
7849 	     array[j] = tmp1;
7850 	     tmp2 = vectemp[1];
7851 	     array[j + stride] = tmp2;
7852 	     ...
7853          */
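      /* For instance (illustrative only), with VF == 4 and a non-grouped
	 access the loop above becomes:

	   for (j = 0; ; j += 4*stride)
	     vectemp = ...;
	     array[j] = vectemp[0];
	     array[j + stride] = vectemp[1];
	     array[j + 2*stride] = vectemp[2];
	     array[j + 3*stride] = vectemp[3];
       */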
7854 
7855       unsigned nstores = const_nunits;
7856       unsigned lnel = 1;
7857       tree ltype = elem_type;
7858       tree lvectype = vectype;
7859       if (slp)
7860 	{
7861 	  if (group_size < const_nunits
7862 	      && const_nunits % group_size == 0)
7863 	    {
7864 	      nstores = const_nunits / group_size;
7865 	      lnel = group_size;
7866 	      ltype = build_vector_type (elem_type, group_size);
7867 	      lvectype = vectype;
7868 
7869 	      /* First check whether the vec_extract optab can extract the
7870 		 sub-vectors directly; if not, fall back below.  */
7871 	      scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
7872 	      machine_mode vmode;
7873 	      if (!VECTOR_MODE_P (TYPE_MODE (vectype))
7874 		  || !related_vector_mode (TYPE_MODE (vectype), elmode,
7875 					   group_size).exists (&vmode)
7876 		  || (convert_optab_handler (vec_extract_optab,
7877 					     TYPE_MODE (vectype), vmode)
7878 		      == CODE_FOR_nothing))
7879 		{
7880 		  /* Try to avoid emitting an extract of vector elements
7881 		     by performing the extracts using an integer type of the
7882 		     same size, extracting from a vector of those and then
7883 		     re-interpreting it as the original vector type if
7884 		     supported.  */
7885 		  unsigned lsize
7886 		    = group_size * GET_MODE_BITSIZE (elmode);
7887 		  unsigned int lnunits = const_nunits / group_size;
7888 		  /* If we can't construct such a vector fall back to
7889 		     element extracts from the original vector type and
7890 		     element size stores.  */
7891 		  if (int_mode_for_size (lsize, 0).exists (&elmode)
7892 		      && VECTOR_MODE_P (TYPE_MODE (vectype))
7893 		      && related_vector_mode (TYPE_MODE (vectype), elmode,
7894 					      lnunits).exists (&vmode)
7895 		      && (convert_optab_handler (vec_extract_optab,
7896 						 vmode, elmode)
7897 			  != CODE_FOR_nothing))
7898 		    {
7899 		      nstores = lnunits;
7900 		      lnel = group_size;
7901 		      ltype = build_nonstandard_integer_type (lsize, 1);
7902 		      lvectype = build_vector_type (ltype, nstores);
7903 		    }
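		  /* Worked example (illustrative only): for a V16QI vector
		     with GROUP_SIZE == 2, LSIZE == 16, so LTYPE becomes a
		     16-bit integer type and LVECTYPE a vector of eight such
		     integers; the rhs vector is then punned to that type and
		     each of the eight stores writes one pair of chars as a
		     single 16-bit store.  */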
7904 		  /* Else fall back to vector extraction anyway.
7905 		     Fewer stores are more important than avoiding spilling
7906 		     of the vector we extract from.  Compared to the
7907 		     construction case in vectorizable_load no store-forwarding
7908 		     issue exists here for reasonable archs.  */
7909 		}
7910 	    }
7911 	  else if (group_size >= const_nunits
7912 		   && group_size % const_nunits == 0)
7913 	    {
7914 	      nstores = 1;
7915 	      lnel = const_nunits;
7916 	      ltype = vectype;
7917 	      lvectype = vectype;
7918 	    }
7919 	  ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
7920 	  ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7921 	}
7922 
7923       ivstep = stride_step;
7924       ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
7925 			    build_int_cst (TREE_TYPE (ivstep), vf));
7926 
7927       standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7928 
7929       stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
7930       ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
7931       create_iv (stride_base, ivstep, NULL,
7932 		 loop, &incr_gsi, insert_after,
7933 		 &offvar, NULL);
7934       incr = gsi_stmt (incr_gsi);
7935       loop_vinfo->add_stmt (incr);
7936 
7937       stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
7938 
7939       prev_stmt_info = NULL;
7940       alias_off = build_int_cst (ref_type, 0);
7941       stmt_vec_info next_stmt_info = first_stmt_info;
7942       for (g = 0; g < group_size; g++)
7943 	{
7944 	  running_off = offvar;
7945 	  if (g)
7946 	    {
7947 	      tree size = TYPE_SIZE_UNIT (ltype);
7948 	      tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
7949 				      size);
7950 	      tree newoff = copy_ssa_name (running_off, NULL);
7951 	      incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7952 					  running_off, pos);
7953 	      vect_finish_stmt_generation (stmt_info, incr, gsi);
7954 	      running_off = newoff;
7955 	    }
7956 	  unsigned int group_el = 0;
7957 	  unsigned HOST_WIDE_INT
7958 	    elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7959 	  for (j = 0; j < ncopies; j++)
7960 	    {
7961 	      /* We've set op and dt above, from vect_get_store_rhs,
7962 		 and first_stmt_info == stmt_info.  */
7963 	      if (j == 0)
7964 		{
7965 		  if (slp)
7966 		    {
7967 		      vect_get_vec_defs (op, NULL_TREE, stmt_info,
7968 					 &vec_oprnds, NULL, slp_node);
7969 		      vec_oprnd = vec_oprnds[0];
7970 		    }
7971 		  else
7972 		    {
7973 		      op = vect_get_store_rhs (next_stmt_info);
7974 		      vec_oprnd = vect_get_vec_def_for_operand
7975 			(op, next_stmt_info);
7976 		    }
7977 		}
7978 	      else
7979 		{
7980 		  if (slp)
7981 		    vec_oprnd = vec_oprnds[j];
7982 		  else
7983 		    vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo,
7984 								vec_oprnd);
7985 		}
7986 	      /* Pun the vector to extract from if necessary.  */
7987 	      if (lvectype != vectype)
7988 		{
7989 		  tree tem = make_ssa_name (lvectype);
7990 		  gimple *pun
7991 		    = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
7992 							lvectype, vec_oprnd));
7993 		  vect_finish_stmt_generation (stmt_info, pun, gsi);
7994 		  vec_oprnd = tem;
7995 		}
7996 	      for (i = 0; i < nstores; i++)
7997 		{
7998 		  tree newref, newoff;
7999 		  gimple *incr, *assign;
8000 		  tree size = TYPE_SIZE (ltype);
8001 		  /* Extract the i'th component.  */
8002 		  tree pos = fold_build2 (MULT_EXPR, bitsizetype,
8003 					  bitsize_int (i), size);
8004 		  tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
8005 					   size, pos);
8006 
8007 		  elem = force_gimple_operand_gsi (gsi, elem, true,
8008 						   NULL_TREE, true,
8009 						   GSI_SAME_STMT);
8010 
8011 		  tree this_off = build_int_cst (TREE_TYPE (alias_off),
8012 						 group_el * elsz);
8013 		  newref = build2 (MEM_REF, ltype,
8014 				   running_off, this_off);
8015 		  vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
8016 
8017 		  /* And store it to *running_off.  */
8018 		  assign = gimple_build_assign (newref, elem);
8019 		  stmt_vec_info assign_info
8020 		    = vect_finish_stmt_generation (stmt_info, assign, gsi);
8021 
8022 		  group_el += lnel;
8023 		  if (! slp
8024 		      || group_el == group_size)
8025 		    {
8026 		      newoff = copy_ssa_name (running_off, NULL);
8027 		      incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8028 						  running_off, stride_step);
8029 		      vect_finish_stmt_generation (stmt_info, incr, gsi);
8030 
8031 		      running_off = newoff;
8032 		      group_el = 0;
8033 		    }
8034 		  if (g == group_size - 1
8035 		      && !slp)
8036 		    {
8037 		      if (j == 0 && i == 0)
8038 			STMT_VINFO_VEC_STMT (stmt_info)
8039 			    = *vec_stmt = assign_info;
8040 		      else
8041 			STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign_info;
8042 		      prev_stmt_info = assign_info;
8043 		    }
8044 		}
8045 	    }
8046 	  next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8047 	  if (slp)
8048 	    break;
8049 	}
8050 
8051       vec_oprnds.release ();
8052       return true;
8053     }
8054 
8055   auto_vec<tree> dr_chain (group_size);
8056   oprnds.create (group_size);
8057 
8058   /* Gather-scatter accesses perform only component accesses, alignment
8059      is irrelevant for them.  */
8060   if (memory_access_type == VMAT_GATHER_SCATTER)
8061     alignment_support_scheme = dr_unaligned_supported;
8062   else
8063     alignment_support_scheme
8064       = vect_supportable_dr_alignment (first_dr_info, false);
8065 
8066   gcc_assert (alignment_support_scheme);
8067   vec_loop_masks *loop_masks
8068     = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
8069        ? &LOOP_VINFO_MASKS (loop_vinfo)
8070        : NULL);
8071   /* Targets with store-lane instructions must not require explicit
8072      realignment.  vect_supportable_dr_alignment always returns either
8073      dr_aligned or dr_unaligned_supported for masked operations.  */
8074   gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
8075 	       && !mask
8076 	       && !loop_masks)
8077 	      || alignment_support_scheme == dr_aligned
8078 	      || alignment_support_scheme == dr_unaligned_supported);
8079 
8080   if (memory_access_type == VMAT_CONTIGUOUS_DOWN
8081       || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8082     offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
8083 
8084   tree bump;
8085   tree vec_offset = NULL_TREE;
8086   if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8087     {
8088       aggr_type = NULL_TREE;
8089       bump = NULL_TREE;
8090     }
8091   else if (memory_access_type == VMAT_GATHER_SCATTER)
8092     {
8093       aggr_type = elem_type;
8094       vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
8095 				       &bump, &vec_offset);
8096     }
8097   else
8098     {
8099       if (memory_access_type == VMAT_LOAD_STORE_LANES)
8100 	aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
8101       else
8102 	aggr_type = vectype;
8103       bump = vect_get_data_ptr_increment (dr_info, aggr_type,
8104 					  memory_access_type);
8105     }
8106 
8107   if (mask)
8108     LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
8109 
8110   /* In case the vectorization factor (VF) is bigger than the number
8111      of elements that we can fit in a vectype (nunits), we have to generate
8112      more than one vector stmt, i.e. we need to "unroll" the
8113      vector stmt by a factor VF/nunits.  For more details see documentation in
8114      vect_get_vec_def_for_copy_stmt.  */
8115 
8116   /* In case of interleaving (non-unit grouped access):
8117 
8118         S1:  &base + 2 = x2
8119         S2:  &base = x0
8120         S3:  &base + 1 = x1
8121         S4:  &base + 3 = x3
8122 
8123      We create vectorized stores starting from base address (the access of the
8124      first stmt in the chain (S2 in the above example), when the last store stmt
8125      of the chain (S4) is reached:
8126 
8127         VS1: &base = vx2
8128 	VS2: &base + vec_size*1 = vx0
8129 	VS3: &base + vec_size*2 = vx1
8130 	VS4: &base + vec_size*3 = vx3
8131 
8132      Then permutation statements are generated:
8133 
8134 	VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
8135 	VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
8136 	...
8137 
8138      And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8139      (the order of the data-refs in the output of vect_permute_store_chain
8140      corresponds to the order of scalar stmts in the interleaving chain - see
8141      the documentation of vect_permute_store_chain()).
8142 
8143      In case of both multiple types and interleaving, above vector stores and
8144      permutation stmts are created for every copy.  The result vector stmts are
8145      put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
8146      STMT_VINFO_RELATED_STMT for the next copies.
8147   */
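  /* An illustrative scalar source for such a group (not taken from the
     code below) would be:

       for (i = 0; i < n; i++)
	 {
	   base[4*i+2] = x2;
	   base[4*i+0] = x0;
	   base[4*i+1] = x1;
	   base[4*i+3] = x3;
	 }

     where the four stores form one interleaving chain of size 4.  */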
8148 
8149   prev_stmt_info = NULL;
8150   tree vec_mask = NULL_TREE;
8151   for (j = 0; j < ncopies; j++)
8152     {
8153       stmt_vec_info new_stmt_info;
8154       if (j == 0)
8155 	{
8156           if (slp)
8157             {
8158 	      /* Get vectorized arguments for SLP_NODE.  */
8159 	      vect_get_vec_defs (op, NULL_TREE, stmt_info, &vec_oprnds,
8160 				 NULL, slp_node);
8161 
8162               vec_oprnd = vec_oprnds[0];
8163             }
8164           else
8165             {
8166 	      /* For interleaved stores we collect vectorized defs for all the
8167 		 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
8168 		 used as an input to vect_permute_store_chain(), and OPRNDS as
8169 		 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
8170 
8171 		 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8172 		 OPRNDS are of size 1.  */
8173 	      stmt_vec_info next_stmt_info = first_stmt_info;
8174 	      for (i = 0; i < group_size; i++)
8175 		{
8176 		  /* Since gaps are not supported for interleaved stores,
8177 		     DR_GROUP_SIZE is the exact number of stmts in the chain.
8178 		     Therefore, NEXT_STMT_INFO can't be NULL.  In case
8179 		     that there is no interleaving, DR_GROUP_SIZE is 1,
8180 		     and only one iteration of the loop will be executed.  */
8181 		  op = vect_get_store_rhs (next_stmt_info);
8182 		  vec_oprnd = vect_get_vec_def_for_operand
8183 		    (op, next_stmt_info);
8184 		  dr_chain.quick_push (vec_oprnd);
8185 		  oprnds.quick_push (vec_oprnd);
8186 		  next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8187 		}
8188 	      if (mask)
8189 		vec_mask = vect_get_vec_def_for_operand (mask, stmt_info,
8190 							 mask_vectype);
8191 	    }
8192 
8193 	  /* We should have caught mismatched types earlier.  */
8194 	  gcc_assert (useless_type_conversion_p (vectype,
8195 						 TREE_TYPE (vec_oprnd)));
8196 	  bool simd_lane_access_p
8197 	    = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
8198 	  if (simd_lane_access_p
8199 	      && !loop_masks
8200 	      && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
8201 	      && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
8202 	      && integer_zerop (get_dr_vinfo_offset (first_dr_info))
8203 	      && integer_zerop (DR_INIT (first_dr_info->dr))
8204 	      && alias_sets_conflict_p (get_alias_set (aggr_type),
8205 					get_alias_set (TREE_TYPE (ref_type))))
8206 	    {
8207 	      dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
8208 	      dataref_offset = build_int_cst (ref_type, 0);
8209 	    }
8210 	  else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8211 	    vect_get_gather_scatter_ops (loop, stmt_info, &gs_info,
8212 					 &dataref_ptr, &vec_offset);
8213 	  else
8214 	    dataref_ptr
8215 	      = vect_create_data_ref_ptr (first_stmt_info, aggr_type,
8216 					  simd_lane_access_p ? loop : NULL,
8217 					  offset, &dummy, gsi, &ptr_incr,
8218 					  simd_lane_access_p, NULL_TREE, bump);
8219 	}
8220       else
8221 	{
8222 	  /* For interleaved stores we created vectorized defs for all the
8223 	     defs stored in OPRNDS in the previous iteration (previous copy).
8224 	     DR_CHAIN is then used as an input to vect_permute_store_chain(),
8225 	     and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
8226 	     next copy.
8227 	     If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8228 	     OPRNDS are of size 1.  */
8229 	  for (i = 0; i < group_size; i++)
8230 	    {
8231 	      op = oprnds[i];
8232 	      vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, op);
8233 	      dr_chain[i] = vec_oprnd;
8234 	      oprnds[i] = vec_oprnd;
8235 	    }
8236 	  if (mask)
8237 	    vec_mask = vect_get_vec_def_for_stmt_copy (vinfo, vec_mask);
8238 	  if (dataref_offset)
8239 	    dataref_offset
8240 	      = int_const_binop (PLUS_EXPR, dataref_offset, bump);
8241 	  else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8242 	    vec_offset = vect_get_vec_def_for_stmt_copy (vinfo, vec_offset);
8243 	  else
8244 	    dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8245 					   stmt_info, bump);
8246 	}
8247 
8248       if (memory_access_type == VMAT_LOAD_STORE_LANES)
8249 	{
8250 	  tree vec_array;
8251 
8252 	  /* Get an array into which we can store the individual vectors.  */
8253 	  vec_array = create_vector_array (vectype, vec_num);
8254 
8255 	  /* Invalidate the current contents of VEC_ARRAY.  This should
8256 	     become an RTL clobber too, which prevents the vector registers
8257 	     from being upward-exposed.  */
8258 	  vect_clobber_variable (stmt_info, gsi, vec_array);
8259 
8260 	  /* Store the individual vectors into the array.  */
8261 	  for (i = 0; i < vec_num; i++)
8262 	    {
8263 	      vec_oprnd = dr_chain[i];
8264 	      write_vector_array (stmt_info, gsi, vec_oprnd, vec_array, i);
8265 	    }
8266 
8267 	  tree final_mask = NULL;
8268 	  if (loop_masks)
8269 	    final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
8270 					     vectype, j);
8271 	  if (vec_mask)
8272 	    final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8273 						  vec_mask, gsi);
8274 
8275 	  gcall *call;
8276 	  if (final_mask)
8277 	    {
8278 	      /* Emit:
8279 		   MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8280 				     VEC_ARRAY).  */
8281 	      unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
8282 	      tree alias_ptr = build_int_cst (ref_type, align);
8283 	      call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
8284 						 dataref_ptr, alias_ptr,
8285 						 final_mask, vec_array);
8286 	    }
8287 	  else
8288 	    {
8289 	      /* Emit:
8290 		   MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY).  */
8291 	      data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8292 	      call = gimple_build_call_internal (IFN_STORE_LANES, 1,
8293 						 vec_array);
8294 	      gimple_call_set_lhs (call, data_ref);
8295 	    }
8296 	  gimple_call_set_nothrow (call, true);
8297 	  new_stmt_info = vect_finish_stmt_generation (stmt_info, call, gsi);
8298 
8299 	  /* Record that VEC_ARRAY is now dead.  */
8300 	  vect_clobber_variable (stmt_info, gsi, vec_array);
8301 	}
8302       else
8303 	{
8304 	  new_stmt_info = NULL;
8305 	  if (grouped_store)
8306 	    {
8307 	      if (j == 0)
8308 		result_chain.create (group_size);
8309 	      /* Permute.  */
8310 	      vect_permute_store_chain (dr_chain, group_size, stmt_info, gsi,
8311 					&result_chain);
8312 	    }
8313 
8314 	  stmt_vec_info next_stmt_info = first_stmt_info;
8315 	  for (i = 0; i < vec_num; i++)
8316 	    {
8317 	      unsigned misalign;
8318 	      unsigned HOST_WIDE_INT align;
8319 
8320 	      tree final_mask = NULL_TREE;
8321 	      if (loop_masks)
8322 		final_mask = vect_get_loop_mask (gsi, loop_masks,
8323 						 vec_num * ncopies,
8324 						 vectype, vec_num * j + i);
8325 	      if (vec_mask)
8326 		final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8327 						      vec_mask, gsi);
8328 
8329 	      if (memory_access_type == VMAT_GATHER_SCATTER)
8330 		{
8331 		  tree scale = size_int (gs_info.scale);
8332 		  gcall *call;
8333 		  if (loop_masks)
8334 		    call = gimple_build_call_internal
8335 		      (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
8336 		       scale, vec_oprnd, final_mask);
8337 		  else
8338 		    call = gimple_build_call_internal
8339 		      (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
8340 		       scale, vec_oprnd);
8341 		  gimple_call_set_nothrow (call, true);
8342 		  new_stmt_info
8343 		    = vect_finish_stmt_generation (stmt_info, call, gsi);
8344 		  break;
8345 		}
8346 
8347 	      if (i > 0)
8348 		/* Bump the vector pointer.  */
8349 		dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8350 					       stmt_info, bump);
8351 
8352 	      if (slp)
8353 		vec_oprnd = vec_oprnds[i];
8354 	      else if (grouped_store)
8355 		/* For grouped stores vectorized defs are interleaved in
8356 		   vect_permute_store_chain().  */
8357 		vec_oprnd = result_chain[i];
8358 
8359 	      align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
8360 	      if (aligned_access_p (first_dr_info))
8361 		misalign = 0;
8362 	      else if (DR_MISALIGNMENT (first_dr_info) == -1)
8363 		{
8364 		  align = dr_alignment (vect_dr_behavior (first_dr_info));
8365 		  misalign = 0;
8366 		}
8367 	      else
8368 		misalign = DR_MISALIGNMENT (first_dr_info);
8369 	      if (dataref_offset == NULL_TREE
8370 		  && TREE_CODE (dataref_ptr) == SSA_NAME)
8371 		set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
8372 					misalign);
8373 
8374 	      if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8375 		{
8376 		  tree perm_mask = perm_mask_for_reverse (vectype);
8377 		  tree perm_dest = vect_create_destination_var
8378 		    (vect_get_store_rhs (stmt_info), vectype);
8379 		  tree new_temp = make_ssa_name (perm_dest);
8380 
8381 		  /* Generate the permute statement.  */
8382 		  gimple *perm_stmt
8383 		    = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
8384 					   vec_oprnd, perm_mask);
8385 		  vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
8386 
8387 		  perm_stmt = SSA_NAME_DEF_STMT (new_temp);
8388 		  vec_oprnd = new_temp;
8389 		}
8390 
8391 	      /* Arguments are ready.  Create the new vector stmt.  */
8392 	      if (final_mask)
8393 		{
8394 		  align = least_bit_hwi (misalign | align);
8395 		  tree ptr = build_int_cst (ref_type, align);
8396 		  gcall *call
8397 		    = gimple_build_call_internal (IFN_MASK_STORE, 4,
8398 						  dataref_ptr, ptr,
8399 						  final_mask, vec_oprnd);
8400 		  gimple_call_set_nothrow (call, true);
8401 		  new_stmt_info
8402 		    = vect_finish_stmt_generation (stmt_info, call, gsi);
8403 		}
8404 	      else
8405 		{
8406 		  data_ref = fold_build2 (MEM_REF, vectype,
8407 					  dataref_ptr,
8408 					  dataref_offset
8409 					  ? dataref_offset
8410 					  : build_int_cst (ref_type, 0));
8411 		  if (aligned_access_p (first_dr_info))
8412 		    ;
8413 		  else if (DR_MISALIGNMENT (first_dr_info) == -1)
8414 		    TREE_TYPE (data_ref)
8415 		      = build_aligned_type (TREE_TYPE (data_ref),
8416 					    align * BITS_PER_UNIT);
8417 		  else
8418 		    TREE_TYPE (data_ref)
8419 		      = build_aligned_type (TREE_TYPE (data_ref),
8420 					    TYPE_ALIGN (elem_type));
8421 		  vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8422 		  gassign *new_stmt
8423 		    = gimple_build_assign (data_ref, vec_oprnd);
8424 		  new_stmt_info
8425 		    = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8426 		}
8427 
8428 	      if (slp)
8429 		continue;
8430 
8431 	      next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8432 	      if (!next_stmt_info)
8433 		break;
8434 	    }
8435 	}
8436       if (!slp)
8437 	{
8438 	  if (j == 0)
8439 	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
8440 	  else
8441 	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
8442 	  prev_stmt_info = new_stmt_info;
8443 	}
8444     }
8445 
8446   oprnds.release ();
8447   result_chain.release ();
8448   vec_oprnds.release ();
8449 
8450   return true;
8451 }
8452 
8453 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
8454    VECTOR_CST mask.  No checks are made that the target platform supports the
8455    mask, so callers may wish to test can_vec_perm_const_p separately, or use
8456    vect_gen_perm_mask_checked.  */
8457 
8458 tree
8459 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
8460 {
8461   tree mask_type;
8462 
8463   poly_uint64 nunits = sel.length ();
8464   gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
8465 
8466   mask_type = build_vector_type (ssizetype, nunits);
8467   return vec_perm_indices_to_tree (mask_type, sel);
8468 }
8469 
8470 /* Checked version of vect_gen_perm_mask_any.  Asserts can_vec_perm_const_p,
8471    i.e. that the target supports the pattern _for arbitrary input vectors_.  */
8472 
8473 tree
8474 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
8475 {
8476   gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
8477   return vect_gen_perm_mask_any (vectype, sel);
8478 }
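/* A minimal usage sketch (illustrative only, for a fixed-length VECTYPE
   with COUNT elements): a mask reversing the element order could be built
   with

     vec_perm_builder sel (count, count, 1);
     for (unsigned int i = 0; i < count; ++i)
       sel.quick_push (count - 1 - i);
     vec_perm_indices indices (sel, 1, count);
     tree mask = vect_gen_perm_mask_checked (vectype, indices);

   vect_gen_perm_mask_checked asserts that the target can perform the
   permutation, so callers are expected to have verified that already (or
   to use vect_gen_perm_mask_any together with their own
   can_vec_perm_const_p check).  */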
8479 
8480 /* Given vector variables X and Y that were generated for the scalar
8481    STMT_INFO, generate instructions to permute the vector elements of X and Y
8482    using permutation mask MASK_VEC, insert them at *GSI and return the
8483    permuted vector variable.  */
8484 
8485 static tree
8486 permute_vec_elements (tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
8487 		      gimple_stmt_iterator *gsi)
8488 {
8489   tree vectype = TREE_TYPE (x);
8490   tree perm_dest, data_ref;
8491   gimple *perm_stmt;
8492 
8493   tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
8494   if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
8495     perm_dest = vect_create_destination_var (scalar_dest, vectype);
8496   else
8497     perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
8498   data_ref = make_ssa_name (perm_dest);
8499 
8500   /* Generate the permute statement.  */
8501   perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
8502   vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
8503 
8504   return data_ref;
8505 }
8506 
8507 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
8508    inserting them on the loop's preheader edge.  Returns true if we
8509    were successful in doing so (and thus STMT_INFO can then be moved),
8510    otherwise returns false.  */
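/* For example (illustrative only): for an invariant load "x = *p" where
   "p = &a[4]" is itself computed inside LOOP, the definition of "p" is
   moved to the preheader so that the load can subsequently be hoisted;
   if "p" in turn depended on another definition inside LOOP, the
   function gives up and returns false rather than recursing.  */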
8511 
8512 static bool
8513 hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop)
8514 {
8515   ssa_op_iter i;
8516   tree op;
8517   bool any = false;
8518 
8519   FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8520     {
8521       gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8522       if (!gimple_nop_p (def_stmt)
8523 	  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8524 	{
8525 	  /* Make sure we don't need to recurse.  While we could do
8526 	     so in simple cases, for more complex use webs we don't
8527 	     have an easy way to preserve stmt order to fulfil
8528 	     dependencies within them.  */
8529 	  tree op2;
8530 	  ssa_op_iter i2;
8531 	  if (gimple_code (def_stmt) == GIMPLE_PHI)
8532 	    return false;
8533 	  FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
8534 	    {
8535 	      gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
8536 	      if (!gimple_nop_p (def_stmt2)
8537 		  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
8538 		return false;
8539 	    }
8540 	  any = true;
8541 	}
8542     }
8543 
8544   if (!any)
8545     return true;
8546 
8547   FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8548     {
8549       gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8550       if (!gimple_nop_p (def_stmt)
8551 	  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8552 	{
8553 	  gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
8554 	  gsi_remove (&gsi, false);
8555 	  gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
8556 	}
8557     }
8558 
8559   return true;
8560 }
8561 
8562 /* vectorizable_load.
8563 
8564    Check if STMT_INFO reads a non-scalar data-ref (array/pointer/structure)
8565    that can be vectorized.
8566    If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8567    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
8568    Return true if STMT_INFO is vectorizable in this way.  */
8569 
8570 static bool
8571 vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
8572 		   stmt_vec_info *vec_stmt, slp_tree slp_node,
8573 		   slp_instance slp_node_instance,
8574 		   stmt_vector_for_cost *cost_vec)
8575 {
8576   tree scalar_dest;
8577   tree vec_dest = NULL;
8578   tree data_ref = NULL;
8579   stmt_vec_info prev_stmt_info;
8580   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
8581   class loop *loop = NULL;
8582   class loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
8583   bool nested_in_vect_loop = false;
8584   tree elem_type;
8585   tree new_temp;
8586   machine_mode mode;
8587   tree dummy;
8588   enum dr_alignment_support alignment_support_scheme;
8589   tree dataref_ptr = NULL_TREE;
8590   tree dataref_offset = NULL_TREE;
8591   gimple *ptr_incr = NULL;
8592   int ncopies;
8593   int i, j;
8594   unsigned int group_size;
8595   poly_uint64 group_gap_adj;
8596   tree msq = NULL_TREE, lsq;
8597   tree offset = NULL_TREE;
8598   tree byte_offset = NULL_TREE;
8599   tree realignment_token = NULL_TREE;
8600   gphi *phi = NULL;
8601   vec<tree> dr_chain = vNULL;
8602   bool grouped_load = false;
8603   stmt_vec_info first_stmt_info;
8604   stmt_vec_info first_stmt_info_for_drptr = NULL;
8605   bool compute_in_loop = false;
8606   class loop *at_loop;
8607   int vec_num;
8608   bool slp = (slp_node != NULL);
8609   bool slp_perm = false;
8610   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8611   poly_uint64 vf;
8612   tree aggr_type;
8613   gather_scatter_info gs_info;
8614   vec_info *vinfo = stmt_info->vinfo;
8615   tree ref_type;
8616   enum vect_def_type mask_dt = vect_unknown_def_type;
8617 
8618   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8619     return false;
8620 
8621   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8622       && ! vec_stmt)
8623     return false;
8624 
8625   tree mask = NULL_TREE, mask_vectype = NULL_TREE;
8626   if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
8627     {
8628       scalar_dest = gimple_assign_lhs (assign);
8629       if (TREE_CODE (scalar_dest) != SSA_NAME)
8630 	return false;
8631 
8632       tree_code code = gimple_assign_rhs_code (assign);
8633       if (code != ARRAY_REF
8634 	  && code != BIT_FIELD_REF
8635 	  && code != INDIRECT_REF
8636 	  && code != COMPONENT_REF
8637 	  && code != IMAGPART_EXPR
8638 	  && code != REALPART_EXPR
8639 	  && code != MEM_REF
8640 	  && TREE_CODE_CLASS (code) != tcc_declaration)
8641 	return false;
8642     }
8643   else
8644     {
8645       gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
8646       if (!call || !gimple_call_internal_p (call))
8647 	return false;
8648 
8649       internal_fn ifn = gimple_call_internal_fn (call);
8650       if (!internal_load_fn_p (ifn))
8651 	return false;
8652 
8653       scalar_dest = gimple_call_lhs (call);
8654       if (!scalar_dest)
8655 	return false;
8656 
8657       int mask_index = internal_fn_mask_index (ifn);
8658       if (mask_index >= 0)
8659 	{
8660 	  mask = gimple_call_arg (call, mask_index);
8661 	  if (!vect_check_scalar_mask (stmt_info, mask, &mask_dt,
8662 				       &mask_vectype))
8663 	    return false;
8664 	}
8665     }
8666 
8667   if (!STMT_VINFO_DATA_REF (stmt_info))
8668     return false;
8669 
8670   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8671   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8672 
8673   if (loop_vinfo)
8674     {
8675       loop = LOOP_VINFO_LOOP (loop_vinfo);
8676       nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
8677       vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
8678     }
8679   else
8680     vf = 1;
8681 
8682   /* Multiple types in SLP are handled by creating the appropriate number of
8683      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
8684      case of SLP.  */
8685   if (slp)
8686     ncopies = 1;
8687   else
8688     ncopies = vect_get_num_copies (loop_vinfo, vectype);
8689 
8690   gcc_assert (ncopies >= 1);
8691 
8692   /* FORNOW. This restriction should be relaxed.  */
8693   if (nested_in_vect_loop && ncopies > 1)
8694     {
8695       if (dump_enabled_p ())
8696         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8697                          "multiple types in nested loop.\n");
8698       return false;
8699     }
8700 
8701   /* Invalidate assumptions made by dependence analysis when vectorization
8702      on the unrolled body effectively re-orders stmts.  */
8703   if (ncopies > 1
8704       && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8705       && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8706 		   STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8707     {
8708       if (dump_enabled_p ())
8709 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8710 			 "cannot perform implicit CSE when unrolling "
8711 			 "with negative dependence distance\n");
8712       return false;
8713     }
8714 
8715   elem_type = TREE_TYPE (vectype);
8716   mode = TYPE_MODE (vectype);
8717 
8718   /* FORNOW. In some cases can vectorize even if data-type not supported
8719     (e.g. - data copies).  */
8720   if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
8721     {
8722       if (dump_enabled_p ())
8723         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8724                          "Aligned load, but unsupported type.\n");
8725       return false;
8726     }
8727 
8728   /* Check if the load is a part of an interleaving chain.  */
8729   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
8730     {
8731       grouped_load = true;
8732       /* FORNOW */
8733       gcc_assert (!nested_in_vect_loop);
8734       gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
8735 
8736       first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8737       group_size = DR_GROUP_SIZE (first_stmt_info);
8738 
8739       /* Refuse non-SLP vectorization of SLP-only groups.  */
8740       if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info))
8741 	{
8742 	  if (dump_enabled_p ())
8743 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8744 			     "cannot vectorize load in non-SLP mode.\n");
8745 	  return false;
8746 	}
8747 
8748       if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
8749 	slp_perm = true;
8750 
8751       /* Invalidate assumptions made by dependence analysis when vectorization
8752 	 on the unrolled body effectively re-orders stmts.  */
8753       if (!PURE_SLP_STMT (stmt_info)
8754 	  && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8755 	  && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8756 		       STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8757 	{
8758 	  if (dump_enabled_p ())
8759 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8760 			     "cannot perform implicit CSE when performing "
8761 			     "group loads with negative dependence distance\n");
8762 	  return false;
8763 	}
8764     }
8765   else
8766     group_size = 1;
8767 
8768   vect_memory_access_type memory_access_type;
8769   if (!get_load_store_type (stmt_info, vectype, slp, mask, VLS_LOAD, ncopies,
8770 			    &memory_access_type, &gs_info))
8771     return false;
8772 
8773   if (mask)
8774     {
8775       if (memory_access_type == VMAT_CONTIGUOUS)
8776 	{
8777 	  machine_mode vec_mode = TYPE_MODE (vectype);
8778 	  if (!VECTOR_MODE_P (vec_mode)
8779 	      || !can_vec_mask_load_store_p (vec_mode,
8780 					     TYPE_MODE (mask_vectype), true))
8781 	    return false;
8782 	}
8783       else if (memory_access_type != VMAT_LOAD_STORE_LANES
8784 	       && memory_access_type != VMAT_GATHER_SCATTER)
8785 	{
8786 	  if (dump_enabled_p ())
8787 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8788 			     "unsupported access type for masked load.\n");
8789 	  return false;
8790 	}
8791     }
8792 
8793   if (!vec_stmt) /* transformation not required.  */
8794     {
8795       if (!slp)
8796 	STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
8797 
8798       if (loop_vinfo
8799 	  && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
8800 	check_load_store_masking (loop_vinfo, vectype, VLS_LOAD, group_size,
8801 				  memory_access_type, &gs_info, mask);
8802 
8803       STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
8804       vect_model_load_cost (stmt_info, ncopies, memory_access_type,
8805 			    slp_node_instance, slp_node, cost_vec);
8806       return true;
8807     }
8808 
8809   if (!slp)
8810     gcc_assert (memory_access_type
8811 		== STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
8812 
8813   if (dump_enabled_p ())
8814     dump_printf_loc (MSG_NOTE, vect_location,
8815                      "transform load. ncopies = %d\n", ncopies);
8816 
8817   /* Transform.  */
8818 
8819   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
8820   ensure_base_align (dr_info);
8821 
8822   if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
8823     {
8824       vect_build_gather_load_calls (stmt_info, gsi, vec_stmt, &gs_info, mask);
8825       return true;
8826     }
8827 
8828   if (memory_access_type == VMAT_INVARIANT)
8829     {
8830       gcc_assert (!grouped_load && !mask && !bb_vinfo);
8831       /* If we have versioned for aliasing or the loop doesn't
8832 	 have any data dependencies that would preclude this,
8833 	 then we are sure this is a loop invariant load and
8834 	 thus we can insert it on the preheader edge.  */
8835       bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
8836 		      && !nested_in_vect_loop
8837 		      && hoist_defs_of_uses (stmt_info, loop));
8838       if (hoist_p)
8839 	{
8840 	  gassign *stmt = as_a <gassign *> (stmt_info->stmt);
8841 	  if (dump_enabled_p ())
8842 	    dump_printf_loc (MSG_NOTE, vect_location,
8843 			     "hoisting out of the vectorized loop: %G", stmt);
8844 	  scalar_dest = copy_ssa_name (scalar_dest);
8845 	  tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
8846 	  gsi_insert_on_edge_immediate
8847 	    (loop_preheader_edge (loop),
8848 	     gimple_build_assign (scalar_dest, rhs));
8849 	}
8850       /* These copies are all equivalent, but currently the representation
8851 	 requires a separate STMT_VINFO_VEC_STMT for each one.  */
8852       prev_stmt_info = NULL;
8853       gimple_stmt_iterator gsi2 = *gsi;
8854       gsi_next (&gsi2);
8855       for (j = 0; j < ncopies; j++)
8856 	{
8857 	  stmt_vec_info new_stmt_info;
8858 	  if (hoist_p)
8859 	    {
8860 	      new_temp = vect_init_vector (stmt_info, scalar_dest,
8861 					   vectype, NULL);
8862 	      gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
8863 	      new_stmt_info = vinfo->add_stmt (new_stmt);
8864 	    }
8865 	  else
8866 	    {
8867 	      new_temp = vect_init_vector (stmt_info, scalar_dest,
8868 					   vectype, &gsi2);
8869 	      new_stmt_info = vinfo->lookup_def (new_temp);
8870 	    }
8871 	  if (slp)
8872 	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
8873 	  else if (j == 0)
8874 	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
8875 	  else
8876 	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
8877 	  prev_stmt_info = new_stmt_info;
8878 	}
8879       return true;
8880     }
8881 
8882   if (memory_access_type == VMAT_ELEMENTWISE
8883       || memory_access_type == VMAT_STRIDED_SLP)
8884     {
8885       gimple_stmt_iterator incr_gsi;
8886       bool insert_after;
8887       gimple *incr;
8888       tree offvar;
8889       tree ivstep;
8890       tree running_off;
8891       vec<constructor_elt, va_gc> *v = NULL;
8892       tree stride_base, stride_step, alias_off;
8893       /* Checked by get_load_store_type.  */
8894       unsigned int const_nunits = nunits.to_constant ();
8895       unsigned HOST_WIDE_INT cst_offset = 0;
8896       tree dr_offset;
8897 
8898       gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
8899       gcc_assert (!nested_in_vect_loop);
8900 
8901       if (grouped_load)
8902 	{
8903 	  first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8904 	  first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8905 	}
8906       else
8907 	{
8908 	  first_stmt_info = stmt_info;
8909 	  first_dr_info = dr_info;
8910 	}
8911       if (slp && grouped_load)
8912 	{
8913 	  group_size = DR_GROUP_SIZE (first_stmt_info);
8914 	  ref_type = get_group_alias_ptr_type (first_stmt_info);
8915 	}
8916       else
8917 	{
8918 	  if (grouped_load)
8919 	    cst_offset
8920 	      = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
8921 		 * vect_get_place_in_interleaving_chain (stmt_info,
8922 							 first_stmt_info));
8923 	  group_size = 1;
8924 	  ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
8925 	}
8926 
8927       dr_offset = get_dr_vinfo_offset (first_dr_info);
8928       stride_base
8929 	= fold_build_pointer_plus
8930 	    (DR_BASE_ADDRESS (first_dr_info->dr),
8931 	     size_binop (PLUS_EXPR,
8932 			 convert_to_ptrofftype (dr_offset),
8933 			 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
8934       stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
8935 
8936       /* For a load with loop-invariant (but other than power-of-2)
8937          stride (i.e. not a grouped access) like so:
8938 
8939 	   for (i = 0; i < n; i += stride)
8940 	     ... = array[i];
8941 
8942 	 we generate a new induction variable and new accesses to
8943 	 form a new vector (or vectors, depending on ncopies):
8944 
8945 	   for (j = 0; ; j += VF*stride)
8946 	     tmp1 = array[j];
8947 	     tmp2 = array[j + stride];
8948 	     ...
8949 	     vectemp = {tmp1, tmp2, ...}
8950          */
8951 
8952       ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
8953 			    build_int_cst (TREE_TYPE (stride_step), vf));
8954 
8955       standard_iv_increment_position (loop, &incr_gsi, &insert_after);
8956 
8957       stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
8958       ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
8959       create_iv (stride_base, ivstep, NULL,
8960 		 loop, &incr_gsi, insert_after,
8961 		 &offvar, NULL);
8962       incr = gsi_stmt (incr_gsi);
8963       loop_vinfo->add_stmt (incr);
8964 
8965       stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
8966 
8967       prev_stmt_info = NULL;
8968       running_off = offvar;
8969       alias_off = build_int_cst (ref_type, 0);
8970       int nloads = const_nunits;
8971       int lnel = 1;
8972       tree ltype = TREE_TYPE (vectype);
8973       tree lvectype = vectype;
8974       auto_vec<tree> dr_chain;
8975       if (memory_access_type == VMAT_STRIDED_SLP)
8976 	{
8977 	  if (group_size < const_nunits)
8978 	    {
8979 	      /* First check if vec_init optab supports construction from vector
8980 		 elts directly.  Otherwise avoid emitting a constructor of
8981 		 vector elements by performing the loads using an integer type
8982 		 of the same size, constructing a vector of those and then
8983 		 re-interpreting it as the original vector type.  This avoids a
8984 		 huge runtime penalty due to the general inability to perform
8985 		 store forwarding from smaller stores to a larger load.  */
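     	      /* For example, with const_nunits == 8 and group_size == 2 this
     		 uses nloads == 4 loads of two-element pieces (two-element
     		 sub-vectors, or same-size integers when the target cannot
     		 build a vector from sub-vectors), builds a four-piece vector
     		 of those and view-converts the result back to VECTYPE.  */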
8986 	      tree ptype;
8987 	      tree vtype
8988 		= vector_vector_composition_type (vectype,
8989 						  const_nunits / group_size,
8990 						  &ptype);
8991 	      if (vtype != NULL_TREE)
8992 		{
8993 		  nloads = const_nunits / group_size;
8994 		  lnel = group_size;
8995 		  lvectype = vtype;
8996 		  ltype = ptype;
8997 		}
8998 	    }
8999 	  else
9000 	    {
9001 	      nloads = 1;
9002 	      lnel = const_nunits;
9003 	      ltype = vectype;
9004 	    }
9005 	  ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
9006 	}
9007       /* Load the whole vector(1) scalar_type directly if the vectype has
	 just one element.  */
9008       else if (nloads == 1)
9009 	ltype = vectype;
9010 
9011       if (slp)
9012 	{
9013 	  /* For SLP permutation support we need to load the whole group,
9014 	     not only the number of vector stmts the permutation result
9015 	     fits in.  */
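     	  /* For example, with group_size == 3, VF == 4 and const_nunits == 4
     	     this loads CEIL (3 * 4, 4) == 3 vectors even when the permuted
     	     result would fit in fewer vector stmts.  */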
9016 	  if (slp_perm)
9017 	    {
9018 	      /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
9019 		 variable VF.  */
9020 	      unsigned int const_vf = vf.to_constant ();
9021 	      ncopies = CEIL (group_size * const_vf, const_nunits);
9022 	      dr_chain.create (ncopies);
9023 	    }
9024 	  else
9025 	    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9026 	}
9027       unsigned int group_el = 0;
9028       unsigned HOST_WIDE_INT
9029 	elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
9030       for (j = 0; j < ncopies; j++)
9031 	{
9032 	  if (nloads > 1)
9033 	    vec_alloc (v, nloads);
9034 	  stmt_vec_info new_stmt_info = NULL;
9035 	  for (i = 0; i < nloads; i++)
9036 	    {
9037 	      tree this_off = build_int_cst (TREE_TYPE (alias_off),
9038 					     group_el * elsz + cst_offset);
9039 	      tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
9040 	      vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9041 	      gassign *new_stmt
9042 		= gimple_build_assign (make_ssa_name (ltype), data_ref);
9043 	      new_stmt_info
9044 		= vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9045 	      if (nloads > 1)
9046 		CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9047 					gimple_assign_lhs (new_stmt));
9048 
9049 	      group_el += lnel;
9050 	      if (! slp
9051 		  || group_el == group_size)
9052 		{
9053 		  tree newoff = copy_ssa_name (running_off);
9054 		  gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
9055 						      running_off, stride_step);
9056 		  vect_finish_stmt_generation (stmt_info, incr, gsi);
9057 
9058 		  running_off = newoff;
9059 		  group_el = 0;
9060 		}
9061 	    }
9062 	  if (nloads > 1)
9063 	    {
9064 	      tree vec_inv = build_constructor (lvectype, v);
9065 	      new_temp = vect_init_vector (stmt_info, vec_inv, lvectype, gsi);
9066 	      new_stmt_info = vinfo->lookup_def (new_temp);
9067 	      if (lvectype != vectype)
9068 		{
9069 		  gassign *new_stmt
9070 		    = gimple_build_assign (make_ssa_name (vectype),
9071 					   VIEW_CONVERT_EXPR,
9072 					   build1 (VIEW_CONVERT_EXPR,
9073 						   vectype, new_temp));
9074 		  new_stmt_info
9075 		    = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9076 		}
9077 	    }
9078 
9079 	  if (slp)
9080 	    {
9081 	      if (slp_perm)
9082 		dr_chain.quick_push (gimple_assign_lhs (new_stmt_info->stmt));
9083 	      else
9084 		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
9085 	    }
9086 	  else
9087 	    {
9088 	      if (j == 0)
9089 		STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
9090 	      else
9091 		STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
9092 	      prev_stmt_info = new_stmt_info;
9093 	    }
9094 	}
9095       if (slp_perm)
9096 	{
9097 	  unsigned n_perms;
9098 	  vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
9099 					slp_node_instance, false, &n_perms);
9100 	}
9101       return true;
9102     }
9103 
9104   if (memory_access_type == VMAT_GATHER_SCATTER
9105       || (!slp && memory_access_type == VMAT_CONTIGUOUS))
9106     grouped_load = false;
9107 
9108   if (grouped_load)
9109     {
9110       first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9111       group_size = DR_GROUP_SIZE (first_stmt_info);
9112       /* For SLP vectorization we directly vectorize a subchain
9113          without permutation.  */
9114       if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
9115 	first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
9116       /* For BB vectorization always use the first stmt to base
9117 	 the data ref pointer on.  */
9118       if (bb_vinfo)
9119 	first_stmt_info_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
9120 
9121       /* Check if the chain of loads is already vectorized.  */
9122       if (STMT_VINFO_VEC_STMT (first_stmt_info)
9123 	  /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
9124 	     ???  But we can only do so if there is exactly one
9125 	     as we have no way to get at the rest.  Leave the CSE
9126 	     opportunity alone.
9127 	     ???  With the group load eventually participating
9128 	     in multiple different permutations (having multiple
9129 	     slp nodes which refer to the same group) the CSE
9130 	     is even wrong code.  See PR56270.  */
9131 	  && !slp)
9132 	{
9133 	  *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
9134 	  return true;
9135 	}
9136       first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
9137       group_gap_adj = 0;
9138 
9139       /* VEC_NUM is the number of vect stmts to be created for this group.  */
9140       if (slp)
9141 	{
9142 	  grouped_load = false;
9143 	  /* If an SLP permutation is from N elements to N elements,
9144 	     and if one vector holds a whole number of N, we can load
9145 	     the inputs to the permutation in the same way as an
9146 	     unpermuted sequence.  In other cases we need to load the
9147 	     whole group, not only the number of vector stmts the
9148 	     permutation result fits in.  */
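     	  /* For example, with 4-element vectors a permutation within a group
     	     of 2 that covers the whole SLP instance can reuse the unpermuted
     	     loads, whereas for a group of 3 we load CEIL (group_size * vf,
     	     nunits) vectors and record the excess elements in group_gap_adj.  */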
9149 	  if (slp_perm
9150 	      && (group_size != SLP_INSTANCE_GROUP_SIZE (slp_node_instance)
9151 		  || !multiple_p (nunits, group_size)))
9152 	    {
9153 	      /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
9154 		 variable VF; see vect_transform_slp_perm_load.  */
9155 	      unsigned int const_vf = vf.to_constant ();
9156 	      unsigned int const_nunits = nunits.to_constant ();
9157 	      vec_num = CEIL (group_size * const_vf, const_nunits);
9158 	      group_gap_adj = vf * group_size - nunits * vec_num;
9159 	    }
9160 	  else
9161 	    {
9162 	      vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9163 	      group_gap_adj
9164 		= group_size - SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
9165 	    }
9166     	}
9167       else
9168 	vec_num = group_size;
9169 
9170       ref_type = get_group_alias_ptr_type (first_stmt_info);
9171     }
9172   else
9173     {
9174       first_stmt_info = stmt_info;
9175       first_dr_info = dr_info;
9176       group_size = vec_num = 1;
9177       group_gap_adj = 0;
9178       ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
9179     }
9180 
9181   /* Gather-scatter accesses perform only component accesses, alignment
9182      is irrelevant for them.  */
9183   if (memory_access_type == VMAT_GATHER_SCATTER)
9184     alignment_support_scheme = dr_unaligned_supported;
9185   else
9186     alignment_support_scheme
9187       = vect_supportable_dr_alignment (first_dr_info, false);
9188 
9189   gcc_assert (alignment_support_scheme);
9190   vec_loop_masks *loop_masks
9191     = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
9192        ? &LOOP_VINFO_MASKS (loop_vinfo)
9193        : NULL);
9194   /* Targets with load-lanes instructions must not require explicit
9195      realignment.  vect_supportable_dr_alignment always returns either
9196      dr_aligned or dr_unaligned_supported for masked operations.  */
9197   gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
9198 	       && !mask
9199 	       && !loop_masks)
9200 	      || alignment_support_scheme == dr_aligned
9201 	      || alignment_support_scheme == dr_unaligned_supported);
9202 
9203   /* In case the vectorization factor (VF) is bigger than the number
9204      of elements that we can fit in a vectype (nunits), we have to generate
9205      more than one vector stmt - i.e., we need to "unroll" the
9206      vector stmt by a factor VF/nunits.  In doing so, we record a pointer
9207      from one copy of the vector stmt to the next, in the field
9208      STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
9209      stages to find the correct vector defs to be used when vectorizing
9210      stmts that use the defs of the current stmt.  The example below
9211      illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
9212      need to create 4 vectorized stmts):
9213 
9214      before vectorization:
9215                                 RELATED_STMT    VEC_STMT
9216         S1:     x = memref      -               -
9217         S2:     z = x + 1       -               -
9218 
9219      step 1: vectorize stmt S1:
9220         We first create the vector stmt VS1_0, and, as usual, record a
9221         pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
9222         Next, we create the vector stmt VS1_1, and record a pointer to
9223         it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
9224         Similarly, for VS1_2 and VS1_3.  This is the resulting chain of
9225         stmts and pointers:
9226                                 RELATED_STMT    VEC_STMT
9227         VS1_0:  vx0 = memref0   VS1_1           -
9228         VS1_1:  vx1 = memref1   VS1_2           -
9229         VS1_2:  vx2 = memref2   VS1_3           -
9230         VS1_3:  vx3 = memref3   -               -
9231         S1:     x = load        -               VS1_0
9232         S2:     z = x + 1       -               -
9233 
9234      See in documentation in vect_get_vec_def_for_stmt_copy for how the
9235      information we recorded in RELATED_STMT field is used to vectorize
9236      stmt S2.  */
9237 
9238   /* In case of interleaving (non-unit grouped access):
9239 
9240      S1:  x2 = &base + 2
9241      S2:  x0 = &base
9242      S3:  x1 = &base + 1
9243      S4:  x3 = &base + 3
9244 
9245      Vectorized loads are created in the order of memory accesses
9246      starting from the access of the first stmt of the chain:
9247 
9248      VS1: vx0 = &base
9249      VS2: vx1 = &base + vec_size*1
9250      VS3: vx3 = &base + vec_size*2
9251      VS4: vx4 = &base + vec_size*3
9252 
9253      Then permutation statements are generated:
9254 
9255      VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
9256      VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
9257        ...
9258 
9259      And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
9260      (the order of the data-refs in the output of vect_permute_load_chain
9261      corresponds to the order of scalar stmts in the interleaving chain - see
9262      the documentation of vect_permute_load_chain()).
9263      The generation of permutation stmts and recording them in
9264      STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
9265 
9266      In case of both multiple types and interleaving, the vector loads and
9267      permutation stmts above are created for every copy.  The result vector
9268      stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
9269      corresponding STMT_VINFO_RELATED_STMT for the next copies.  */
9270 
9271   /* If the data reference is aligned (dr_aligned) or potentially unaligned
9272      on a target that supports unaligned accesses (dr_unaligned_supported)
9273      we generate the following code:
9274          p = initial_addr;
9275          indx = 0;
9276          loop {
9277 	   p = p + indx * vectype_size;
9278            vec_dest = *(p);
9279            indx = indx + 1;
9280          }
9281 
9282      Otherwise, the data reference is potentially unaligned on a target that
9283      does not support unaligned accesses (dr_explicit_realign_optimized) -
9284      then generate the following code, in which the data in each iteration is
9285      obtained by two vector loads, one from the previous iteration, and one
9286      from the current iteration:
9287          p1 = initial_addr;
9288          msq_init = *(floor(p1))
9289          p2 = initial_addr + VS - 1;
9290          realignment_token = call target_builtin;
9291          indx = 0;
9292          loop {
9293            p2 = p2 + indx * vectype_size
9294            lsq = *(floor(p2))
9295            vec_dest = realign_load (msq, lsq, realignment_token)
9296            indx = indx + 1;
9297            msq = lsq;
9298          }   */
9299 
9300   /* If the misalignment remains the same throughout the execution of the
9301      loop, we can create the init_addr and permutation mask at the loop
9302      preheader.  Otherwise, it needs to be created inside the loop.
9303      This can only occur when vectorizing memory accesses in the inner-loop
9304      nested within an outer-loop that is being vectorized.  */
9305 
9306   if (nested_in_vect_loop
9307       && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
9308 		      GET_MODE_SIZE (TYPE_MODE (vectype))))
9309     {
9310       gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
9311       compute_in_loop = true;
9312     }
9313 
9314   bool diff_first_stmt_info
9315     = first_stmt_info_for_drptr && first_stmt_info != first_stmt_info_for_drptr;
9316 
9317   if ((alignment_support_scheme == dr_explicit_realign_optimized
9318        || alignment_support_scheme == dr_explicit_realign)
9319       && !compute_in_loop)
9320     {
9321       /* If we have a different first_stmt_info, we can't set up the
9322 	 realignment here, since we can't guarantee that the first_stmt_info
9323 	 DR has been initialized yet; instead use the first_stmt_info_for_drptr
9324 	 DR, bumping it by the distance from the first_stmt_info DR as below.  */
9325       if (!diff_first_stmt_info)
9326 	msq = vect_setup_realignment (first_stmt_info, gsi, &realignment_token,
9327 				      alignment_support_scheme, NULL_TREE,
9328 				      &at_loop);
9329       if (alignment_support_scheme == dr_explicit_realign_optimized)
9330 	{
9331 	  phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
9332 	  byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
9333 				    size_one_node);
9334 	  gcc_assert (!first_stmt_info_for_drptr);
9335 	}
9336     }
9337   else
9338     at_loop = loop;
9339 
9340   if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9341     offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
9342 
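       /* Decide what type the data-ref pointer should point to (AGGR_TYPE)
          and by how much it is advanced for each copy (BUMP); for true
          gather/scatter accesses the address is formed from the offset
          vector instead.  */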
9343   tree bump;
9344   tree vec_offset = NULL_TREE;
9345   if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9346     {
9347       aggr_type = NULL_TREE;
9348       bump = NULL_TREE;
9349     }
9350   else if (memory_access_type == VMAT_GATHER_SCATTER)
9351     {
9352       aggr_type = elem_type;
9353       vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
9354 				       &bump, &vec_offset);
9355     }
9356   else
9357     {
9358       if (memory_access_type == VMAT_LOAD_STORE_LANES)
9359 	aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
9360       else
9361 	aggr_type = vectype;
9362       bump = vect_get_data_ptr_increment (dr_info, aggr_type,
9363 					  memory_access_type);
9364     }
9365 
9366   tree vec_mask = NULL_TREE;
9367   prev_stmt_info = NULL;
9368   poly_uint64 group_elt = 0;
9369   for (j = 0; j < ncopies; j++)
9370     {
9371       stmt_vec_info new_stmt_info = NULL;
9372       /* 1. Create the vector or array pointer update chain.  */
9373       if (j == 0)
9374 	{
9375 	  bool simd_lane_access_p
9376 	    = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
9377 	  if (simd_lane_access_p
9378 	      && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
9379 	      && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
9380 	      && integer_zerop (get_dr_vinfo_offset (first_dr_info))
9381 	      && integer_zerop (DR_INIT (first_dr_info->dr))
9382 	      && alias_sets_conflict_p (get_alias_set (aggr_type),
9383 					get_alias_set (TREE_TYPE (ref_type)))
9384 	      && (alignment_support_scheme == dr_aligned
9385 		  || alignment_support_scheme == dr_unaligned_supported))
9386 	    {
9387 	      dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
9388 	      dataref_offset = build_int_cst (ref_type, 0);
9389 	    }
9390 	  else if (diff_first_stmt_info)
9391 	    {
9392 	      dataref_ptr
9393 		= vect_create_data_ref_ptr (first_stmt_info_for_drptr,
9394 					    aggr_type, at_loop, offset, &dummy,
9395 					    gsi, &ptr_incr, simd_lane_access_p,
9396 					    byte_offset, bump);
9397 	      /* Adjust the pointer by the difference to first_stmt.  */
9398 	      data_reference_p ptrdr
9399 		= STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
9400 	      tree diff
9401 		= fold_convert (sizetype,
9402 				size_binop (MINUS_EXPR,
9403 					    DR_INIT (first_dr_info->dr),
9404 					    DR_INIT (ptrdr)));
9405 	      dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
9406 					     stmt_info, diff);
9407 	      if (alignment_support_scheme == dr_explicit_realign)
9408 		{
9409 		  msq = vect_setup_realignment (first_stmt_info_for_drptr, gsi,
9410 						&realignment_token,
9411 						alignment_support_scheme,
9412 						dataref_ptr, &at_loop);
9413 		  gcc_assert (!compute_in_loop);
9414 		}
9415 	    }
9416 	  else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9417 	    vect_get_gather_scatter_ops (loop, stmt_info, &gs_info,
9418 					 &dataref_ptr, &vec_offset);
9419 	  else
9420 	    dataref_ptr
9421 	      = vect_create_data_ref_ptr (first_stmt_info, aggr_type, at_loop,
9422 					  offset, &dummy, gsi, &ptr_incr,
9423 					  simd_lane_access_p,
9424 					  byte_offset, bump);
9425 	  if (mask)
9426 	    {
9427 	      if (slp_node)
9428 		{
9429 		  auto_vec<vec<tree> > vec_defs (1);
9430 		  vect_get_slp_defs (slp_node, &vec_defs);
9431 		  vec_mask = vec_defs[0][0];
9432 		}
9433 	      else
9434 		vec_mask = vect_get_vec_def_for_operand (mask, stmt_info,
9435 							 mask_vectype);
9436 	    }
9437 	}
9438       else
9439 	{
9440 	  if (dataref_offset)
9441 	    dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
9442 					      bump);
9443 	  else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9444 	    vec_offset = vect_get_vec_def_for_stmt_copy (vinfo, vec_offset);
9445 	  else
9446 	    dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
9447 					   stmt_info, bump);
9448 	  if (mask)
9449 	    vec_mask = vect_get_vec_def_for_stmt_copy (vinfo, vec_mask);
9450 	}
9451 
9452       if (grouped_load || slp_perm)
9453 	dr_chain.create (vec_num);
9454 
9455       if (memory_access_type == VMAT_LOAD_STORE_LANES)
9456 	{
9457 	  tree vec_array;
9458 
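     	  /* VEC_ARRAY is a temporary array of VEC_NUM vectors; the (masked)
     	     load-lanes call below fills it and each lane vector is then
     	     extracted into its own SSA_NAME.  */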
9459 	  vec_array = create_vector_array (vectype, vec_num);
9460 
9461 	  tree final_mask = NULL_TREE;
9462 	  if (loop_masks)
9463 	    final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
9464 					     vectype, j);
9465 	  if (vec_mask)
9466 	    final_mask = prepare_load_store_mask (mask_vectype, final_mask,
9467 						  vec_mask, gsi);
9468 
9469 	  gcall *call;
9470 	  if (final_mask)
9471 	    {
9472 	      /* Emit:
9473 		   VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
9474 		                                VEC_MASK).  */
9475 	      unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
9476 	      tree alias_ptr = build_int_cst (ref_type, align);
9477 	      call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
9478 						 dataref_ptr, alias_ptr,
9479 						 final_mask);
9480 	    }
9481 	  else
9482 	    {
9483 	      /* Emit:
9484 		   VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]).  */
9485 	      data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
9486 	      call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
9487 	    }
9488 	  gimple_call_set_lhs (call, vec_array);
9489 	  gimple_call_set_nothrow (call, true);
9490 	  new_stmt_info = vect_finish_stmt_generation (stmt_info, call, gsi);
9491 
9492 	  /* Extract each vector into an SSA_NAME.  */
9493 	  for (i = 0; i < vec_num; i++)
9494 	    {
9495 	      new_temp = read_vector_array (stmt_info, gsi, scalar_dest,
9496 					    vec_array, i);
9497 	      dr_chain.quick_push (new_temp);
9498 	    }
9499 
9500 	  /* Record the mapping between SSA_NAMEs and statements.  */
9501 	  vect_record_grouped_load_vectors (stmt_info, dr_chain);
9502 
9503 	  /* Record that VEC_ARRAY is now dead.  */
9504 	  vect_clobber_variable (stmt_info, gsi, vec_array);
9505 	}
9506       else
9507 	{
9508 	  for (i = 0; i < vec_num; i++)
9509 	    {
9510 	      tree final_mask = NULL_TREE;
9511 	      if (loop_masks
9512 		  && memory_access_type != VMAT_INVARIANT)
9513 		final_mask = vect_get_loop_mask (gsi, loop_masks,
9514 						 vec_num * ncopies,
9515 						 vectype, vec_num * j + i);
9516 	      if (vec_mask)
9517 		final_mask = prepare_load_store_mask (mask_vectype, final_mask,
9518 						      vec_mask, gsi);
9519 
9520 	      if (i > 0)
9521 		dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
9522 					       stmt_info, bump);
9523 
9524 	      /* 2. Create the vector-load in the loop.  */
9525 	      gimple *new_stmt = NULL;
9526 	      switch (alignment_support_scheme)
9527 		{
9528 		case dr_aligned:
9529 		case dr_unaligned_supported:
9530 		  {
9531 		    unsigned int misalign;
9532 		    unsigned HOST_WIDE_INT align;
9533 
9534 		    if (memory_access_type == VMAT_GATHER_SCATTER)
9535 		      {
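      			/* Emit a .MASK_GATHER_LOAD when a loop mask is
      			   available and a plain .GATHER_LOAD otherwise.  */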
9536 			tree zero = build_zero_cst (vectype);
9537 			tree scale = size_int (gs_info.scale);
9538 			gcall *call;
9539 			if (loop_masks)
9540 			  call = gimple_build_call_internal
9541 			    (IFN_MASK_GATHER_LOAD, 5, dataref_ptr,
9542 			     vec_offset, scale, zero, final_mask);
9543 			else
9544 			  call = gimple_build_call_internal
9545 			    (IFN_GATHER_LOAD, 4, dataref_ptr,
9546 			     vec_offset, scale, zero);
9547 			gimple_call_set_nothrow (call, true);
9548 			new_stmt = call;
9549 			data_ref = NULL_TREE;
9550 			break;
9551 		      }
9552 
9553 		    align =
9554 		      known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
9555 		    if (alignment_support_scheme == dr_aligned)
9556 		      {
9557 			gcc_assert (aligned_access_p (first_dr_info));
9558 			misalign = 0;
9559 		      }
9560 		    else if (DR_MISALIGNMENT (first_dr_info) == -1)
9561 		      {
9562 			align = dr_alignment
9563 			  (vect_dr_behavior (first_dr_info));
9564 			misalign = 0;
9565 		      }
9566 		    else
9567 		      misalign = DR_MISALIGNMENT (first_dr_info);
9568 		    if (dataref_offset == NULL_TREE
9569 			&& TREE_CODE (dataref_ptr) == SSA_NAME)
9570 		      set_ptr_info_alignment (get_ptr_info (dataref_ptr),
9571 					      align, misalign);
9572 
9573 		    if (final_mask)
9574 		      {
9575 			align = least_bit_hwi (misalign | align);
9576 			tree ptr = build_int_cst (ref_type, align);
9577 			gcall *call
9578 			  = gimple_build_call_internal (IFN_MASK_LOAD, 3,
9579 							dataref_ptr, ptr,
9580 							final_mask);
9581 			gimple_call_set_nothrow (call, true);
9582 			new_stmt = call;
9583 			data_ref = NULL_TREE;
9584 		      }
9585 		    else
9586 		      {
9587 			tree ltype = vectype;
9588 			tree new_vtype = NULL_TREE;
9589 			/* If there's no peeling for gaps but we have a gap
9590 			   with slp loads then load the lower half of the
9591 			   vector only.  See get_group_load_store_type for
9592 			   when we apply this optimization.  */
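     			/* For example, for a group of size 4 with a gap of 2
     			   and a 4-element vector we load just the first two
     			   elements and fill the other half of the vector with
     			   zeros below.  */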
9593 			if (slp
9594 			    && loop_vinfo
9595 			    && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
9596 			    && DR_GROUP_GAP (first_stmt_info) != 0
9597 			    && known_eq (nunits,
9598 					 (group_size
9599 					  - DR_GROUP_GAP (first_stmt_info)) * 2)
9600 			    && known_eq (nunits, group_size))
9601 			  {
9602 			    tree half_vtype;
9603 			    new_vtype
9604 			      = vector_vector_composition_type (vectype, 2,
9605 								&half_vtype);
9606 			    if (new_vtype != NULL_TREE)
9607 			      ltype = half_vtype;
9608 			  }
9609 			tree offset
9610 			  = (dataref_offset ? dataref_offset
9611 					    : build_int_cst (ref_type, 0));
9612 			if (ltype != vectype
9613 			    && memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9614 			  {
9615 			    unsigned HOST_WIDE_INT gap
9616 			      = DR_GROUP_GAP (first_stmt_info);
9617 			    gap *= tree_to_uhwi (TYPE_SIZE_UNIT (elem_type));
9618 			    tree gapcst = build_int_cst (ref_type, gap);
9619 			    offset = size_binop (PLUS_EXPR, offset, gapcst);
9620 			  }
9621 			data_ref
9622 			  = fold_build2 (MEM_REF, ltype, dataref_ptr, offset);
9623 			if (alignment_support_scheme == dr_aligned)
9624 			  ;
9625 			else if (DR_MISALIGNMENT (first_dr_info) == -1)
9626 			  TREE_TYPE (data_ref)
9627 			    = build_aligned_type (TREE_TYPE (data_ref),
9628 						  align * BITS_PER_UNIT);
9629 			else
9630 			  TREE_TYPE (data_ref)
9631 			    = build_aligned_type (TREE_TYPE (data_ref),
9632 						  TYPE_ALIGN (elem_type));
9633 			if (ltype != vectype)
9634 			  {
9635 			    vect_copy_ref_info (data_ref,
9636 						DR_REF (first_dr_info->dr));
9637 			    tree tem = make_ssa_name (ltype);
9638 			    new_stmt = gimple_build_assign (tem, data_ref);
9639 			    vect_finish_stmt_generation (stmt_info, new_stmt,
9640 							 gsi);
9641 			    data_ref = NULL;
9642 			    vec<constructor_elt, va_gc> *v;
9643 			    vec_alloc (v, 2);
9644 			    if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9645 			      {
9646 				CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9647 							build_zero_cst (ltype));
9648 				CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9649 			      }
9650 			    else
9651 			      {
9652 				CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9653 				CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9654 							build_zero_cst (ltype));
9655 			      }
9656 			    gcc_assert (new_vtype != NULL_TREE);
9657 			    if (new_vtype == vectype)
9658 			      new_stmt = gimple_build_assign (
9659 				vec_dest, build_constructor (vectype, v));
9660 			    else
9661 			      {
9662 				tree new_vname = make_ssa_name (new_vtype);
9663 				new_stmt = gimple_build_assign (
9664 				  new_vname, build_constructor (new_vtype, v));
9665 				vect_finish_stmt_generation (stmt_info,
9666 							     new_stmt, gsi);
9667 				new_stmt = gimple_build_assign (
9668 				  vec_dest, build1 (VIEW_CONVERT_EXPR, vectype,
9669 						    new_vname));
9670 			      }
9671 			  }
9672 		      }
9673 		    break;
9674 		  }
9675 		case dr_explicit_realign:
9676 		  {
9677 		    tree ptr, bump;
9678 
9679 		    tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
9680 
9681 		    if (compute_in_loop)
9682 		      msq = vect_setup_realignment (first_stmt_info, gsi,
9683 						    &realignment_token,
9684 						    dr_explicit_realign,
9685 						    dataref_ptr, NULL);
9686 
9687 		    if (TREE_CODE (dataref_ptr) == SSA_NAME)
9688 		      ptr = copy_ssa_name (dataref_ptr);
9689 		    else
9690 		      ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
9691 		    // For explicit realign the target alignment should be
9692 		    // known at compile time.
9693 		    unsigned HOST_WIDE_INT align =
9694 		      DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
9695 		    new_stmt = gimple_build_assign
9696 				 (ptr, BIT_AND_EXPR, dataref_ptr,
9697 				  build_int_cst
9698 				  (TREE_TYPE (dataref_ptr),
9699 				   -(HOST_WIDE_INT) align));
9700 		    vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9701 		    data_ref
9702 		      = build2 (MEM_REF, vectype, ptr,
9703 				build_int_cst (ref_type, 0));
9704 		    vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9705 		    vec_dest = vect_create_destination_var (scalar_dest,
9706 							    vectype);
9707 		    new_stmt = gimple_build_assign (vec_dest, data_ref);
9708 		    new_temp = make_ssa_name (vec_dest, new_stmt);
9709 		    gimple_assign_set_lhs (new_stmt, new_temp);
9710 		    gimple_move_vops (new_stmt, stmt_info->stmt);
9711 		    vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9712 		    msq = new_temp;
9713 
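     		    /* The second (LSQ) load reads from the aligned vector
     		       that contains the last byte of the unaligned access,
     		       i.e. from floor (DATAREF_PTR + vector_size - 1).  */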
9714 		    bump = size_binop (MULT_EXPR, vs,
9715 				       TYPE_SIZE_UNIT (elem_type));
9716 		    bump = size_binop (MINUS_EXPR, bump, size_one_node);
9717 		    ptr = bump_vector_ptr (dataref_ptr, NULL, gsi,
9718 					   stmt_info, bump);
9719 		    new_stmt = gimple_build_assign
9720 				 (NULL_TREE, BIT_AND_EXPR, ptr,
9721 				  build_int_cst
9722 				  (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
9723 		    ptr = copy_ssa_name (ptr, new_stmt);
9724 		    gimple_assign_set_lhs (new_stmt, ptr);
9725 		    vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9726 		    data_ref
9727 		      = build2 (MEM_REF, vectype, ptr,
9728 				build_int_cst (ref_type, 0));
9729 		    break;
9730 		  }
9731 		case dr_explicit_realign_optimized:
9732 		  {
9733 		    if (TREE_CODE (dataref_ptr) == SSA_NAME)
9734 		      new_temp = copy_ssa_name (dataref_ptr);
9735 		    else
9736 		      new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
9737 		    // We should only be doing this if we know the target
9738 		    // alignment at compile time.
9739 		    unsigned HOST_WIDE_INT align =
9740 		      DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
9741 		    new_stmt = gimple_build_assign
9742 		      (new_temp, BIT_AND_EXPR, dataref_ptr,
9743 		       build_int_cst (TREE_TYPE (dataref_ptr),
9744 				     -(HOST_WIDE_INT) align));
9745 		    vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9746 		    data_ref
9747 		      = build2 (MEM_REF, vectype, new_temp,
9748 				build_int_cst (ref_type, 0));
9749 		    break;
9750 		  }
9751 		default:
9752 		  gcc_unreachable ();
9753 		}
9754 	      vec_dest = vect_create_destination_var (scalar_dest, vectype);
9755 	      /* DATA_REF is null if we've already built the statement.  */
9756 	      if (data_ref)
9757 		{
9758 		  vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9759 		  new_stmt = gimple_build_assign (vec_dest, data_ref);
9760 		}
9761 	      new_temp = make_ssa_name (vec_dest, new_stmt);
9762 	      gimple_set_lhs (new_stmt, new_temp);
9763 	      new_stmt_info
9764 		= vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9765 
9766 	      /* 3. Handle explicit realignment if necessary/supported.
9767 		 Create in loop:
9768 		   vec_dest = realign_load (msq, lsq, realignment_token)  */
9769 	      if (alignment_support_scheme == dr_explicit_realign_optimized
9770 		  || alignment_support_scheme == dr_explicit_realign)
9771 		{
9772 		  lsq = gimple_assign_lhs (new_stmt);
9773 		  if (!realignment_token)
9774 		    realignment_token = dataref_ptr;
9775 		  vec_dest = vect_create_destination_var (scalar_dest, vectype);
9776 		  new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
9777 						  msq, lsq, realignment_token);
9778 		  new_temp = make_ssa_name (vec_dest, new_stmt);
9779 		  gimple_assign_set_lhs (new_stmt, new_temp);
9780 		  new_stmt_info
9781 		    = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9782 
9783 		  if (alignment_support_scheme == dr_explicit_realign_optimized)
9784 		    {
9785 		      gcc_assert (phi);
9786 		      if (i == vec_num - 1 && j == ncopies - 1)
9787 			add_phi_arg (phi, lsq,
9788 				     loop_latch_edge (containing_loop),
9789 				     UNKNOWN_LOCATION);
9790 		      msq = lsq;
9791 		    }
9792 		}
9793 
9794 	      if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9795 		{
9796 		  tree perm_mask = perm_mask_for_reverse (vectype);
9797 		  new_temp = permute_vec_elements (new_temp, new_temp,
9798 						   perm_mask, stmt_info, gsi);
9799 		  new_stmt_info = vinfo->lookup_def (new_temp);
9800 		}
9801 
9802 	      /* Collect vector loads and later create their permutation in
9803 		 vect_transform_grouped_load ().  */
9804 	      if (grouped_load || slp_perm)
9805 		dr_chain.quick_push (new_temp);
9806 
9807 	      /* Store vector loads in the corresponding SLP_NODE.  */
9808 	      if (slp && !slp_perm)
9809 		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
9810 
9811 	      /* With SLP permutation we load the gaps as well; without
9812 	         it we need to skip the gaps once we have managed to load
9813 		 all the elements.  group_gap_adj is DR_GROUP_SIZE here.  */
9814 	      group_elt += nunits;
9815 	      if (maybe_ne (group_gap_adj, 0U)
9816 		  && !slp_perm
9817 		  && known_eq (group_elt, group_size - group_gap_adj))
9818 		{
9819 		  poly_wide_int bump_val
9820 		    = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
9821 		       * group_gap_adj);
9822 		  tree bump = wide_int_to_tree (sizetype, bump_val);
9823 		  dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
9824 						 stmt_info, bump);
9825 		  group_elt = 0;
9826 		}
9827 	    }
9828 	  /* Bump the vector pointer to account for a gap or for excess
9829 	     elements loaded for a permuted SLP load.  */
9830 	  if (maybe_ne (group_gap_adj, 0U) && slp_perm)
9831 	    {
9832 	      poly_wide_int bump_val
9833 		= (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
9834 		   * group_gap_adj);
9835 	      tree bump = wide_int_to_tree (sizetype, bump_val);
9836 	      dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
9837 					     stmt_info, bump);
9838 	    }
9839 	}
9840 
9841       if (slp && !slp_perm)
9842 	continue;
9843 
9844       if (slp_perm)
9845         {
9846 	  unsigned n_perms;
9847           if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
9848                                              slp_node_instance, false,
9849 					     &n_perms))
9850             {
9851               dr_chain.release ();
9852               return false;
9853             }
9854         }
9855       else
9856         {
9857           if (grouped_load)
9858   	    {
9859 	      if (memory_access_type != VMAT_LOAD_STORE_LANES)
9860 		vect_transform_grouped_load (stmt_info, dr_chain,
9861 					     group_size, gsi);
9862 	      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
9863 	    }
9864           else
9865 	    {
9866 	      if (j == 0)
9867 	        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
9868 	      else
9869 	        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
9870 	      prev_stmt_info = new_stmt_info;
9871 	    }
9872         }
9873       dr_chain.release ();
9874     }
9875 
9876   return true;
9877 }
9878 
9879 /* Function vect_is_simple_cond.
9880 
9881    Input:
9882    VINFO - the vec_info of the loop or basic block that is being vectorized.
9883    COND - Condition that is checked for simple use.
9884 
9885    Output:
9886    *COMP_VECTYPE - the vector type for the comparison.
9887    *DTS - The def types for the arguments of the comparison
9888 
9889    Returns whether a COND can be vectorized.  Checks whether
9890    condition operands are supportable using vect_is_simple_use.  */
9891 
9892 static bool
9893 vect_is_simple_cond (tree cond, vec_info *vinfo, slp_tree slp_node,
9894 		     tree *comp_vectype, enum vect_def_type *dts,
9895 		     tree vectype)
9896 {
9897   tree lhs, rhs;
9898   tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
9899 
9900   /* Mask case.  */
9901   if (TREE_CODE (cond) == SSA_NAME
9902       && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
9903     {
9904       if (!vect_is_simple_use (cond, vinfo, &dts[0], comp_vectype)
9905 	  || !*comp_vectype
9906 	  || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
9907 	return false;
9908       return true;
9909     }
9910 
9911   if (!COMPARISON_CLASS_P (cond))
9912     return false;
9913 
9914   lhs = TREE_OPERAND (cond, 0);
9915   rhs = TREE_OPERAND (cond, 1);
9916 
9917   if (TREE_CODE (lhs) == SSA_NAME)
9918     {
9919       if (!vect_is_simple_use (lhs, vinfo, &dts[0], &vectype1))
9920 	return false;
9921     }
9922   else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
9923 	   || TREE_CODE (lhs) == FIXED_CST)
9924     dts[0] = vect_constant_def;
9925   else
9926     return false;
9927 
9928   if (TREE_CODE (rhs) == SSA_NAME)
9929     {
9930       if (!vect_is_simple_use (rhs, vinfo, &dts[1], &vectype2))
9931 	return false;
9932     }
9933   else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
9934 	   || TREE_CODE (rhs) == FIXED_CST)
9935     dts[1] = vect_constant_def;
9936   else
9937     return false;
9938 
9939   if (vectype1 && vectype2
9940       && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
9941 		   TYPE_VECTOR_SUBPARTS (vectype2)))
9942     return false;
9943 
9944   *comp_vectype = vectype1 ? vectype1 : vectype2;
9945   /* Invariant comparison.  */
9946   if (! *comp_vectype)
9947     {
9948       tree scalar_type = TREE_TYPE (lhs);
9949       if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
9950 	*comp_vectype = truth_type_for (vectype);
9951       else
9952 	{
9953 	  /* If we can widen the comparison to match vectype do so.  */
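     	  /* E.g. a comparison of chars feeding an int-element COND_EXPR is
     	     widened to an int-sized comparison so that the comparison vector
     	     type has the same number of elements as VECTYPE.  */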
9954 	  if (INTEGRAL_TYPE_P (scalar_type)
9955 	      && !slp_node
9956 	      && tree_int_cst_lt (TYPE_SIZE (scalar_type),
9957 				  TYPE_SIZE (TREE_TYPE (vectype))))
9958 	    scalar_type = build_nonstandard_integer_type
9959 	      (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))),
9960 	       TYPE_UNSIGNED (scalar_type));
9961 	  *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
9962 						       slp_node);
9963 	}
9964     }
9965 
9966   return true;
9967 }
9968 
9969 /* vectorizable_condition.
9970 
9971    Check if STMT_INFO is a conditional modify expression that can be vectorized.
9972    If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
9973    stmt using VEC_COND_EXPR  to replace it, put it in VEC_STMT, and insert it
9974    at GSI.
9975 
9976    When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
9977 
9978    Return true if STMT_INFO is vectorizable in this way.  */
9979 
9980 static bool
9981 vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
9982 			stmt_vec_info *vec_stmt,
9983 			slp_tree slp_node, stmt_vector_for_cost *cost_vec)
9984 {
9985   vec_info *vinfo = stmt_info->vinfo;
9986   tree scalar_dest = NULL_TREE;
9987   tree vec_dest = NULL_TREE;
9988   tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
9989   tree then_clause, else_clause;
9990   tree comp_vectype = NULL_TREE;
9991   tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
9992   tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
9993   tree vec_compare;
9994   tree new_temp;
9995   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
9996   enum vect_def_type dts[4]
9997     = {vect_unknown_def_type, vect_unknown_def_type,
9998        vect_unknown_def_type, vect_unknown_def_type};
9999   int ndts = 4;
10000   int ncopies;
10001   int vec_num;
10002   enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
10003   stmt_vec_info prev_stmt_info = NULL;
10004   int i, j;
10005   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
10006   vec<tree> vec_oprnds0 = vNULL;
10007   vec<tree> vec_oprnds1 = vNULL;
10008   vec<tree> vec_oprnds2 = vNULL;
10009   vec<tree> vec_oprnds3 = vNULL;
10010   tree vec_cmp_type;
10011   bool masked = false;
10012 
10013   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
10014     return false;
10015 
10016   /* Is vectorizable conditional operation?  */
10017   gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
10018   if (!stmt)
10019     return false;
10020 
10021   code = gimple_assign_rhs_code (stmt);
10022   if (code != COND_EXPR)
10023     return false;
10024 
10025   stmt_vec_info reduc_info = NULL;
10026   int reduc_index = -1;
10027   vect_reduction_type reduction_type = TREE_CODE_REDUCTION;
10028   bool for_reduction
10029     = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL;
10030   if (for_reduction)
10031     {
10032       if (STMT_SLP_TYPE (stmt_info))
10033 	return false;
10034       reduc_info = info_for_reduction (stmt_info);
10035       reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
10036       reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
10037       gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION
10038 		  || reduc_index != -1);
10039     }
10040   else
10041     {
10042       if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
10043 	return false;
10044 
10045       /* FORNOW: only supported as part of a reduction.  */
10046       if (STMT_VINFO_LIVE_P (stmt_info))
10047 	{
10048 	  if (dump_enabled_p ())
10049 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10050 			     "value used after loop.\n");
10051 	  return false;
10052 	}
10053     }
10054 
10055   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
10056   tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10057 
10058   if (slp_node)
10059     {
10060       ncopies = 1;
10061       vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10062     }
10063   else
10064     {
10065       ncopies = vect_get_num_copies (loop_vinfo, vectype);
10066       vec_num = 1;
10067     }
10068 
10069   gcc_assert (ncopies >= 1);
10070   if (for_reduction && ncopies > 1)
10071     return false; /* FORNOW */
10072 
10073   cond_expr = gimple_assign_rhs1 (stmt);
10074   then_clause = gimple_assign_rhs2 (stmt);
10075   else_clause = gimple_assign_rhs3 (stmt);
10076 
10077   if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo, slp_node,
10078 			    &comp_vectype, &dts[0], vectype)
10079       || !comp_vectype)
10080     return false;
10081 
10082   if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &dts[2], &vectype1))
10083     return false;
10084   if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &dts[3], &vectype2))
10085     return false;
10086 
10087   if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
10088     return false;
10089 
10090   if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
10091     return false;
10092 
10093   masked = !COMPARISON_CLASS_P (cond_expr);
10094   vec_cmp_type = truth_type_for (comp_vectype);
10095 
10096   if (vec_cmp_type == NULL_TREE)
10097     return false;
10098 
10099   cond_code = TREE_CODE (cond_expr);
10100   if (!masked)
10101     {
10102       cond_expr0 = TREE_OPERAND (cond_expr, 0);
10103       cond_expr1 = TREE_OPERAND (cond_expr, 1);
10104     }
10105 
10106   /* For conditional reductions, the "then" value needs to be the candidate
10107      value calculated by this iteration while the "else" value needs to be
10108      the result carried over from previous iterations.  If the COND_EXPR
10109      is the other way around, we need to swap it.  */
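        /* For example, if the scalar statement is res = COND ? res : val
           (i.e. REDUC_INDEX == 1), the comparison (or the mask) is inverted
           and the then/else clauses are swapped below so that the candidate
           value ends up in the "then" position expected by
           IFN_FOLD_EXTRACT_LAST.  */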
10110   bool must_invert_cmp_result = false;
10111   if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1)
10112     {
10113       if (masked)
10114 	must_invert_cmp_result = true;
10115       else
10116 	{
10117 	  bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0));
10118 	  tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
10119 	  if (new_code == ERROR_MARK)
10120 	    must_invert_cmp_result = true;
10121 	  else
10122 	    {
10123 	      cond_code = new_code;
10124 	      /* Make sure we don't accidentally use the old condition.  */
10125 	      cond_expr = NULL_TREE;
10126 	    }
10127 	}
10128       std::swap (then_clause, else_clause);
10129     }
10130 
10131   if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
10132     {
10133       /* Boolean values may have another representation in vectors
10134 	 and therefore we prefer bit operations over comparison for
10135 	 them (which also works for scalar masks).  We store opcodes
10136 	 to use in bitop1 and bitop2.  Statement is vectorized as
10137 	 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
10138 	 depending on bitop1 and bitop2 arity.  */
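        /* For example, on boolean masks a > b is computed as a & ~b,
           a >= b as a | ~b and a != b as a ^ b; for a == b the a ^ b
           result is used with the then/else clauses swapped instead of
           negating it.  */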
10139       switch (cond_code)
10140 	{
10141 	case GT_EXPR:
10142 	  bitop1 = BIT_NOT_EXPR;
10143 	  bitop2 = BIT_AND_EXPR;
10144 	  break;
10145 	case GE_EXPR:
10146 	  bitop1 = BIT_NOT_EXPR;
10147 	  bitop2 = BIT_IOR_EXPR;
10148 	  break;
10149 	case LT_EXPR:
10150 	  bitop1 = BIT_NOT_EXPR;
10151 	  bitop2 = BIT_AND_EXPR;
10152 	  std::swap (cond_expr0, cond_expr1);
10153 	  break;
10154 	case LE_EXPR:
10155 	  bitop1 = BIT_NOT_EXPR;
10156 	  bitop2 = BIT_IOR_EXPR;
10157 	  std::swap (cond_expr0, cond_expr1);
10158 	  break;
10159 	case NE_EXPR:
10160 	  bitop1 = BIT_XOR_EXPR;
10161 	  break;
10162 	case EQ_EXPR:
10163 	  bitop1 = BIT_XOR_EXPR;
10164 	  bitop2 = BIT_NOT_EXPR;
10165 	  break;
10166 	default:
10167 	  return false;
10168 	}
10169       cond_code = SSA_NAME;
10170     }
10171 
10172   if (TREE_CODE_CLASS (cond_code) == tcc_comparison
10173       && reduction_type == EXTRACT_LAST_REDUCTION
10174       && !expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type, cond_code))
10175     {
10176       if (dump_enabled_p ())
10177 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10178 			 "reduction comparison operation not supported.\n");
10179       return false;
10180     }
10181 
10182   if (!vec_stmt)
10183     {
10184       if (bitop1 != NOP_EXPR)
10185 	{
10186 	  machine_mode mode = TYPE_MODE (comp_vectype);
10187 	  optab optab;
10188 
10189 	  optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
10190 	  if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10191 	    return false;
10192 
10193 	  if (bitop2 != NOP_EXPR)
10194 	    {
10195 	      optab = optab_for_tree_code (bitop2, comp_vectype,
10196 					   optab_default);
10197 	      if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10198 		return false;
10199 	    }
10200 	}
10201 
10202       if (loop_vinfo
10203 	  && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)
10204 	  && reduction_type == EXTRACT_LAST_REDUCTION)
10205 	vect_record_loop_mask (loop_vinfo, &LOOP_VINFO_MASKS (loop_vinfo),
10206 			       ncopies * vec_num, vectype, NULL);
10207 
10208       vect_cost_for_stmt kind = vector_stmt;
10209       if (reduction_type == EXTRACT_LAST_REDUCTION)
10210 	/* Count one reduction-like operation per vector.  */
10211 	kind = vec_to_scalar;
10212       else if (!expand_vec_cond_expr_p (vectype, comp_vectype, cond_code))
10213 	return false;
10214 
10215       STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
10216       vect_model_simple_cost (stmt_info, ncopies, dts, ndts, slp_node,
10217 			      cost_vec, kind);
10218       return true;
10219     }
10220 
10221   /* Transform.  */
10222 
10223   if (!slp_node)
10224     {
10225       vec_oprnds0.create (1);
10226       vec_oprnds1.create (1);
10227       vec_oprnds2.create (1);
10228       vec_oprnds3.create (1);
10229     }
10230 
10231   /* Handle def.  */
10232   scalar_dest = gimple_assign_lhs (stmt);
10233   if (reduction_type != EXTRACT_LAST_REDUCTION)
10234     vec_dest = vect_create_destination_var (scalar_dest, vectype);
10235 
10236   /* Handle cond expr.  */
10237   for (j = 0; j < ncopies; j++)
10238     {
10239       bool swap_cond_operands = false;
10240 
10241       /* See whether another part of the vectorized code applies a loop
10242 	 mask to the condition, or to its inverse.  */
10243 
10244       vec_loop_masks *masks = NULL;
10245       if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
10246 	{
10247 	  if (reduction_type == EXTRACT_LAST_REDUCTION)
10248 	    masks = &LOOP_VINFO_MASKS (loop_vinfo);
10249 	  else
10250 	    {
10251 	      scalar_cond_masked_key cond (cond_expr, ncopies);
10252 	      if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10253 		masks = &LOOP_VINFO_MASKS (loop_vinfo);
10254 	      else
10255 		{
10256 		  bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
10257 		  cond.code = invert_tree_comparison (cond.code, honor_nans);
10258 		  if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10259 		    {
10260 		      masks = &LOOP_VINFO_MASKS (loop_vinfo);
10261 		      cond_code = cond.code;
10262 		      swap_cond_operands = true;
10263 		    }
10264 		}
10265 	    }
10266 	}
10267 
10268       stmt_vec_info new_stmt_info = NULL;
10269       if (j == 0)
10270 	{
10271           if (slp_node)
10272             {
10273 	      auto_vec<vec<tree>, 4> vec_defs;
10274               vect_get_slp_defs (slp_node, &vec_defs);
10275 	      vec_oprnds3 = vec_defs.pop ();
10276 	      vec_oprnds2 = vec_defs.pop ();
10277 	      if (!masked)
10278 		vec_oprnds1 = vec_defs.pop ();
10279 	      vec_oprnds0 = vec_defs.pop ();
10280             }
10281           else
10282             {
10283 	      if (masked)
10284 		{
10285 		  vec_cond_lhs
10286 		    = vect_get_vec_def_for_operand (cond_expr, stmt_info,
10287 						    comp_vectype);
10288 		}
10289 	      else
10290 		{
10291 		  vec_cond_lhs
10292 		    = vect_get_vec_def_for_operand (cond_expr0,
10293 						    stmt_info, comp_vectype);
10294 		  vec_cond_rhs
10295 		    = vect_get_vec_def_for_operand (cond_expr1,
10296 						    stmt_info, comp_vectype);
10297 		}
10298 	      vec_then_clause = vect_get_vec_def_for_operand (then_clause,
10299 							      stmt_info);
10300 	      if (reduction_type != EXTRACT_LAST_REDUCTION)
10301 		vec_else_clause = vect_get_vec_def_for_operand (else_clause,
10302 								stmt_info);
10303 	    }
10304 	}
10305       else
10306 	{
10307 	  vec_cond_lhs
10308 	    = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnds0.pop ());
10309 	  if (!masked)
10310 	    vec_cond_rhs
10311 	      = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnds1.pop ());
10312 
10313 	  vec_then_clause = vect_get_vec_def_for_stmt_copy (vinfo,
10314 							    vec_oprnds2.pop ());
10315 	  vec_else_clause = vect_get_vec_def_for_stmt_copy (vinfo,
10316 							    vec_oprnds3.pop ());
10317 	}
10318 
10319       if (!slp_node)
10320         {
10321 	  vec_oprnds0.quick_push (vec_cond_lhs);
10322 	  if (!masked)
10323 	    vec_oprnds1.quick_push (vec_cond_rhs);
10324 	  vec_oprnds2.quick_push (vec_then_clause);
10325 	  vec_oprnds3.quick_push (vec_else_clause);
10326 	}
10327 
10328       /* Arguments are ready.  Create the new vector stmt.  */
10329       FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
10330         {
10331           vec_then_clause = vec_oprnds2[i];
10332           vec_else_clause = vec_oprnds3[i];
10333 
10334 	  if (swap_cond_operands)
10335 	    std::swap (vec_then_clause, vec_else_clause);
10336 
10337 	  if (masked)
10338 	    vec_compare = vec_cond_lhs;
10339 	  else
10340 	    {
10341 	      vec_cond_rhs = vec_oprnds1[i];
10342 	      if (bitop1 == NOP_EXPR)
10343 		vec_compare = build2 (cond_code, vec_cmp_type,
10344 				      vec_cond_lhs, vec_cond_rhs);
10345 	      else
10346 		{
10347 		  new_temp = make_ssa_name (vec_cmp_type);
10348 		  gassign *new_stmt;
10349 		  if (bitop1 == BIT_NOT_EXPR)
10350 		    new_stmt = gimple_build_assign (new_temp, bitop1,
10351 						    vec_cond_rhs);
10352 		  else
10353 		    new_stmt
10354 		      = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
10355 					     vec_cond_rhs);
10356 		  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10357 		  if (bitop2 == NOP_EXPR)
10358 		    vec_compare = new_temp;
10359 		  else if (bitop2 == BIT_NOT_EXPR)
10360 		    {
10361 		      /* Instead of doing ~x ? y : z do x ? z : y.  */
10362 		      vec_compare = new_temp;
10363 		      std::swap (vec_then_clause, vec_else_clause);
10364 		    }
10365 		  else
10366 		    {
10367 		      vec_compare = make_ssa_name (vec_cmp_type);
10368 		      new_stmt
10369 			= gimple_build_assign (vec_compare, bitop2,
10370 					       vec_cond_lhs, new_temp);
10371 		      vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10372 		    }
10373 		}
10374 	    }
10375 
10376 	  /* If we decided to apply a loop mask to the result of the vector
10377              comparison, AND the comparison with the mask now.  Later passes
10378              should then be able to reuse the AND results between multiple
10379              vector statements.
10380 
10381 	     For example:
10382 	     for (int i = 0; i < 100; ++i)
10383 	       x[i] = y[i] ? z[i] : 10;
10384 
10385 	     results in following optimized GIMPLE:
10386 
10387 	     mask__35.8_43 = vect__4.7_41 != { 0, ... };
10388 	     vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
10389 	     _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
10390 	     vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
10391 	     vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
10392 					       vect_iftmp.11_47, { 10, ... }>;
10393 
10394 	     instead of using masked and unmasked forms of
10395 	     vec != { 0, ... } (masked in the MASK_LOAD,
10396 	     unmasked in the VEC_COND_EXPR).  */
10397 
10398 	  /* Force vec_compare to be an SSA_NAME rather than a comparison,
10399 	     in cases where that's necessary.  */
10400 
10401 	  if (masks || reduction_type == EXTRACT_LAST_REDUCTION)
10402 	    {
10403 	      if (!is_gimple_val (vec_compare))
10404 		{
10405 		  tree vec_compare_name = make_ssa_name (vec_cmp_type);
10406 		  gassign *new_stmt = gimple_build_assign (vec_compare_name,
10407 							   vec_compare);
10408 		  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10409 		  vec_compare = vec_compare_name;
10410 		}
10411 
10412 	      if (must_invert_cmp_result)
10413 		{
10414 		  tree vec_compare_name = make_ssa_name (vec_cmp_type);
10415 		  gassign *new_stmt = gimple_build_assign (vec_compare_name,
10416 							   BIT_NOT_EXPR,
10417 							   vec_compare);
10418 		  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10419 		  vec_compare = vec_compare_name;
10420 		}
10421 
10422 	      if (masks)
10423 		{
10424 		  unsigned vec_num = vec_oprnds0.length ();
10425 		  tree loop_mask
10426 		    = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
10427 					  vectype, vec_num * j + i);
10428 		  tree tmp2 = make_ssa_name (vec_cmp_type);
10429 		  gassign *g
10430 		    = gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare,
10431 					   loop_mask);
10432 		  vect_finish_stmt_generation (stmt_info, g, gsi);
10433 		  vec_compare = tmp2;
10434 		}
10435 	    }
10436 
10437 	  if (reduction_type == EXTRACT_LAST_REDUCTION)
10438 	    {
10439 	      gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt;
10440 	      tree lhs = gimple_get_lhs (old_stmt);
10441 	      gcall *new_stmt = gimple_build_call_internal
10442 		(IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
10443 		 vec_then_clause);
10444 	      gimple_call_set_lhs (new_stmt, lhs);
10445 	      SSA_NAME_DEF_STMT (lhs) = new_stmt;
10446 	      if (old_stmt == gsi_stmt (*gsi))
10447 		new_stmt_info = vect_finish_replace_stmt (stmt_info, new_stmt);
10448 	      else
10449 		{
10450 		  /* In this case we're moving the definition to later in the
10451 		     block.  That doesn't matter because the only uses of the
10452 		     lhs are in phi statements.  */
10453 		  gimple_stmt_iterator old_gsi = gsi_for_stmt (old_stmt);
10454 		  gsi_remove (&old_gsi, true);
10455 		  new_stmt_info
10456 		    = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10457 		}
10458 	    }
10459 	  else
10460 	    {
10461 	      new_temp = make_ssa_name (vec_dest);
10462 	      gassign *new_stmt
10463 		= gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
10464 				       vec_then_clause, vec_else_clause);
10465 	      new_stmt_info
10466 		= vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10467 	    }
10468           if (slp_node)
10469 	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
10470         }
10471 
10472         if (slp_node)
10473           continue;
10474 
10475 	if (j == 0)
10476 	  STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
10477 	else
10478 	  STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
10479 
10480 	prev_stmt_info = new_stmt_info;
10481     }
10482 
10483   vec_oprnds0.release ();
10484   vec_oprnds1.release ();
10485   vec_oprnds2.release ();
10486   vec_oprnds3.release ();
10487 
10488   return true;
10489 }
10490 
10491 /* vectorizable_comparison.
10492 
10493    Check if STMT_INFO is a comparison expression that can be vectorized.
10494    If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10495    comparison, put it in VEC_STMT, and insert it at GSI.
10496 
10497    Return true if STMT_INFO is vectorizable in this way.  */
10498 
10499 static bool
10500 vectorizable_comparison (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10501 			 stmt_vec_info *vec_stmt,
10502 			 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
10503 {
10504   vec_info *vinfo = stmt_info->vinfo;
10505   tree lhs, rhs1, rhs2;
10506   tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10507   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
10508   tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
10509   tree new_temp;
10510   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
10511   enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
10512   int ndts = 2;
10513   poly_uint64 nunits;
10514   int ncopies;
10515   enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
10516   stmt_vec_info prev_stmt_info = NULL;
10517   int i, j;
10518   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
10519   vec<tree> vec_oprnds0 = vNULL;
10520   vec<tree> vec_oprnds1 = vNULL;
10521   tree mask_type;
10522   tree mask;
10523 
10524   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
10525     return false;
10526 
10527   if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
10528     return false;
10529 
10530   mask_type = vectype;
10531   nunits = TYPE_VECTOR_SUBPARTS (vectype);
10532 
10533   if (slp_node)
10534     ncopies = 1;
10535   else
10536     ncopies = vect_get_num_copies (loop_vinfo, vectype);
10537 
10538   gcc_assert (ncopies >= 1);
10539   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
10540     return false;
10541 
10542   if (STMT_VINFO_LIVE_P (stmt_info))
10543     {
10544       if (dump_enabled_p ())
10545 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10546 			 "value used after loop.\n");
10547       return false;
10548     }
10549 
10550   gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
10551   if (!stmt)
10552     return false;
10553 
10554   code = gimple_assign_rhs_code (stmt);
10555 
10556   if (TREE_CODE_CLASS (code) != tcc_comparison)
10557     return false;
10558 
10559   rhs1 = gimple_assign_rhs1 (stmt);
10560   rhs2 = gimple_assign_rhs2 (stmt);
10561 
10562   if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &dts[0], &vectype1))
10563     return false;
10564 
10565   if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &dts[1], &vectype2))
10566     return false;
10567 
10568   if (vectype1 && vectype2
10569       && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
10570 		   TYPE_VECTOR_SUBPARTS (vectype2)))
10571     return false;
10572 
10573   vectype = vectype1 ? vectype1 : vectype2;
10574 
10575   /* Invariant comparison.  */
10576   if (!vectype)
10577     {
10578       if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
10579 	vectype = mask_type;
10580       else
10581 	vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1),
10582 					       slp_node);
10583       if (!vectype || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
10584 	return false;
10585     }
10586   else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
10587     return false;
10588 
10589   /* Can't compare mask and non-mask types.  */
10590   if (vectype1 && vectype2
10591       && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
10592     return false;
10593 
10594   /* Boolean values may have another representation in vectors
10595      and therefore we prefer bit operations over comparison for
10596      them (which also works for scalar masks).  We store opcodes
10597      to use in bitop1 and bitop2.  The statement is vectorized as
10598        BITOP2 (rhs1 BITOP1 rhs2) or
10599        rhs1 BITOP2 (BITOP1 rhs2)
10600      depending on bitop1 and bitop2 arity.  */
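  /* For example, with boolean operands a GT_EXPR comparison a > b is
     emitted as the bit operations
       tmp = ~b;        (bitop1 == BIT_NOT_EXPR, applied to rhs2)
       res = a & tmp;   (bitop2 == BIT_AND_EXPR)
     while LT_EXPR uses the same opcodes with the operands swapped.  */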
10601   bool swap_p = false;
10602   if (VECTOR_BOOLEAN_TYPE_P (vectype))
10603     {
10604       if (code == GT_EXPR)
10605 	{
10606 	  bitop1 = BIT_NOT_EXPR;
10607 	  bitop2 = BIT_AND_EXPR;
10608 	}
10609       else if (code == GE_EXPR)
10610 	{
10611 	  bitop1 = BIT_NOT_EXPR;
10612 	  bitop2 = BIT_IOR_EXPR;
10613 	}
10614       else if (code == LT_EXPR)
10615 	{
10616 	  bitop1 = BIT_NOT_EXPR;
10617 	  bitop2 = BIT_AND_EXPR;
10618 	  swap_p = true;
10619 	}
10620       else if (code == LE_EXPR)
10621 	{
10622 	  bitop1 = BIT_NOT_EXPR;
10623 	  bitop2 = BIT_IOR_EXPR;
10624 	  swap_p = true;
10625 	}
10626       else
10627 	{
10628 	  bitop1 = BIT_XOR_EXPR;
10629 	  if (code == EQ_EXPR)
10630 	    bitop2 = BIT_NOT_EXPR;
10631 	}
10632     }
10633 
10634   if (!vec_stmt)
10635     {
10636       if (bitop1 == NOP_EXPR)
10637 	{
10638 	  if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
10639 	    return false;
10640 	}
10641       else
10642 	{
10643 	  machine_mode mode = TYPE_MODE (vectype);
10644 	  optab optab;
10645 
10646 	  optab = optab_for_tree_code (bitop1, vectype, optab_default);
10647 	  if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10648 	    return false;
10649 
10650 	  if (bitop2 != NOP_EXPR)
10651 	    {
10652 	      optab = optab_for_tree_code (bitop2, vectype, optab_default);
10653 	      if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10654 		return false;
10655 	    }
10656 	}
10657 
10658       STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
10659       vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
10660 			      dts, ndts, slp_node, cost_vec);
10661       return true;
10662     }
10663 
10664   /* Transform.  */
10665   if (!slp_node)
10666     {
10667       vec_oprnds0.create (1);
10668       vec_oprnds1.create (1);
10669     }
10670 
10671   /* Handle def.  */
10672   lhs = gimple_assign_lhs (stmt);
10673   mask = vect_create_destination_var (lhs, mask_type);
10674 
10675   /* Handle cmp expr.  */
10676   for (j = 0; j < ncopies; j++)
10677     {
10678       stmt_vec_info new_stmt_info = NULL;
10679       if (j == 0)
10680 	{
10681 	  if (slp_node)
10682 	    {
10683 	      auto_vec<vec<tree>, 2> vec_defs;
10684 	      vect_get_slp_defs (slp_node, &vec_defs);
10685 	      vec_oprnds1 = vec_defs.pop ();
10686 	      vec_oprnds0 = vec_defs.pop ();
10687 	      if (swap_p)
10688 		std::swap (vec_oprnds0, vec_oprnds1);
10689 	    }
10690 	  else
10691 	    {
10692 	      vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt_info,
10693 						       vectype);
10694 	      vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt_info,
10695 						       vectype);
10696 	    }
10697 	}
10698       else
10699 	{
10700 	  vec_rhs1 = vect_get_vec_def_for_stmt_copy (vinfo,
10701 						     vec_oprnds0.pop ());
10702 	  vec_rhs2 = vect_get_vec_def_for_stmt_copy (vinfo,
10703 						     vec_oprnds1.pop ());
10704 	}
10705 
10706       if (!slp_node)
10707 	{
10708 	  if (swap_p && j == 0)
10709 	    std::swap (vec_rhs1, vec_rhs2);
10710 	  vec_oprnds0.quick_push (vec_rhs1);
10711 	  vec_oprnds1.quick_push (vec_rhs2);
10712 	}
10713 
10714       /* Arguments are ready.  Create the new vector stmt.  */
10715       FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
10716 	{
10717 	  vec_rhs2 = vec_oprnds1[i];
10718 
10719 	  new_temp = make_ssa_name (mask);
10720 	  if (bitop1 == NOP_EXPR)
10721 	    {
10722 	      gassign *new_stmt = gimple_build_assign (new_temp, code,
10723 						       vec_rhs1, vec_rhs2);
10724 	      new_stmt_info
10725 		= vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10726 	    }
10727 	  else
10728 	    {
10729 	      gassign *new_stmt;
10730 	      if (bitop1 == BIT_NOT_EXPR)
10731 		new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
10732 	      else
10733 		new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
10734 						vec_rhs2);
10735 	      new_stmt_info
10736 		= vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10737 	      if (bitop2 != NOP_EXPR)
10738 		{
10739 		  tree res = make_ssa_name (mask);
10740 		  if (bitop2 == BIT_NOT_EXPR)
10741 		    new_stmt = gimple_build_assign (res, bitop2, new_temp);
10742 		  else
10743 		    new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
10744 						    new_temp);
10745 		  new_stmt_info
10746 		    = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10747 		}
10748 	    }
10749 	  if (slp_node)
10750 	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
10751 	}
10752 
10753       if (slp_node)
10754 	continue;
10755 
10756       if (j == 0)
10757 	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
10758       else
10759 	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
10760 
10761       prev_stmt_info = new_stmt_info;
10762     }
10763 
10764   vec_oprnds0.release ();
10765   vec_oprnds1.release ();
10766 
10767   return true;
10768 }
10769 
10770 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
10771    can handle all live statements in the node.  Otherwise return true
10772    if STMT_INFO is not live or if vectorizable_live_operation can handle it.
10773    GSI and VEC_STMT_P are as for vectorizable_live_operation.  */
10774 
10775 static bool
10776 can_vectorize_live_stmts (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10777 			  slp_tree slp_node, slp_instance slp_node_instance,
10778 			  bool vec_stmt_p,
10779 			  stmt_vector_for_cost *cost_vec)
10780 {
10781   if (slp_node)
10782     {
10783       stmt_vec_info slp_stmt_info;
10784       unsigned int i;
10785       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
10786 	{
10787 	  if (STMT_VINFO_LIVE_P (slp_stmt_info)
10788 	      && !vectorizable_live_operation (slp_stmt_info, gsi, slp_node,
10789 					       slp_node_instance, i,
10790 					       vec_stmt_p, cost_vec))
10791 	    return false;
10792 	}
10793     }
10794   else if (STMT_VINFO_LIVE_P (stmt_info)
10795 	   && !vectorizable_live_operation (stmt_info, gsi, slp_node,
10796 					    slp_node_instance, -1,
10797 					    vec_stmt_p, cost_vec))
10798     return false;
10799 
10800   return true;
10801 }
10802 
10803 /* Make sure the statement is vectorizable.  */
10804 
10805 opt_result
10806 vect_analyze_stmt (stmt_vec_info stmt_info, bool *need_to_vectorize,
10807 		   slp_tree node, slp_instance node_instance,
10808 		   stmt_vector_for_cost *cost_vec)
10809 {
10810   vec_info *vinfo = stmt_info->vinfo;
10811   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
10812   enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
10813   bool ok;
10814   gimple_seq pattern_def_seq;
10815 
10816   if (dump_enabled_p ())
10817     dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
10818 		     stmt_info->stmt);
10819 
10820   if (gimple_has_volatile_ops (stmt_info->stmt))
10821     return opt_result::failure_at (stmt_info->stmt,
10822 				   "not vectorized:"
10823 				   " stmt has volatile operands: %G\n",
10824 				   stmt_info->stmt);
10825 
10826   if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10827       && node == NULL
10828       && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
10829     {
10830       gimple_stmt_iterator si;
10831 
10832       for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
10833 	{
10834 	  stmt_vec_info pattern_def_stmt_info
10835 	    = vinfo->lookup_stmt (gsi_stmt (si));
10836 	  if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
10837 	      || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
10838 	    {
10839 	      /* Analyze def stmt of STMT if it's a pattern stmt.  */
10840 	      if (dump_enabled_p ())
10841 		dump_printf_loc (MSG_NOTE, vect_location,
10842 				 "==> examining pattern def statement: %G",
10843 				 pattern_def_stmt_info->stmt);
10844 
10845 	      opt_result res
10846 		= vect_analyze_stmt (pattern_def_stmt_info,
10847 				     need_to_vectorize, node, node_instance,
10848 				     cost_vec);
10849 	      if (!res)
10850 		return res;
10851 	    }
10852 	}
10853     }
10854 
10855   /* Skip stmts that do not need to be vectorized. In loops this is expected
10856      to include:
10857      - the COND_EXPR which is the loop exit condition
10858      - any LABEL_EXPRs in the loop
10859      - computations that are used only for array indexing or loop control.
10860      In basic blocks we only analyze statements that are part of some SLP
10861      instance; therefore, all the statements are relevant.
10862 
10863      The pattern statement needs to be analyzed instead of the original
10864      statement if the original statement is not relevant.  Otherwise, we
10865      analyze both statements.  In basic blocks we are called from some SLP
10866      instance traversal and don't analyze pattern stmts instead; the
10867      pattern stmts are already part of some SLP instance.  */
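  /* As an illustration, in

	for (i = 0; i < n; i++)
	  a[i] = b[i] + 1;

     the increment of I and the exit test I < N are used only for loop
     control, so they are not vectorized themselves; only the load, the
     addition and the store are.  */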
10868 
10869   stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
10870   if (!STMT_VINFO_RELEVANT_P (stmt_info)
10871       && !STMT_VINFO_LIVE_P (stmt_info))
10872     {
10873       if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10874 	  && pattern_stmt_info
10875 	  && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
10876 	      || STMT_VINFO_LIVE_P (pattern_stmt_info)))
10877         {
10878           /* Analyze PATTERN_STMT instead of the original stmt.  */
10879 	  stmt_info = pattern_stmt_info;
10880           if (dump_enabled_p ())
10881 	    dump_printf_loc (MSG_NOTE, vect_location,
10882 			     "==> examining pattern statement: %G",
10883 			     stmt_info->stmt);
10884         }
10885       else
10886         {
10887           if (dump_enabled_p ())
10888             dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
10889 
10890           return opt_result::success ();
10891         }
10892     }
10893   else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10894 	   && node == NULL
10895 	   && pattern_stmt_info
10896 	   && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
10897 	       || STMT_VINFO_LIVE_P (pattern_stmt_info)))
10898     {
10899       /* Analyze PATTERN_STMT too.  */
10900       if (dump_enabled_p ())
10901 	dump_printf_loc (MSG_NOTE, vect_location,
10902 			 "==> examining pattern statement: %G",
10903 			 pattern_stmt_info->stmt);
10904 
10905       opt_result res
10906 	= vect_analyze_stmt (pattern_stmt_info, need_to_vectorize, node,
10907 			     node_instance, cost_vec);
10908       if (!res)
10909 	return res;
10910    }
10911 
10912   switch (STMT_VINFO_DEF_TYPE (stmt_info))
10913     {
10914       case vect_internal_def:
10915         break;
10916 
10917       case vect_reduction_def:
10918       case vect_nested_cycle:
10919          gcc_assert (!bb_vinfo
10920 		     && (relevance == vect_used_in_outer
10921 			 || relevance == vect_used_in_outer_by_reduction
10922 			 || relevance == vect_used_by_reduction
10923 			 || relevance == vect_unused_in_scope
10924 			 || relevance == vect_used_only_live));
10925          break;
10926 
10927       case vect_induction_def:
10928 	gcc_assert (!bb_vinfo);
10929 	break;
10930 
10931       case vect_constant_def:
10932       case vect_external_def:
10933       case vect_unknown_def_type:
10934       default:
10935         gcc_unreachable ();
10936     }
10937 
10938   if (STMT_VINFO_RELEVANT_P (stmt_info))
10939     {
10940       tree type = gimple_expr_type (stmt_info->stmt);
10941       gcc_assert (!VECTOR_MODE_P (TYPE_MODE (type)));
10942       gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
10943       gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
10944 		  || (call && gimple_call_lhs (call) == NULL_TREE));
10945       *need_to_vectorize = true;
10946     }
10947 
10948   if (PURE_SLP_STMT (stmt_info) && !node)
10949     {
10950       if (dump_enabled_p ())
10951 	dump_printf_loc (MSG_NOTE, vect_location,
10952 			 "handled only by SLP analysis\n");
10953       return opt_result::success ();
10954     }
10955 
10956   ok = true;
10957   if (!bb_vinfo
10958       && (STMT_VINFO_RELEVANT_P (stmt_info)
10959 	  || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
10960     /* Prefer vectorizable_call over vectorizable_simd_clone_call so
10961        -mveclibabi= takes preference over library functions with
10962        the simd attribute.  */
10963     ok = (vectorizable_call (stmt_info, NULL, NULL, node, cost_vec)
10964 	  || vectorizable_simd_clone_call (stmt_info, NULL, NULL, node,
10965 					   cost_vec)
10966 	  || vectorizable_conversion (stmt_info, NULL, NULL, node, cost_vec)
10967 	  || vectorizable_operation (stmt_info, NULL, NULL, node, cost_vec)
10968 	  || vectorizable_assignment (stmt_info, NULL, NULL, node, cost_vec)
10969 	  || vectorizable_load (stmt_info, NULL, NULL, node, node_instance,
10970 				cost_vec)
10971 	  || vectorizable_store (stmt_info, NULL, NULL, node, cost_vec)
10972 	  || vectorizable_reduction (stmt_info, node, node_instance, cost_vec)
10973 	  || vectorizable_induction (stmt_info, NULL, NULL, node, cost_vec)
10974 	  || vectorizable_shift (stmt_info, NULL, NULL, node, cost_vec)
10975 	  || vectorizable_condition (stmt_info, NULL, NULL, node, cost_vec)
10976 	  || vectorizable_comparison (stmt_info, NULL, NULL, node,
10977 				      cost_vec)
10978 	  || vectorizable_lc_phi (stmt_info, NULL, node));
10979   else
10980     {
10981       if (bb_vinfo)
10982 	ok = (vectorizable_call (stmt_info, NULL, NULL, node, cost_vec)
10983 	      || vectorizable_simd_clone_call (stmt_info, NULL, NULL, node,
10984 					       cost_vec)
10985 	      || vectorizable_conversion (stmt_info, NULL, NULL, node,
10986 					  cost_vec)
10987 	      || vectorizable_shift (stmt_info, NULL, NULL, node, cost_vec)
10988 	      || vectorizable_operation (stmt_info, NULL, NULL, node, cost_vec)
10989 	      || vectorizable_assignment (stmt_info, NULL, NULL, node,
10990 					  cost_vec)
10991 	      || vectorizable_load (stmt_info, NULL, NULL, node, node_instance,
10992 				    cost_vec)
10993 	      || vectorizable_store (stmt_info, NULL, NULL, node, cost_vec)
10994 	      || vectorizable_condition (stmt_info, NULL, NULL, node, cost_vec)
10995 	      || vectorizable_comparison (stmt_info, NULL, NULL, node,
10996 					  cost_vec));
10997     }
10998 
10999   if (!ok)
11000     return opt_result::failure_at (stmt_info->stmt,
11001 				   "not vectorized:"
11002 				   " relevant stmt not supported: %G",
11003 				   stmt_info->stmt);
11004 
11005   /* Stmts that are (also) "live" (i.e. that are used out of the loop)
11006      need extra handling, except for vectorizable reductions.  */
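  /* For instance, in

	for (i = 0; i < n; i++)
	  last = a[i];
	... = last;

     the scalar LAST is used after the loop, so its defining statement is
     "live" and vectorizable_live_operation must be able to extract the
     final lane from the vectorized result.  */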
11007   if (!bb_vinfo
11008       && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
11009       && STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type
11010       && !can_vectorize_live_stmts (stmt_info, NULL, node, node_instance,
11011 				    false, cost_vec))
11012     return opt_result::failure_at (stmt_info->stmt,
11013 				   "not vectorized:"
11014 				   " live stmt not supported: %G",
11015 				   stmt_info->stmt);
11016 
11017   return opt_result::success ();
11018 }
11019 
11020 
11021 /* Function vect_transform_stmt.
11022 
11023    Create a vectorized stmt to replace STMT_INFO, and insert it at GSI.  */
11024 
11025 bool
11026 vect_transform_stmt (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
11027 		     slp_tree slp_node, slp_instance slp_node_instance)
11028 {
11029   vec_info *vinfo = stmt_info->vinfo;
11030   bool is_store = false;
11031   stmt_vec_info vec_stmt = NULL;
11032   bool done;
11033 
11034   gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
11035   stmt_vec_info old_vec_stmt_info = STMT_VINFO_VEC_STMT (stmt_info);
11036 
11037   bool nested_p = (STMT_VINFO_LOOP_VINFO (stmt_info)
11038 		   && nested_in_vect_loop_p
11039 		        (LOOP_VINFO_LOOP (STMT_VINFO_LOOP_VINFO (stmt_info)),
11040 			 stmt_info));
11041 
11042   gimple *stmt = stmt_info->stmt;
11043   switch (STMT_VINFO_TYPE (stmt_info))
11044     {
11045     case type_demotion_vec_info_type:
11046     case type_promotion_vec_info_type:
11047     case type_conversion_vec_info_type:
11048       done = vectorizable_conversion (stmt_info, gsi, &vec_stmt, slp_node,
11049 				      NULL);
11050       gcc_assert (done);
11051       break;
11052 
11053     case induc_vec_info_type:
11054       done = vectorizable_induction (stmt_info, gsi, &vec_stmt, slp_node,
11055 				     NULL);
11056       gcc_assert (done);
11057       break;
11058 
11059     case shift_vec_info_type:
11060       done = vectorizable_shift (stmt_info, gsi, &vec_stmt, slp_node, NULL);
11061       gcc_assert (done);
11062       break;
11063 
11064     case op_vec_info_type:
11065       done = vectorizable_operation (stmt_info, gsi, &vec_stmt, slp_node,
11066 				     NULL);
11067       gcc_assert (done);
11068       break;
11069 
11070     case assignment_vec_info_type:
11071       done = vectorizable_assignment (stmt_info, gsi, &vec_stmt, slp_node,
11072 				      NULL);
11073       gcc_assert (done);
11074       break;
11075 
11076     case load_vec_info_type:
11077       done = vectorizable_load (stmt_info, gsi, &vec_stmt, slp_node,
11078                                 slp_node_instance, NULL);
11079       gcc_assert (done);
11080       break;
11081 
11082     case store_vec_info_type:
11083       done = vectorizable_store (stmt_info, gsi, &vec_stmt, slp_node, NULL);
11084       gcc_assert (done);
11085       if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
11086 	{
11087 	  /* In case of interleaving, the whole chain is vectorized when the
11088 	     last store in the chain is reached.  Store stmts before the last
11089 	     one are skipped, and their vec_stmt_info shouldn't be freed
11090 	     meanwhile.  */
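	  /* E.g. for an interleaved group a[2*i] = x; a[2*i+1] = y; the
	     first store is skipped when it is reached and both stores are
	     emitted once the second (last) one is transformed.  */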
11091 	  stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
11092 	  if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
11093 	    is_store = true;
11094 	}
11095       else
11096 	is_store = true;
11097       break;
11098 
11099     case condition_vec_info_type:
11100       done = vectorizable_condition (stmt_info, gsi, &vec_stmt, slp_node, NULL);
11101       gcc_assert (done);
11102       break;
11103 
11104     case comparison_vec_info_type:
11105       done = vectorizable_comparison (stmt_info, gsi, &vec_stmt,
11106 				      slp_node, NULL);
11107       gcc_assert (done);
11108       break;
11109 
11110     case call_vec_info_type:
11111       done = vectorizable_call (stmt_info, gsi, &vec_stmt, slp_node, NULL);
11112       stmt = gsi_stmt (*gsi);
11113       break;
11114 
11115     case call_simd_clone_vec_info_type:
11116       done = vectorizable_simd_clone_call (stmt_info, gsi, &vec_stmt,
11117 					   slp_node, NULL);
11118       stmt = gsi_stmt (*gsi);
11119       break;
11120 
11121     case reduc_vec_info_type:
11122       done = vect_transform_reduction (stmt_info, gsi, &vec_stmt, slp_node);
11123       gcc_assert (done);
11124       break;
11125 
11126     case cycle_phi_info_type:
11127       done = vect_transform_cycle_phi (stmt_info, &vec_stmt, slp_node,
11128 				       slp_node_instance);
11129       gcc_assert (done);
11130       break;
11131 
11132     case lc_phi_info_type:
11133       done = vectorizable_lc_phi (stmt_info, &vec_stmt, slp_node);
11134       gcc_assert (done);
11135       break;
11136 
11137     default:
11138       if (!STMT_VINFO_LIVE_P (stmt_info))
11139 	{
11140 	  if (dump_enabled_p ())
11141 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11142                              "stmt not supported.\n");
11143 	  gcc_unreachable ();
11144 	}
11145     }
11146 
11147   /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
11148      This would break hybrid SLP vectorization.  */
11149   if (slp_node)
11150     gcc_assert (!vec_stmt
11151 		&& STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt_info);
11152 
11153   /* Handle inner-loop stmts whose DEF is used in the loop-nest that
11154      is being vectorized, but outside the immediately enclosing loop.  */
11155   if (vec_stmt
11156       && nested_p
11157       && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
11158       && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
11159           || STMT_VINFO_RELEVANT (stmt_info) ==
11160                                            vect_used_in_outer_by_reduction))
11161     {
11162       class loop *innerloop = LOOP_VINFO_LOOP (
11163                                 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
11164       imm_use_iterator imm_iter;
11165       use_operand_p use_p;
11166       tree scalar_dest;
11167 
11168       if (dump_enabled_p ())
11169         dump_printf_loc (MSG_NOTE, vect_location,
11170                          "Record the vdef for outer-loop vectorization.\n");
11171 
11172       /* Find the relevant loop-exit phi-node, and record the vec_stmt there
11173         (to be used when vectorizing outer-loop stmts that use the DEF of
11174         STMT).  */
11175       if (gimple_code (stmt) == GIMPLE_PHI)
11176         scalar_dest = PHI_RESULT (stmt);
11177       else
11178         scalar_dest = gimple_get_lhs (stmt);
11179 
11180       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
11181 	if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
11182 	  {
11183 	    stmt_vec_info exit_phi_info
11184 	      = vinfo->lookup_stmt (USE_STMT (use_p));
11185 	    STMT_VINFO_VEC_STMT (exit_phi_info) = vec_stmt;
11186 	  }
11187     }
11188 
11189   if (vec_stmt)
11190     STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
11191 
11192   if (STMT_VINFO_TYPE (stmt_info) == store_vec_info_type)
11193     return is_store;
11194 
11195   /* If this stmt defines a value used on a backedge, update the
11196      vectorized PHIs.  */
11197   stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info);
11198   stmt_vec_info reduc_info;
11199   if (STMT_VINFO_REDUC_DEF (orig_stmt_info)
11200       && vect_stmt_to_vectorize (orig_stmt_info) == stmt_info
11201       && (reduc_info = info_for_reduction (orig_stmt_info))
11202       && STMT_VINFO_REDUC_TYPE (reduc_info) != FOLD_LEFT_REDUCTION
11203       && STMT_VINFO_REDUC_TYPE (reduc_info) != EXTRACT_LAST_REDUCTION)
11204     {
11205       gphi *phi;
11206       edge e;
11207       if (!slp_node
11208 	  && (phi = dyn_cast <gphi *>
11209 		      (STMT_VINFO_REDUC_DEF (orig_stmt_info)->stmt))
11210 	  && dominated_by_p (CDI_DOMINATORS,
11211 			     gimple_bb (orig_stmt_info->stmt), gimple_bb (phi))
11212 	  && (e = loop_latch_edge (gimple_bb (phi)->loop_father))
11213 	  && (PHI_ARG_DEF_FROM_EDGE (phi, e)
11214 	      == gimple_get_lhs (orig_stmt_info->stmt)))
11215 	{
11216 	  stmt_vec_info phi_info
11217 	    = STMT_VINFO_VEC_STMT (STMT_VINFO_REDUC_DEF (orig_stmt_info));
11218 	  stmt_vec_info vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
11219 	  do
11220 	    {
11221 	      add_phi_arg (as_a <gphi *> (phi_info->stmt),
11222 			   gimple_get_lhs (vec_stmt->stmt), e,
11223 			   gimple_phi_arg_location (phi, e->dest_idx));
11224 	      phi_info = STMT_VINFO_RELATED_STMT (phi_info);
11225 	      vec_stmt = STMT_VINFO_RELATED_STMT (vec_stmt);
11226 	    }
11227 	  while (phi_info);
11228 	  gcc_assert (!vec_stmt);
11229 	}
11230       else if (slp_node
11231 	       && slp_node != slp_node_instance->reduc_phis)
11232 	{
11233 	  slp_tree phi_node = slp_node_instance->reduc_phis;
11234 	  gphi *phi = as_a <gphi *> (SLP_TREE_SCALAR_STMTS (phi_node)[0]->stmt);
11235 	  e = loop_latch_edge (gimple_bb (phi)->loop_father);
11236 	  gcc_assert (SLP_TREE_VEC_STMTS (phi_node).length ()
11237 		      == SLP_TREE_VEC_STMTS (slp_node).length ());
11238 	  for (unsigned i = 0; i < SLP_TREE_VEC_STMTS (phi_node).length (); ++i)
11239 	    add_phi_arg (as_a <gphi *> (SLP_TREE_VEC_STMTS (phi_node)[i]->stmt),
11240 			 gimple_get_lhs (SLP_TREE_VEC_STMTS (slp_node)[i]->stmt),
11241 			 e, gimple_phi_arg_location (phi, e->dest_idx));
11242 	}
11243     }
11244 
11245   /* Handle stmts whose DEF is used outside the loop-nest that is
11246      being vectorized.  */
11247   done = can_vectorize_live_stmts (stmt_info, gsi, slp_node,
11248 				   slp_node_instance, true, NULL);
11249   gcc_assert (done);
11250 
11251   return false;
11252 }
11253 
11254 
11255 /* Remove a group of stores (for SLP or interleaving), free their
11256    stmt_vec_info.  */
11257 
11258 void
11259 vect_remove_stores (stmt_vec_info first_stmt_info)
11260 {
11261   vec_info *vinfo = first_stmt_info->vinfo;
11262   stmt_vec_info next_stmt_info = first_stmt_info;
11263 
11264   while (next_stmt_info)
11265     {
11266       stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
11267       next_stmt_info = vect_orig_stmt (next_stmt_info);
11268       /* Free the attached stmt_vec_info and remove the stmt.  */
11269       vinfo->remove_stmt (next_stmt_info);
11270       next_stmt_info = tmp;
11271     }
11272 }
11273 
11274 /* If NUNITS is nonzero, return a vector type that contains NUNITS
11275    elements of type SCALAR_TYPE, or null if the target doesn't support
11276    such a type.
11277 
11278    If NUNITS is zero, return a vector type that contains elements of
11279    type SCALAR_TYPE, choosing whichever vector size the target prefers.
11280 
11281    If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
11282    for this vectorization region and want to "autodetect" the best choice.
11283    Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
11284    and we want the new type to be interoperable with it.   PREVAILING_MODE
11285    in this case can be a scalar integer mode or a vector mode; when it
11286    is a vector mode, the function acts like a tree-level version of
11287    related_vector_mode.  */
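/* For illustration (the actual modes are target-dependent): with
   PREVAILING_MODE == VOIDmode, a 32-bit int SCALAR_TYPE and NUNITS == 0,
   a target whose preferred SIMD mode is 128 bits wide would produce a
   4-element integer vector type; passing that vector's mode as
   PREVAILING_MODE with a 16-bit short SCALAR_TYPE would then produce the
   interoperable 8-element vector type.  */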
11288 
11289 tree
11290 get_related_vectype_for_scalar_type (machine_mode prevailing_mode,
11291 				     tree scalar_type, poly_uint64 nunits)
11292 {
11293   tree orig_scalar_type = scalar_type;
11294   scalar_mode inner_mode;
11295   machine_mode simd_mode;
11296   tree vectype;
11297 
11298   if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
11299       && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
11300     return NULL_TREE;
11301 
11302   unsigned int nbytes = GET_MODE_SIZE (inner_mode);
11303 
11304   /* For vector types of elements whose mode precision doesn't
11305      match their type's precision we use an element type of mode
11306      precision.  The vectorization routines will have to make sure
11307      they support the proper result truncation/extension.
11308      We also make sure to build vector types with INTEGER_TYPE
11309      component type only.  */
11310   if (INTEGRAL_TYPE_P (scalar_type)
11311       && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
11312 	  || TREE_CODE (scalar_type) != INTEGER_TYPE))
11313     scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
11314 						  TYPE_UNSIGNED (scalar_type));
11315 
11316   /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
11317      When the component mode passes the above test simply use a type
11318      corresponding to that mode.  The theory is that any use that
11319      would cause problems with this will disable vectorization anyway.  */
11320   else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
11321 	   && !INTEGRAL_TYPE_P (scalar_type))
11322     scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
11323 
11324   /* We can't build a vector type of elements with alignment bigger than
11325      their size.  */
11326   else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
11327     scalar_type = lang_hooks.types.type_for_mode (inner_mode,
11328 						  TYPE_UNSIGNED (scalar_type));
11329 
11330   /* If we fell back to using the mode, fail if there was
11331      no scalar type for it.  */
11332   if (scalar_type == NULL_TREE)
11333     return NULL_TREE;
11334 
11335   /* If no prevailing mode was supplied, use the mode the target prefers.
11336      Otherwise lookup a vector mode based on the prevailing mode.  */
11337   if (prevailing_mode == VOIDmode)
11338     {
11339       gcc_assert (known_eq (nunits, 0U));
11340       simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
11341       if (SCALAR_INT_MODE_P (simd_mode))
11342 	{
11343 	  /* Traditional behavior is not to take the integer mode
11344 	     literally, but simply to use it as a way of determining
11345 	     the vector size.  It is up to mode_for_vector to decide
11346 	     what the TYPE_MODE should be.
11347 
11348 	     Note that nunits == 1 is allowed in order to support single
11349 	     element vector types.  */
11350 	  if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits)
11351 	      || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11352 	    return NULL_TREE;
11353 	}
11354     }
11355   else if (SCALAR_INT_MODE_P (prevailing_mode)
11356 	   || !related_vector_mode (prevailing_mode,
11357 				    inner_mode, nunits).exists (&simd_mode))
11358     {
11359       /* Fall back to using mode_for_vector, mostly in the hope of being
11360 	 able to use an integer mode.  */
11361       if (known_eq (nunits, 0U)
11362 	  && !multiple_p (GET_MODE_SIZE (prevailing_mode), nbytes, &nunits))
11363 	return NULL_TREE;
11364 
11365       if (!mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11366 	return NULL_TREE;
11367     }
11368 
11369   vectype = build_vector_type_for_mode (scalar_type, simd_mode);
11370 
11371   /* In cases where the mode was chosen by mode_for_vector, check that
11372      the target actually supports the chosen mode, or that it at least
11373      allows the vector mode to be replaced by a like-sized integer.  */
11374   if (!VECTOR_MODE_P (TYPE_MODE (vectype))
11375       && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
11376     return NULL_TREE;
11377 
11378   /* Re-attach the address-space qualifier if we canonicalized the scalar
11379      type.  */
11380   if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
11381     return build_qualified_type
11382 	     (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
11383 
11384   return vectype;
11385 }
11386 
11387 /* Function get_vectype_for_scalar_type.
11388 
11389    Returns the vector type corresponding to SCALAR_TYPE as supported
11390    by the target.  If GROUP_SIZE is nonzero and we're performing BB
11391    vectorization, make sure that the number of elements in the vector
11392    is no bigger than GROUP_SIZE.  */
11393 
11394 tree
11395 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
11396 			     unsigned int group_size)
11397 {
11398   /* For BB vectorization, we should always have a group size once we've
11399      constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
11400      are tentative requests during things like early data reference
11401      analysis and pattern recognition.  */
11402   if (is_a <bb_vec_info> (vinfo))
11403     gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
11404   else
11405     group_size = 0;
11406 
11407   tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11408 						      scalar_type);
11409   if (vectype && vinfo->vector_mode == VOIDmode)
11410     vinfo->vector_mode = TYPE_MODE (vectype);
11411 
11412   /* Register the natural choice of vector type, before the group size
11413      has been applied.  */
11414   if (vectype)
11415     vinfo->used_vector_modes.add (TYPE_MODE (vectype));
11416 
11417   /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
11418      try again with an explicit number of elements.  */
11419   if (vectype
11420       && group_size
11421       && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size))
11422     {
11423       /* Start with the biggest number of units that fits within
11424 	 GROUP_SIZE and halve it until we find a valid vector type.
11425 	 Usually either the first attempt will succeed or all will
11426 	 fail (in the latter case because GROUP_SIZE is too small
11427 	 for the target), but it's possible that a target could have
11428 	 a hole between supported vector types.
11429 
11430 	 If GROUP_SIZE is not a power of 2, this has the effect of
11431 	 trying the largest power of 2 that fits within the group,
11432 	 even though the group is not a multiple of that vector size.
11433 	 The BB vectorizer will then try to carve up the group into
11434 	 smaller pieces.  */
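      /* For instance (assuming int elements and 16-byte vectors are
	 supported), GROUP_SIZE == 6 starts with nunits == 4: if a
	 4-element vector type exists it is used and the BB vectorizer
	 carves up the group; otherwise 2 elements are tried before
	 giving up.  */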
11435       unsigned int nunits = 1 << floor_log2 (group_size);
11436       do
11437 	{
11438 	  vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11439 							 scalar_type, nunits);
11440 	  nunits /= 2;
11441 	}
11442       while (nunits > 1 && !vectype);
11443     }
11444 
11445   return vectype;
11446 }
11447 
11448 /* Return the vector type corresponding to SCALAR_TYPE as supported
11449    by the target.  NODE, if nonnull, is the SLP tree node that will
11450    use the returned vector type.  */
11451 
11452 tree
11453 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node)
11454 {
11455   unsigned int group_size = 0;
11456   if (node)
11457     {
11458       group_size = SLP_TREE_SCALAR_OPS (node).length ();
11459       if (group_size == 0)
11460 	group_size = SLP_TREE_SCALAR_STMTS (node).length ();
11461     }
11462   return get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11463 }
11464 
11465 /* Function get_mask_type_for_scalar_type.
11466 
11467    Returns the mask type corresponding to a result of comparison
11468    of vectors of specified SCALAR_TYPE as supported by target.
11469    If GROUP_SIZE is nonzero and we're performing BB vectorization,
11470    make sure that the number of elements in the vector is no bigger
11471    than GROUP_SIZE.  */
11472 
11473 tree
11474 get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
11475 			       unsigned int group_size)
11476 {
11477   tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11478 
11479   if (!vectype)
11480     return NULL;
11481 
11482   return truth_type_for (vectype);
11483 }
11484 
11485 /* Function get_same_sized_vectype
11486 
11487    Returns a vector type corresponding to SCALAR_TYPE of size
11488    VECTOR_TYPE if supported by the target.  */
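/* E.g. (on a typical target with 16-byte vectors) for SCALAR_TYPE short
   and a 4 x float VECTOR_TYPE the result has 16 / 2 == 8 units, giving an
   8 x short vector type; for a scalar boolean SCALAR_TYPE the mask type
   of VECTOR_TYPE is returned instead.  */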
11489 
11490 tree
11491 get_same_sized_vectype (tree scalar_type, tree vector_type)
11492 {
11493   if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
11494     return truth_type_for (vector_type);
11495 
11496   poly_uint64 nunits;
11497   if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)),
11498 		   GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits))
11499     return NULL_TREE;
11500 
11501   return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
11502 					      scalar_type, nunits);
11503 }
11504 
11505 /* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
11506    would not change the chosen vector modes.  */
11507 
11508 bool
11509 vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode)
11510 {
11511   for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin ();
11512        i != vinfo->used_vector_modes.end (); ++i)
11513     if (!VECTOR_MODE_P (*i)
11514 	|| related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i)
11515       return false;
11516   return true;
11517 }
11518 
11519 /* Function vect_is_simple_use.
11520 
11521    Input:
11522    VINFO - the vect info of the loop or basic block that is being vectorized.
11523    OPERAND - operand in the loop or bb.
11524    Output:
11525    DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
11526      case OPERAND is an SSA_NAME that is defined in the vectorizable region
11527    DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
11528      the definition could be anywhere in the function
11529    DT - the type of definition
11530 
11531    Returns whether a stmt with OPERAND can be vectorized.
11532    For loops, supportable operands are constants, loop invariants, and operands
11533    that are defined by the current iteration of the loop.  Unsupportable
11534    operands are those that are defined by a previous iteration of the loop (as
11535    is the case in reduction/induction computations).
11536    For basic blocks, supportable operands are constants and bb invariants.
11537    For now, operands defined outside the basic block are not supported.  */
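/* For example, in a loop

	s_1 = PHI <s_0, s_2>
	x_3 = a[i];
	s_2 = s_1 + x_3;

   X_3 is a supportable vect_internal_def operand, a value such as S_0
   defined before the loop is vect_external_def, and S_1, being defined
   by the cycle PHI, gets a reduction definition type.  */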
11538 
11539 bool
11540 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11541 		    stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
11542 {
11543   if (def_stmt_info_out)
11544     *def_stmt_info_out = NULL;
11545   if (def_stmt_out)
11546     *def_stmt_out = NULL;
11547   *dt = vect_unknown_def_type;
11548 
11549   if (dump_enabled_p ())
11550     {
11551       dump_printf_loc (MSG_NOTE, vect_location,
11552                        "vect_is_simple_use: operand ");
11553       if (TREE_CODE (operand) == SSA_NAME
11554 	  && !SSA_NAME_IS_DEFAULT_DEF (operand))
11555 	dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
11556       else
11557 	dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
11558     }
11559 
11560   if (CONSTANT_CLASS_P (operand))
11561     *dt = vect_constant_def;
11562   else if (is_gimple_min_invariant (operand))
11563     *dt = vect_external_def;
11564   else if (TREE_CODE (operand) != SSA_NAME)
11565     *dt = vect_unknown_def_type;
11566   else if (SSA_NAME_IS_DEFAULT_DEF (operand))
11567     *dt = vect_external_def;
11568   else
11569     {
11570       gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
11571       stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
11572       if (!stmt_vinfo)
11573 	*dt = vect_external_def;
11574       else
11575 	{
11576 	  stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
11577 	  def_stmt = stmt_vinfo->stmt;
11578 	  switch (gimple_code (def_stmt))
11579 	    {
11580 	    case GIMPLE_PHI:
11581 	    case GIMPLE_ASSIGN:
11582 	    case GIMPLE_CALL:
11583 	      *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
11584 	      break;
11585 	    default:
11586 	      *dt = vect_unknown_def_type;
11587 	      break;
11588 	    }
11589 	  if (def_stmt_info_out)
11590 	    *def_stmt_info_out = stmt_vinfo;
11591 	}
11592       if (def_stmt_out)
11593 	*def_stmt_out = def_stmt;
11594     }
11595 
11596   if (dump_enabled_p ())
11597     {
11598       dump_printf (MSG_NOTE, ", type of def: ");
11599       switch (*dt)
11600 	{
11601 	case vect_uninitialized_def:
11602 	  dump_printf (MSG_NOTE, "uninitialized\n");
11603 	  break;
11604 	case vect_constant_def:
11605 	  dump_printf (MSG_NOTE, "constant\n");
11606 	  break;
11607 	case vect_external_def:
11608 	  dump_printf (MSG_NOTE, "external\n");
11609 	  break;
11610 	case vect_internal_def:
11611 	  dump_printf (MSG_NOTE, "internal\n");
11612 	  break;
11613 	case vect_induction_def:
11614 	  dump_printf (MSG_NOTE, "induction\n");
11615 	  break;
11616 	case vect_reduction_def:
11617 	  dump_printf (MSG_NOTE, "reduction\n");
11618 	  break;
11619 	case vect_double_reduction_def:
11620 	  dump_printf (MSG_NOTE, "double reduction\n");
11621 	  break;
11622 	case vect_nested_cycle:
11623 	  dump_printf (MSG_NOTE, "nested cycle\n");
11624 	  break;
11625 	case vect_unknown_def_type:
11626 	  dump_printf (MSG_NOTE, "unknown\n");
11627 	  break;
11628 	}
11629     }
11630 
11631   if (*dt == vect_unknown_def_type)
11632     {
11633       if (dump_enabled_p ())
11634         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11635                          "Unsupported pattern.\n");
11636       return false;
11637     }
11638 
11639   return true;
11640 }
11641 
11642 /* Function vect_is_simple_use.
11643 
11644    Same as vect_is_simple_use but also determines the vector operand
11645    type of OPERAND and stores it to *VECTYPE.  If the definition of
11646    OPERAND is vect_uninitialized_def, vect_constant_def or
11647    vect_external_def *VECTYPE will be set to NULL_TREE and the caller
11648    is responsible to compute the best suited vector type for the
11649    scalar operand.  */
11650 
11651 bool
11652 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11653 		    tree *vectype, stmt_vec_info *def_stmt_info_out,
11654 		    gimple **def_stmt_out)
11655 {
11656   stmt_vec_info def_stmt_info;
11657   gimple *def_stmt;
11658   if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
11659     return false;
11660 
11661   if (def_stmt_out)
11662     *def_stmt_out = def_stmt;
11663   if (def_stmt_info_out)
11664     *def_stmt_info_out = def_stmt_info;
11665 
11666   /* Now get a vector type if the def is internal, otherwise supply
11667      NULL_TREE and leave it up to the caller to figure out a proper
11668      type for the use stmt.  */
11669   if (*dt == vect_internal_def
11670       || *dt == vect_induction_def
11671       || *dt == vect_reduction_def
11672       || *dt == vect_double_reduction_def
11673       || *dt == vect_nested_cycle)
11674     {
11675       *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
11676       gcc_assert (*vectype != NULL_TREE);
11677       if (dump_enabled_p ())
11678 	dump_printf_loc (MSG_NOTE, vect_location,
11679 			 "vect_is_simple_use: vectype %T\n", *vectype);
11680     }
11681   else if (*dt == vect_uninitialized_def
11682 	   || *dt == vect_constant_def
11683 	   || *dt == vect_external_def)
11684     *vectype = NULL_TREE;
11685   else
11686     gcc_unreachable ();
11687 
11688   return true;
11689 }
11690 
11691 
11692 /* Function supportable_widening_operation
11693 
11694    Check whether an operation represented by the code CODE is a
11695    widening operation that is supported by the target platform in
11696    vector form (i.e., when operating on arguments of type VECTYPE_IN
11697    producing a result of type VECTYPE_OUT).
11698 
11699    Widening operations we currently support are NOP (CONVERT), FLOAT,
11700    FIX_TRUNC and WIDEN_MULT.  This function checks if these operations
11701    are supported by the target platform either directly (via vector
11702    tree-codes), or via target builtins.
11703 
11704    Output:
11705    - CODE1 and CODE2 are codes of vector operations to be used when
11706    vectorizing the operation, if available.
11707    - MULTI_STEP_CVT determines the number of required intermediate steps in
11708    case of multi-step conversion (like char->short->int - in that case
11709    MULTI_STEP_CVT will be 1).
11710    - INTERM_TYPES contains the intermediate type required to perform the
11711    widening operation (short in the above example).  */
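/* For example, vectorizing the widening conversion short -> int normally
   uses CODE1 == VEC_UNPACK_LO_EXPR and CODE2 == VEC_UNPACK_HI_EXPR with
   MULTI_STEP_CVT == 0, whereas char -> int needs one intermediate short
   vector type in INTERM_TYPES and MULTI_STEP_CVT == 1, provided the
   target supports the corresponding optabs.  */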
11712 
11713 bool
11714 supportable_widening_operation (enum tree_code code, stmt_vec_info stmt_info,
11715 				tree vectype_out, tree vectype_in,
11716                                 enum tree_code *code1, enum tree_code *code2,
11717                                 int *multi_step_cvt,
11718                                 vec<tree> *interm_types)
11719 {
11720   loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
11721   class loop *vect_loop = NULL;
11722   machine_mode vec_mode;
11723   enum insn_code icode1, icode2;
11724   optab optab1, optab2;
11725   tree vectype = vectype_in;
11726   tree wide_vectype = vectype_out;
11727   enum tree_code c1, c2;
11728   int i;
11729   tree prev_type, intermediate_type;
11730   machine_mode intermediate_mode, prev_mode;
11731   optab optab3, optab4;
11732 
11733   *multi_step_cvt = 0;
11734   if (loop_info)
11735     vect_loop = LOOP_VINFO_LOOP (loop_info);
11736 
11737   switch (code)
11738     {
11739     case WIDEN_MULT_EXPR:
11740       /* The result of a vectorized widening operation usually requires
11741 	 two vectors (because the widened results do not fit into one vector).
11742 	 The generated vector results would normally be expected to be
11743 	 generated in the same order as in the original scalar computation,
11744 	 i.e. if 8 results are generated in each vector iteration, they are
11745 	 to be organized as follows:
11746 		vect1: [res1,res2,res3,res4],
11747 		vect2: [res5,res6,res7,res8].
11748 
11749 	 However, in the special case that the result of the widening
11750 	 operation is used in a reduction computation only, the order doesn't
11751 	 matter (because when vectorizing a reduction we change the order of
11752 	 the computation).  Some targets can take advantage of this and
11753 	 generate more efficient code.  For example, targets like Altivec,
11754 	 that support widen_mult using a sequence of {mult_even,mult_odd}
11755 	 generate the following vectors:
11756 		vect1: [res1,res3,res5,res7],
11757 		vect2: [res2,res4,res6,res8].
11758 
11759 	 When vectorizing outer-loops, we execute the inner-loop sequentially
11760 	 (each vectorized inner-loop iteration contributes to VF outer-loop
11761 	 iterations in parallel).  We therefore don't allow changing the
11762 	 order of the computation in the inner-loop during outer-loop
11763 	 vectorization.  */
11764       /* TODO: Another case in which order doesn't *really* matter is when we
11765 	 widen and then contract again, e.g. (short)((int)x * y >> 8).
11766 	 Normally, pack_trunc performs an even/odd permute, whereas the
11767 	 repack from an even/odd expansion would be an interleave, which
11768 	 would be significantly simpler for e.g. AVX2.  */
11769       /* In any case, in order to avoid duplicating the code below, recurse
11770 	 on VEC_WIDEN_MULT_EVEN_EXPR.  If it succeeds, all the return values
11771 	 are properly set up for the caller.  If we fail, we'll continue with
11772 	 a VEC_WIDEN_MULT_LO/HI_EXPR check.  */
11773       if (vect_loop
11774 	  && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
11775 	  && !nested_in_vect_loop_p (vect_loop, stmt_info)
11776 	  && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
11777 					     stmt_info, vectype_out,
11778 					     vectype_in, code1, code2,
11779 					     multi_step_cvt, interm_types))
11780         {
11781           /* Elements in a vector with vect_used_by_reduction property cannot
11782              be reordered if the use chain with this property does not have the
11783              same operation.  One such example is s += a * b, where elements
11784              in a and b cannot be reordered.  Here we check if the vector defined
11785              by STMT is only directly used in the reduction statement.  */
11786 	  tree lhs = gimple_assign_lhs (stmt_info->stmt);
11787 	  stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
11788 	  if (use_stmt_info
11789 	      && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
11790 	    return true;
11791         }
11792       c1 = VEC_WIDEN_MULT_LO_EXPR;
11793       c2 = VEC_WIDEN_MULT_HI_EXPR;
11794       break;
11795 
11796     case DOT_PROD_EXPR:
11797       c1 = DOT_PROD_EXPR;
11798       c2 = DOT_PROD_EXPR;
11799       break;
11800 
11801     case SAD_EXPR:
11802       c1 = SAD_EXPR;
11803       c2 = SAD_EXPR;
11804       break;
11805 
11806     case VEC_WIDEN_MULT_EVEN_EXPR:
11807       /* Support the recursion induced just above.  */
11808       c1 = VEC_WIDEN_MULT_EVEN_EXPR;
11809       c2 = VEC_WIDEN_MULT_ODD_EXPR;
11810       break;
11811 
11812     case WIDEN_LSHIFT_EXPR:
11813       c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
11814       c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
11815       break;
11816 
11817     CASE_CONVERT:
11818       c1 = VEC_UNPACK_LO_EXPR;
11819       c2 = VEC_UNPACK_HI_EXPR;
11820       break;
11821 
11822     case FLOAT_EXPR:
11823       c1 = VEC_UNPACK_FLOAT_LO_EXPR;
11824       c2 = VEC_UNPACK_FLOAT_HI_EXPR;
11825       break;
11826 
11827     case FIX_TRUNC_EXPR:
11828       c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
11829       c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
11830       break;
11831 
11832     default:
11833       gcc_unreachable ();
11834     }
11835 
11836   if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
11837     std::swap (c1, c2);
11838 
11839   if (code == FIX_TRUNC_EXPR)
11840     {
11841       /* The signedness is determined from the output operand.  */
11842       optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
11843       optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
11844     }
11845   else if (CONVERT_EXPR_CODE_P (code)
11846 	   && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
11847 	   && VECTOR_BOOLEAN_TYPE_P (vectype)
11848 	   && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
11849 	   && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
11850     {
11851       /* If the input and result modes are the same, a different optab
11852 	 is needed where we pass in the number of units in vectype.  */
11853       optab1 = vec_unpacks_sbool_lo_optab;
11854       optab2 = vec_unpacks_sbool_hi_optab;
11855     }
11856   else
11857     {
11858       optab1 = optab_for_tree_code (c1, vectype, optab_default);
11859       optab2 = optab_for_tree_code (c2, vectype, optab_default);
11860     }
11861 
11862   if (!optab1 || !optab2)
11863     return false;
11864 
11865   vec_mode = TYPE_MODE (vectype);
11866   if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
11867        || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
11868     return false;
11869 
11870   *code1 = c1;
11871   *code2 = c2;
11872 
11873   if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
11874       && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
11875     {
11876       if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11877 	return true;
11878       /* For scalar masks we may have different boolean
11879 	 vector types having the same QImode.  Thus we add
11880 	 an additional check on the number of elements.  */
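      /* (For illustration, as an assumption rather than a statement about a
	 particular port: on a target using integer mask modes, a single
	 QImode mask may back both a 4-element and an 8-element boolean
	 vector, so checking the mode alone would not distinguish them.)  */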
11881       if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
11882 		    TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
11883 	return true;
11884     }
11885 
11886   /* Check if it's a multi-step conversion that can be done using intermediate
11887      types.  */
11888 
11889   prev_type = vectype;
11890   prev_mode = vec_mode;
11891 
11892   if (!CONVERT_EXPR_CODE_P (code))
11893     return false;
11894 
11895   /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
11896      intermediate steps in the promotion sequence.  We try at most
11897      MAX_INTERM_CVT_STEPS steps to get to WIDE_VECTYPE, and fail if we
11898      do not.  */
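  /* For example (an illustrative sketch assuming 128-bit vectors such as
     V16QI/V8HI/V4SI, not tied to any particular target): promoting chars
     to ints needs one intermediate step through a vector of shorts,

	V16QI --unpack_lo/hi--> 2 x V8HI --unpack_lo/hi--> 4 x V4SI,

     so on success *MULTI_STEP_CVT is 1 and INTERM_TYPES holds the single
     intermediate vector type (V8HI here).  */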
11899   interm_types->create (MAX_INTERM_CVT_STEPS);
11900   for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
11901     {
11902       intermediate_mode = insn_data[icode1].operand[0].mode;
11903       if (VECTOR_BOOLEAN_TYPE_P (prev_type))
11904 	intermediate_type
11905 	  = vect_halve_mask_nunits (prev_type, intermediate_mode);
11906       else
11907 	intermediate_type
11908 	  = lang_hooks.types.type_for_mode (intermediate_mode,
11909 					    TYPE_UNSIGNED (prev_type));
11910 
11911       if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
11912 	  && VECTOR_BOOLEAN_TYPE_P (prev_type)
11913 	  && intermediate_mode == prev_mode
11914 	  && SCALAR_INT_MODE_P (prev_mode))
11915 	{
11916 	  /* If the input and result modes are the same, a different optab
11917 	     is needed where we pass in the number of units in vectype.  */
11918 	  optab3 = vec_unpacks_sbool_lo_optab;
11919 	  optab4 = vec_unpacks_sbool_hi_optab;
11920 	}
11921       else
11922 	{
11923 	  optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
11924 	  optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
11925 	}
11926 
11927       if (!optab3 || !optab4
11928           || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
11929 	  || insn_data[icode1].operand[0].mode != intermediate_mode
11930 	  || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
11931 	  || insn_data[icode2].operand[0].mode != intermediate_mode
11932 	  || ((icode1 = optab_handler (optab3, intermediate_mode))
11933 	      == CODE_FOR_nothing)
11934 	  || ((icode2 = optab_handler (optab4, intermediate_mode))
11935 	      == CODE_FOR_nothing))
11936 	break;
11937 
11938       interm_types->quick_push (intermediate_type);
11939       (*multi_step_cvt)++;
11940 
11941       if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
11942 	  && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
11943 	{
11944 	  if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11945 	    return true;
11946 	  if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
11947 			TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
11948 	    return true;
11949 	}
11950 
11951       prev_type = intermediate_type;
11952       prev_mode = intermediate_mode;
11953     }
11954 
11955   interm_types->release ();
11956   return false;
11957 }
11958 
11959 
11960 /* Function supportable_narrowing_operation
11961 
11962    Check whether an operation represented by the code CODE is a
11963    narrowing operation that is supported by the target platform in
11964    vector form (i.e., when operating on arguments of type VECTYPE_IN
11965    and producing a result of type VECTYPE_OUT).
11966 
11967    Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
11968    and FLOAT.  This function checks if these operations are supported by
11969    the target platform directly via vector tree-codes.
11970 
11971    Output:
11972    - CODE1 is the code of a vector operation to be used when
11973    vectorizing the operation, if available.
11974    - MULTI_STEP_CVT determines the number of required intermediate steps in
11975    case of multi-step conversion (like int->short->char - in that case
11976    MULTI_STEP_CVT will be 1).
11977    - INTERM_TYPES contains the intermediate type required to perform the
11978    narrowing operation (short in the above example).   */
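/* As an illustrative sketch (assuming 128-bit vectors such as V4SI, V8HI and
   V16QI, not tied to any particular target), the int->short->char case above
   maps to two VEC_PACK_TRUNC steps:

	2 x V4SI --pack_trunc--> 1 x V8HI,  2 x V8HI --pack_trunc--> 1 x V16QI,

   so MULTI_STEP_CVT is 1 and INTERM_TYPES holds the vector-of-shorts type
   (V8HI here).  */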
11979 
11980 bool
11981 supportable_narrowing_operation (enum tree_code code,
11982 				 tree vectype_out, tree vectype_in,
11983 				 enum tree_code *code1, int *multi_step_cvt,
11984                                  vec<tree> *interm_types)
11985 {
11986   machine_mode vec_mode;
11987   enum insn_code icode1;
11988   optab optab1, interm_optab;
11989   tree vectype = vectype_in;
11990   tree narrow_vectype = vectype_out;
11991   enum tree_code c1;
11992   tree intermediate_type, prev_type;
11993   machine_mode intermediate_mode, prev_mode;
11994   int i;
11995   bool uns;
11996 
11997   *multi_step_cvt = 0;
11998   switch (code)
11999     {
12000     CASE_CONVERT:
12001       c1 = VEC_PACK_TRUNC_EXPR;
12002       if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
12003 	  && VECTOR_BOOLEAN_TYPE_P (vectype)
12004 	  && TYPE_MODE (narrow_vectype) == TYPE_MODE (vectype)
12005 	  && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
12006 	optab1 = vec_pack_sbool_trunc_optab;
12007       else
12008 	optab1 = optab_for_tree_code (c1, vectype, optab_default);
12009       break;
12010 
12011     case FIX_TRUNC_EXPR:
12012       c1 = VEC_PACK_FIX_TRUNC_EXPR;
12013       /* The signedness is determined from the output operand.  */
12014       optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
12015       break;
12016 
12017     case FLOAT_EXPR:
12018       c1 = VEC_PACK_FLOAT_EXPR;
12019       optab1 = optab_for_tree_code (c1, vectype, optab_default);
12020       break;
12021 
12022     default:
12023       gcc_unreachable ();
12024     }
12025 
12026   if (!optab1)
12027     return false;
12028 
12029   vec_mode = TYPE_MODE (vectype);
12030   if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
12031     return false;
12032 
12033   *code1 = c1;
12034 
12035   if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
12036     {
12037       if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12038 	return true;
12039       /* For scalar masks we may have different boolean
12040 	 vector types having the same QImode.  Thus we add
12041 	 an additional check on the number of elements.  */
12042       if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
12043 		    TYPE_VECTOR_SUBPARTS (narrow_vectype)))
12044 	return true;
12045     }
12046 
12047   if (code == FLOAT_EXPR)
12048     return false;
12049 
12050   /* Check if it's a multi-step conversion that can be done using intermediate
12051      types.  */
12052   prev_mode = vec_mode;
12053   prev_type = vectype;
12054   if (code == FIX_TRUNC_EXPR)
12055     uns = TYPE_UNSIGNED (vectype_out);
12056   else
12057     uns = TYPE_UNSIGNED (vectype);
12058 
12059   /* For multi-step FIX_TRUNC_EXPR prefer a signed floating-point to
12060      integer conversion over an unsigned one, as an unsigned
12061      FIX_TRUNC_EXPR is often more costly than a signed one.  */
12062   if (code == FIX_TRUNC_EXPR && uns)
12063     {
12064       enum insn_code icode2;
12065 
12066       intermediate_type
12067 	= lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
12068       interm_optab
12069 	= optab_for_tree_code (c1, intermediate_type, optab_default);
12070       if (interm_optab != unknown_optab
12071 	  && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
12072 	  && insn_data[icode1].operand[0].mode
12073 	     == insn_data[icode2].operand[0].mode)
12074 	{
12075 	  uns = false;
12076 	  optab1 = interm_optab;
12077 	  icode1 = icode2;
12078 	}
12079     }
12080 
12081   /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
12082      intermediate steps in the narrowing sequence.  We try at most
12083      MAX_INTERM_CVT_STEPS steps to get to NARROW_VECTYPE, and fail if we do not.  */
12084   interm_types->create (MAX_INTERM_CVT_STEPS);
12085   for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
12086     {
12087       intermediate_mode = insn_data[icode1].operand[0].mode;
12088       if (VECTOR_BOOLEAN_TYPE_P (prev_type))
12089 	intermediate_type
12090 	  = vect_double_mask_nunits (prev_type, intermediate_mode);
12091       else
12092 	intermediate_type
12093 	  = lang_hooks.types.type_for_mode (intermediate_mode, uns);
12094       if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
12095 	  && VECTOR_BOOLEAN_TYPE_P (prev_type)
12096 	  && intermediate_mode == prev_mode
12097 	  && SCALAR_INT_MODE_P (prev_mode))
12098 	interm_optab = vec_pack_sbool_trunc_optab;
12099       else
12100 	interm_optab
12101 	  = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
12102 				 optab_default);
12103       if (!interm_optab
12104 	  || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
12105 	  || insn_data[icode1].operand[0].mode != intermediate_mode
12106 	  || ((icode1 = optab_handler (interm_optab, intermediate_mode))
12107 	      == CODE_FOR_nothing))
12108 	break;
12109 
12110       interm_types->quick_push (intermediate_type);
12111       (*multi_step_cvt)++;
12112 
12113       if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
12114 	{
12115 	  if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12116 	    return true;
12117 	  if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
12118 			TYPE_VECTOR_SUBPARTS (narrow_vectype)))
12119 	    return true;
12120 	}
12121 
12122       prev_mode = intermediate_mode;
12123       prev_type = intermediate_type;
12124       optab1 = interm_optab;
12125     }
12126 
12127   interm_types->release ();
12128   return false;
12129 }
12130 
12131 /* Generate and return a statement that sets vector mask MASK such that
12132    MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I.  */
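/* For instance (an illustrative example, independent of the target): with an
   8-element mask, START_INDEX == 2 and END_INDEX == 5 the WHILE_ULT call
   built below yields MASK == { 1, 1, 1, 0, 0, 0, 0, 0 }, i.e. lane I is
   active exactly when START_INDEX + I < END_INDEX.  */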
12133 
12134 gcall *
12135 vect_gen_while (tree mask, tree start_index, tree end_index)
12136 {
12137   tree cmp_type = TREE_TYPE (start_index);
12138   tree mask_type = TREE_TYPE (mask);
12139   gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
12140 						       cmp_type, mask_type,
12141 						       OPTIMIZE_FOR_SPEED));
12142   gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
12143 					    start_index, end_index,
12144 					    build_zero_cst (mask_type));
12145   gimple_call_set_lhs (call, mask);
12146   return call;
12147 }
12148 
12149 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
12150    J + START_INDEX < END_INDEX for all J <= I.  Add the statements to SEQ.  */
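/* Continuing the illustrative example above: for START_INDEX == 2,
   END_INDEX == 5 and an 8-element mask the returned value is
   { 0, 0, 0, 1, 1, 1, 1, 1 }, the bitwise complement of the mask that
   vect_gen_while produces.  */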
12151 
12152 tree
12153 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
12154 		    tree end_index)
12155 {
12156   tree tmp = make_ssa_name (mask_type);
12157   gcall *call = vect_gen_while (tmp, start_index, end_index);
12158   gimple_seq_add_stmt (seq, call);
12159   return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
12160 }
12161 
12162 /* Try to compute the vector types required to vectorize STMT_INFO,
12163    returning true on success and false if vectorization isn't possible.
12164    If GROUP_SIZE is nonzero and we're performing BB vectorization,
12165    make sure that the number of elements in the vectors is no bigger
12166    than GROUP_SIZE.
12167 
12168    On success:
12169 
12170    - Set *STMT_VECTYPE_OUT to:
12171      - NULL_TREE if the statement doesn't need to be vectorized;
12172      - the equivalent of STMT_VINFO_VECTYPE otherwise.
12173 
12174    - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
12175      number of units needed to vectorize STMT_INFO, or NULL_TREE if the
12176      statement does not help to determine the overall number of units.  */
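/* As an illustrative example (assuming 128-bit vectors, not tied to any
   particular target): for a conversion statement such as

	int_x = (int) char_y

   *STMT_VECTYPE_OUT would be a 4-element int vector, while the smallest
   scalar type accessed is a char, so *NUNITS_VECTYPE_OUT would be the
   16-element char vector, which carries the larger number of units.  */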
12177 
12178 opt_result
12179 vect_get_vector_types_for_stmt (stmt_vec_info stmt_info,
12180 				tree *stmt_vectype_out,
12181 				tree *nunits_vectype_out,
12182 				unsigned int group_size)
12183 {
12184   vec_info *vinfo = stmt_info->vinfo;
12185   gimple *stmt = stmt_info->stmt;
12186 
12187   /* For BB vectorization, we should always have a group size once we've
12188      constructed the SLP tree; the only valid uses of a zero GROUP_SIZE
12189      are tentative requests during things like early data reference
12190      analysis and pattern recognition.  */
12191   if (is_a <bb_vec_info> (vinfo))
12192     gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
12193   else
12194     group_size = 0;
12195 
12196   *stmt_vectype_out = NULL_TREE;
12197   *nunits_vectype_out = NULL_TREE;
12198 
12199   if (gimple_get_lhs (stmt) == NULL_TREE
12200       /* MASK_STORE has no lhs, but is ok.  */
12201       && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
12202     {
12203       if (is_a <gcall *> (stmt))
12204 	{
12205 	  /* Ignore calls with no lhs.  These must be calls to
12206 	     #pragma omp simd functions, and the vectorization factor
12207 	     they really need can't be determined until
12208 	     vectorizable_simd_clone_call.  */
12209 	  if (dump_enabled_p ())
12210 	    dump_printf_loc (MSG_NOTE, vect_location,
12211 			     "defer to SIMD clone analysis.\n");
12212 	  return opt_result::success ();
12213 	}
12214 
12215       return opt_result::failure_at (stmt,
12216 				     "not vectorized: irregular stmt.%G", stmt);
12217     }
12218 
12219   if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
12220     return opt_result::failure_at (stmt,
12221 				   "not vectorized: vector stmt in loop:%G",
12222 				   stmt);
12223 
12224   tree vectype;
12225   tree scalar_type = NULL_TREE;
12226   if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
12227     {
12228       vectype = STMT_VINFO_VECTYPE (stmt_info);
12229       if (dump_enabled_p ())
12230 	dump_printf_loc (MSG_NOTE, vect_location,
12231 			 "precomputed vectype: %T\n", vectype);
12232     }
12233   else if (vect_use_mask_type_p (stmt_info))
12234     {
12235       unsigned int precision = stmt_info->mask_precision;
12236       scalar_type = build_nonstandard_integer_type (precision, 1);
12237       vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
12238       if (!vectype)
12239 	return opt_result::failure_at (stmt, "not vectorized: unsupported"
12240 				       " data-type %T\n", scalar_type);
12241       if (dump_enabled_p ())
12242 	dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
12243     }
12244   else
12245     {
12246       if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
12247 	scalar_type = TREE_TYPE (DR_REF (dr));
12248       else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
12249 	scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
12250       else
12251 	scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
12252 
12253       if (dump_enabled_p ())
12254 	{
12255 	  if (group_size)
12256 	    dump_printf_loc (MSG_NOTE, vect_location,
12257 			     "get vectype for scalar type (group size %d):"
12258 			     " %T\n", group_size, scalar_type);
12259 	  else
12260 	    dump_printf_loc (MSG_NOTE, vect_location,
12261 			     "get vectype for scalar type: %T\n", scalar_type);
12262 	}
12263       vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
12264       if (!vectype)
12265 	return opt_result::failure_at (stmt,
12266 				       "not vectorized:"
12267 				       " unsupported data-type %T\n",
12268 				       scalar_type);
12269 
12270       if (dump_enabled_p ())
12271 	dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
12272     }
12273   *stmt_vectype_out = vectype;
12274 
12275   /* Don't try to compute scalar types if the stmt produces a boolean
12276      vector; use the existing vector type instead.  */
12277   tree nunits_vectype = vectype;
12278   if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12279     {
12280       /* The number of units is set according to the smallest scalar
12281 	 type (or the largest vector size, but we only support one
12282 	 vector size per vectorization).  */
12283       HOST_WIDE_INT dummy;
12284       scalar_type = vect_get_smallest_scalar_type (stmt_info, &dummy, &dummy);
12285       if (scalar_type != TREE_TYPE (vectype))
12286 	{
12287 	  if (dump_enabled_p ())
12288 	    dump_printf_loc (MSG_NOTE, vect_location,
12289 			     "get vectype for smallest scalar type: %T\n",
12290 			     scalar_type);
12291 	  nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
12292 							group_size);
12293 	  if (!nunits_vectype)
12294 	    return opt_result::failure_at
12295 	      (stmt, "not vectorized: unsupported data-type %T\n",
12296 	       scalar_type);
12297 	  if (dump_enabled_p ())
12298 	    dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
12299 			     nunits_vectype);
12300 	}
12301     }
12302 
12303   gcc_assert (multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
12304 			  TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)));
12305 
12306   if (dump_enabled_p ())
12307     {
12308       dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
12309       dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
12310       dump_printf (MSG_NOTE, "\n");
12311     }
12312 
12313   *nunits_vectype_out = nunits_vectype;
12314   return opt_result::success ();
12315 }
12316